User:AfdlBot/Source


#!/usr/bin/python
# -*- coding: utf-8  -*-
# AfdlBot - Bot to maintain Wikipedia Afdl pages, see http://en.wikipedia.org/wiki/User:AfdlBot
# Written by an Anon E. Mouse Wikipedia editor, http://en.wikipedia.org/wiki/User:AnonEMouse
# June 2007
# Intended potential users of this bot:
# Wikipedia:WikiProject Macintosh/Deletion,
# Wikipedia:WikiProject Georgia Tech/Deletion
# Wikipedia:WikiProject Pornography/Deletion
# Wikipedia:WikiProject Video games/Deletion
# Wikipedia:WikiProject Warcraft/Deletion
# Wikipedia:WikiProject Webcomics/Deletion
# Wikipedia:WikiProject Warcraft/Deletion
# Wikipedia:WikiProject Webcomics/Deletion


# TODO: Templates, (and tfdl), mfdl?
# TODO: actually read categories for discussion, rather than just maintaining the already listed ones
# TODO: Deletion review as for categories
# TODO: follow down config-input categories, get all subcategories
# TODO: WikiProject Deletion Sorting and subst:ed Afds
# TODO: mark Afd when listing

import codecs
import re
import sys
import time
from datetime import date, timedelta

import config
import wikipedia

# Site object handed to every wikipedia.Page() constructed in this file.
site = wikipedia.getSite()
# Log file receiving every listing decision and the generated list text.
logfile = open("afd.log", 'w')
# Wiki page holding the configuration of all participating projects.
afdlBotPage = 'User:AfdlBot/Configuration'
# AfdlProject instances; filled in by readAfdlProjects().
afdlProjects = []

def followRedirects(title):
    """Get the page for `title`, following redirects for as long as it is one.

    Returns the wikipedia.Page that was finally reached. A page that does not
    exist (NoPage) or whose section is missing (SectionError) is returned
    as-is, so callers still have to check exists() themselves.

    A redirect loop is broken by returning the last page reached instead of
    recursing forever.
    """
    visited = set()
    while True:
        page = wikipedia.Page(site, title)
        try:
            page.get()
        except (wikipedia.NoPage, wikipedia.SectionError):
            return page
        except wikipedia.IsRedirectPage as arg:
            # The exception's first argument is used as the redirect target
            target = arg.args[0]
            if target in visited:
                return page  # redirect loop, give up here
            visited.add(title)
            title = target
            continue
        return page

def parseDate(string):
    """Accept YYYY-MM-DD or 17:55, June 8, 2007 formats, return a date object.

    Three layouts are tried in order:
      2007-06-08
      17:55, June 8, 2007
      17:55, 8 June 2007
    Surrounding whitespace is ignored. Returns None when `string` is None or
    matches none of the layouts.
    """
    if string is None:
        return None
    string = string.strip()
    for layout in ('%Y-%m-%d', '%H:%M, %B %d, %Y', '%H:%M, %d %B %Y'):
        try:
            return date(*(time.strptime(string, layout)[0:3]))
        except ValueError:
            continue
    return None

# Regular sub-expression to match 4 kinds of wikilinked dates
# [[2007]]-[[03-15]]
# [[2007-03-15]]
# [[July 3]], [[2007]]
# [[3 July]], [[2007]]
# Note {len(May)=3, len(September)=9}
# The day of month accepts one or two digits, so that "July 3" matches too.
datexpr = r'(\[\[\d{4}\]\]-\[\[\d\d\-\d\d\]\])|' + \
          r'(\[\[\d{4}-\d\d-\d\d\]\])|' + \
          r'(\[\[[A-Za-z]{3,9} \d\d?\]\],?\s*\[\[\d{4}\]\])|' + \
          r'(\[\[\d\d? [A-Za-z]{3,9}\]\],?\s*\[\[\d{4}\]\])'


def parseWikidate(string):
    """Parse one of the 4 wikilinked date formats above into a date object.

    Square brackets and commas are removed first; what remains is tried as
    2007-03-15, then July 3 2007, then 3 July 2007.
    Returns None when `string` is None or cannot be parsed.
    """
    if string is None:
        return None
    string = re.sub(r'[\]\[,]', '', string)
    for layout in ('%Y-%m-%d', '%B %d %Y', '%d %B %Y'):
        try:
            return date(*(time.strptime(string, layout)[0:3]))
        except ValueError:
            continue
    return None

def dateToPageName(d):
    """Return the date part of a daily log page name, for example 2007 June 9.

    Need to produce single digits for days < 10, so the day is appended with
    str() rather than formatted with strftime('%d'), which would zero-pad it.
    """
    return d.strftime('%Y %B ') + str(d.day)

# Matches the text of a closed Afd. Captures the bolded result and the first
# timestamp that follows it (the closing signature), for example:
# The result was '''Speedy delete per G3'''. Peacent 02:29, 9 June 2007 (UTC)
# The day of month may have one or two digits. The gap between the result and
# the timestamp is matched lazily, so that later comments are not picked up.
Rresult = re.compile(r".*<!--Template:Afd top.*The result was[^']*'''(?P<result>[^']*)'''.*?" +
                     r"(?P<date>[0-9][0-9]:[0-9][0-9], [0-9]{1,2} [A-Za-z]+ [0-9]{4}) \(UTC\)", re.DOTALL)

# Don't keep afdls more than a month old
archivedate = date.today() + timedelta(days=-31)

class AfdlProject(object):
    """A project (or other party) maintaining a list of Afds with Afdls.

    Attributes:
      listpage          - the wiki page holding the project's list
      articleCategories, articleTemplates, talkCategories, talkTemplates
                        - capitalized names that make an article qualify
      openAfds, closedAfds - AfdArticle / CfdCategory objects to be listed
      newOpen, newClosed, newArchived - number of useful changes; the caller
                        only writes the page when their sum is positive
      parsed            - False when the list page had no Open/Closed sections
      header, opentext, closedtext - page text that is carried over verbatim
                        (only set when parsed is True)
    """

    # Regular expression matching an Afdl template.
    # NOTE(review): the template part of this expression was lost from the
    # published source; {{Afdl|...}} is reconstructed from the way parseAfdls
    # reads the 'params' group - confirm against a real list page.
    Rafdl = re.compile(r'\s*(\*\s*)?\{\{\s*Afdl\s*\|(?P<params>.*?)\}\}\s*', re.DOTALL | re.IGNORECASE)

    def parseAfdls(self, text):
        """ Parses Afdls from text, returns unparsed text.

        Every Afdl that can be turned into an AfdArticle is filed under
        openAfds or closedAfds, or counted in newArchived when it was closed
        on or before archivedate. An entry without a known close date is
        kept, the same way parseCfds treats it.
        """
        last = 0
        rest = ''
        for m in AfdlProject.Rafdl.finditer(text):
            paramString = m.group('params')
            params = []
            if paramString:
                params = paramString.split('|')
            aa = AfdArticleFromAfdlParams(params)
            if aa is None:
                continue
            if m.start() > last:
                rest += text[last:m.start()] + '\n'
            last = m.end()
            if aa.result:
                if not aa.closedate or aa.closedate > archivedate:
                    self.closedAfds.append(aa)
                else:
                    self.newArchived += 1
            else:
                self.openAfds.append(aa)
        if last < len(text):
            rest += text[last:] + '\n'
        return rest

    # Matching a categories for deletion line is tough since it's not a template, but free form text.
    # (The link and bold markup in these examples is reconstructed.)
    # * [[:Category:Anime and manga inspired webcomics]] to [[:Category:Anime and manga webcomics]] at [[Wikipedia:Categories for deletion/Log/2006 July 11]] '''Kept''' ([[July 10]] [[2006]] - [[July 20]] [[2006]])
    # *[[:Category:Keentoons]] at [[Wikipedia:Categories for discussion/Log/2007 April 30]]
    # * [[:Category:Big-bust models and performers]] ([[2007-03-15]] - [[2007-03-21]]) '''No consensus'''
    # *[[:Category:Naturally busty porn stars]] at [[Wikipedia:Categories for discussion/Log/2007 March 8]] ([[2007-03-08]] - ([[2007-03-14]]) '''Delete'''
    # * [[:Category:Adult video games]] at [[Wikipedia:Categories for deletion/Log/2006 May 12]] '''renamed to [[:Category:Erotic computer and video games]]'''
    # * [[:Category:Artifex]] at [[Wikipedia:Categories for discussion/Log/2007 April 17]] '''Speedy Delete'''
    Rcfd = re.compile(r'\s*(\*\s*)?\[\[:(?P<category>Category:[^\]]+)\]\](?P<optional>.*?)' +
                      r'(\s+at\s+\[\[(?P<cfd>Wikipedia:Categories[ _]for[ _]d[a-z]+/Log/[^\]]+)\]\])?\s*' +
                      r"((?P<optional2>.*?)'''(?P<result>[^'\n]+)''')?(?P<rest>.*)", re.IGNORECASE)

    # Similarly for deletion review (not used yet, see the TODO list).
    # * [[Air Force Amy]] at [[Wikipedia:Deletion_review/Log/2007 May 5]] ([[2007-04-05]]&mdash;[[2007-04-06]]) '''Keep rewritten article.'''
    # * [[List of male performers in gay porn films]] at [[Wikipedia:Deletion review/Log/2007 April 18]] ([[2007-04-18]]&mdash;[[23 April]] [[2007]]) '''Deletion overturned'''
    # The log page name is "Deletion review/Log/...", as in the examples.
    Rdrv = re.compile(r'\s*(\*\s*)?\[\[:?(?P<article>[^\]]+)\]\](?P<optional>.*?)\s+at\s+' +
                      r'\[\[(?P<drv>Wikipedia:Deletion[ _]review/Log/[^\]]+)\]\]\s*' +
                      r"((?P<optional2>.*?)'''(?P<result>[^'\n]+)''')?(?P<rest>.*)", re.IGNORECASE)

    # A parenthesized pair of wikilinked dates: (fromdate <anything but [ or )> todate)
    Rdatespan = re.compile(r'\s*\(\s*(?P<fromdate>' + datexpr + r')\s*[^\[\)]*\s*(?P<todate>' + datexpr + r')\s*\)\s*')

    def parseCfds(self, text):
        """ Parses Cfd listings from text, returns unparsed text.

        A line only counts as a Cfd listing when a start date can be found,
        either from a date span on the line or from the log page it links to.
        """
        last = 0
        rest = ''
        for m in AfdlProject.Rcfd.finditer(text):
            cfdname = m.group('cfd')
            if cfdname:
                cfd = wikipedia.Page(site, cfdname)
            else:
                cfd = None
            category = wikipedia.Page(site, m.group('category'))

            # Everything on the line that is neither link nor result
            cfdrest = ''
            for groupname in ('optional', 'optional2', 'rest'):
                if m.group(groupname):
                    cfdrest += ' ' + m.group(groupname)
                    cfdrest = cfdrest.strip()

            fromdate = None
            todate = None
            datespan = AfdlProject.Rdatespan.search(cfdrest)
            if datespan:
                cfdrest = AfdlProject.Rdatespan.sub('', cfdrest)
                fromdate = parseWikidate(datespan.group('fromdate'))
                todate = parseWikidate(datespan.group('todate'))
            if fromdate and not cfd:
                # No log page linked: assume the log of the day the debate started
                cfd = wikipedia.Page(site, 'Wikipedia:Categories for discussion/Log/' + dateToPageName(fromdate))
                # Todo: check if cfd page links to category?

            c = CfdCategory(cfd, category, fromdate, todate, m.group('result'), cfdrest)
            if c.startdate:  # in other words, if it's legitimate
                if m.start() > last:
                    rest += text[last:m.start()] + '\n'
                last = m.end()
                if c.result:
                    if not c.closedate or c.closedate > archivedate:
                        self.closedAfds.append(c)
                    else:
                        self.newArchived += 1
                else:
                    self.openAfds.append(c)
        if last < len(text):
            rest += text[last:] + '\n'
        print('rest after cfds= %s' % rest)
        return rest

    # Regular expression that matches an Afdl list page
    RafdlPage = re.compile(r'(?P<header>.*)' +
                           r'==\s*Open\s*==\s*(?P<open>.*)' +
                           r'==\s*Closed\s*==\s*(?P<closed>.*)', re.DOTALL)
    # Todo: separate footer?

    def __init__(self, listpage, articleCategories, articleTemplates, talkCategories, talkTemplates):
        """Reads the list page and parses the debates already listed on it.

        When the page has no Open and Closed sections, a message is printed
        and logged and the project is left with parsed == False.
        """
        self.listpage = listpage
        self.articleTemplates = articleTemplates
        self.articleCategories = articleCategories
        self.talkTemplates = talkTemplates
        self.talkCategories = talkCategories
        self.openAfds = []
        self.closedAfds = []
        # Count the number of useful changes that would be made to list page when writing
        # - if none, don't write anything
        self.newOpen = 0
        self.newClosed = 0
        self.newArchived = 0
        # Todo: self.archivedAfds = []
        # Nothing may be added to a project whose page could not be parsed,
        # since afdlsText has no header/opentext/closedtext to work with.
        self.parsed = False

        match = AfdlProject.RafdlPage.match(listpage.get())
        if not match:
            print('Could not parse %s !!' % listpage)
            logfile.write('Could not parse ' + str(listpage) + '!!\n')
            return
        self.parsed = True
        self.header = match.group('header')

        # The date headers are generated again by afdsByTime
        openmatch = AfdlProject.Rdateheader.sub('', match.group('open'))
        closedmatch = AfdlProject.Rdateheader.sub('', match.group('closed'))
        self.opentext = self.parseAfdls(openmatch)
        self.opentext = self.parseCfds(self.opentext)
        # Some of the formerly open Afds will have just been closed, count them
        self.newClosed = len(self.closedAfds)
        self.closedtext = self.parseAfdls(closedmatch)
        self.closedtext = self.parseCfds(self.closedtext)

    def __str__(self):
        """A console representation of the AfdlProject"""
        return str(self.listpage)

    def logAfd(self, page, afd, reason, spec):
        """ Add an article and its afd to the project lists. Log this in a file for fun.

        page and afd are wiki pages; reason and spec are strings saying why
        the article qualified. A debate without a start date is not added,
        since it could not be placed under a date header.
        """
        aa = AfdArticle(afd, page)
        if getattr(aa, 'startdate', None) is None:
            return
        # Consider if article has been deleted or redirected
        if aa.result:
            # Todo: should we check archivedate? Or should we put it on the page at least once?
            self.closedAfds.append(aa)
            self.newClosed += 1
        else:
            self.openAfds.append(aa)
            self.newOpen += 1
        logfile.write(self.listpage.title() + '\t' + page.title() + '\t' + afd.title() + '\t'
                      + reason + ':' + spec + '\n')
        logfile.flush()

    def checkAfdArticle(self, afd, article, talkpage):
        """ Check if an Afd for an article qualifies to be added to the project lists.
        Returns True if qualifies (and has been added), False if not. """
        if not self.parsed:
            return False

        # check for articles already in Afd list, those don't even need to be "gotten"
        for listed in self.openAfds:
            if listed.afd == afd and listed.article == article:
                if Rresult.match(afd.get()):
                    # afd has a result, in other words, was closed:
                    # move it over to the closed list
                    self.openAfds.remove(listed)
                    self.logAfd(article, afd, 'listed as open on', self.listpage.title())
                return True
        for listed in self.closedAfds:
            if listed.afd == afd and listed.article == article:
                return True

        if len(self.articleCategories) > 0:
            for cat in article.categories():
                if cat.title().capitalize() in self.articleCategories:
                    self.logAfd(article, afd, 'article category', cat.title())
                    return True
        if len(self.articleTemplates) > 0:
            for template in article.templates():
                # template is used as a string here; capitalize() gives the
                # same result as title().capitalize() on a string
                if template.capitalize() in self.articleTemplates:
                    self.logAfd(article, afd, 'article template', template)
                    return True

        # Do we need to check talk page?
        if len(self.talkCategories) + len(self.talkTemplates) <= 0:
            return False
        if not talkpage.exists():
            return False
        if len(self.talkCategories) > 0:
            for cat in talkpage.categories():
                if cat.title().capitalize() in self.talkCategories:
                    self.logAfd(article, afd, 'talk category', cat.title())
                    return True
        if len(self.talkTemplates) > 0:
            for template in talkpage.templates():
                if template.capitalize() in self.talkTemplates:
                    self.logAfd(article, afd, 'talk template', template)
                    return True
        return False

    # Regular expression that matches the date header generated below
    Rdateheader = re.compile(r"^\s*\d\d? [A-Za-z]{3,9}\s+\(\[\[.*\|AfD*\]\].*\)[ \t]*\n", re.MULTILINE)

    def afdsByTime(self, list):
        """Returns the wikitext listing the debates in list, grouped by start date.

        list is sorted in place. Each new start date gets a header line that
        links to the Afd and Cfd logs of that day; each debate is also logged.
        """
        list.sort()
        lastdate = None
        result = ''
        for afd in list:
            if afd.startdate != lastdate:
                lastdate = afd.startdate
                # 19 June ([[Wikipedia:Articles for deletion/Log/2007 June 19|AfD]],
                #          [[Wikipedia:Categories for discussion/Log/2007 June 19|CfD]])
                datename = dateToPageName(afd.startdate)
                result += afd.startdate.strftime("%d %B") \
                          + ' ([[Wikipedia:Articles for deletion/Log/' + datename + '|AfD]],' \
                          + ' [[Wikipedia:Categories for discussion/Log/' + datename + '|CfD]])' + '\n'
            result += '* ' + str(afd) + '\n'
            logfile.write('* ' + str(afd).encode(config.console_encoding, 'replace') + '\n')
        return result

    def afdlsText(self):
        """Returns the AfdArticle lists in this project, also to logfile."""
        logfile.write(str(self) + '\n')
        text = self.header + '== Open ==\n' + self.opentext
        text += self.afdsByTime(self.openAfds)
        text += '== Closed ==\n' + self.closedtext
        text += self.afdsByTime(self.closedAfds)
        # Todo: archived Afds by alphabetical order?
        logfile.write(text.encode(config.console_encoding, 'replace'))
        return text

# end class AfdlProject

class AfdArticle(object):
    """An article for deletion, with its article (usually but not always 1-1).

    Attributes:
      afd       - the page holding the deletion debate
      article   - the page under discussion
      startdate - date the debate started; NOT set at all when it was not
                  given and the debate page has no version history
      result    - closing result text, None while the debate is open
      closedate - date the debate was closed, None while open or unknown
    """

    def __init__(self, afd, article, startdate=None, closedate=None, result=None):
        """Anything not passed in is read from the afd page.

        A missing startdate is taken from the first entry of the page's
        version history. result and closedate are read from the page text
        unless both are given.
        """
        self.article = article
        self.afd = afd
        # Defined even when we return early below
        self.result = None
        self.closedate = None

        if startdate:
            self.startdate = startdate
        else:
            # An approximation - assuming first edit created
            edits = afd.getVersionHistory(reverseOrder = True, getAll = True)
            if not edits:
                return  # an AfD must have a startdate
            self.startdate = parseDate(edits[0][1])

        if result and closedate:
            self.result = result
            self.closedate = closedate
            return

        match = Rresult.match(afd.get())
        if not match:
            return  # still open
        if result:
            self.result = result
        else:
            self.result = match.group('result')
        if closedate:
            self.closedate = closedate
        else:
            self.closedate = parseDate(match.group('date'))

    def __str__(self):
        """A console representation of the AfdArticle"""
        return self.afdl()

    def __cmp__(self, other):
        """Allows sorting AfdArticles. Descending order by startdate. """
        theirs = other.startdate
        mine = self.startdate
        return (theirs > mine) - (theirs < mine)

    def __lt__(self, other):
        """Same descending order, for sorts that use rich comparison."""
        return other.startdate < self.startdate

    def afdl(self):
        """Returns the Afdl template call for this debate.

        The parameters are the ones AfdArticleFromAfdlParams reads: article,
        AfD name, open date and, once closed, close date and result.
        NOTE(review): the template text was lost from the published source
        and is reconstructed from that parameter list - confirm against the
        real Template:Afdl.
        """
        fields = [self.article.title(), self.afd.title(), str(self.startdate)]
        if self.result:
            if self.closedate:
                fields.append(str(self.closedate))
            else:
                fields.append('')  # unknown close date, read back as None
            fields.append(self.result)
        retval = '{{Afdl|' + '|'.join(fields) + '}}'
        return retval

# end class AfdArticle

def AfdArticleFromAfdlParams(afdlp):
    """Reads an AfdArticle from ['article', 'AfD name', 'open YYYY-MM-DD', 'close YYYY-MM-DD', 'result'].

    Last 3 params optional; any missing params will be read from the afd.
    An empty 'AfD name' stands for 'Wikipedia:Articles for deletion/' plus
    the article name.
    Returns None when there is no article name, or when no start date could
    be determined for the debate.
    """
    if not afdlp or not afdlp[0]:
        return None
    articlename = afdlp[0]
    if len(afdlp) > 1 and len(afdlp[1]) > 0:
        afdname = afdlp[1]
    else:
        afdname = 'Wikipedia:Articles for deletion/' + articlename
    afd = wikipedia.Page(site, afdname)
    article = wikipedia.Page(site, articlename)

    startdate = closedate = result = None
    if len(afdlp) > 2:
        startdate = parseDate(afdlp[2])
    if len(afdlp) > 4:
        # close date and result only count when both are present
        closedate = parseDate(afdlp[3])
        result = afdlp[4]
    aa = AfdArticle(afd, article, startdate, closedate, result)

    # No AFD, or no usable date to list it under
    if getattr(aa, 'startdate', None) is None:
        return None
    return aa

class CfdCategory(AfdArticle):
    """Some special treatment for Categories for discussion/deletion debates.

    Unlike AfdArticle, nothing is read from the wiki: all values come from
    the listing line. `rest` keeps any unparsed text of that line.
    """

    # Parse date and subsection out of a cfd link
    Rcfdlink = re.compile(r'Wikipedia:Categories for d[a-z]+/Log/(?P<date>[A-Za-z0-9_ ]+)(#.*)?')

    def __init__(self, cfd, category, startdate, closedate, result, rest):
        """cfd may be None when the listing line links to no log page.

        Without a startdate, the date in the log page title is used;
        startdate stays None when that is not possible either.
        """
        self.article = category
        self.afd = cfd
        self.startdate = startdate
        self.closedate = closedate
        self.result = result
        self.rest = rest  # any unparsed stuff
        if not startdate and cfd is not None:
            match = CfdCategory.Rcfdlink.match(cfd.title())
            if match:  # If not, should throw error
                self.startdate = CfdCategory._parseLogDate(match.group('date'))
        # Todo: parse result and closedate from cfd?

    @staticmethod
    def _parseLogDate(name):
        """Parse the date part of a log page name, such as 2007 March 8."""
        parsed = parseDate(name)
        if parsed is None:
            # parseDate does not know the log page form
            try:
                parsed = date(*(time.strptime(name.replace('_', ' ').strip(), '%Y %B %d')[0:3]))
            except ValueError:
                parsed = None
        return parsed

    def __str__(self):
        """A console representation of the CfdCategory"""
        # *[[:Category:Naturally busty porn stars]] at [[Wikipedia:Categories for discussion/Log/2007 March 8]] ([[2007-03-08]] - [[2007-03-14]]) '''Delete'''
        result = '[[:' + self.article.title() + ']] at [[' + self.afd.title() + ']] ([[' + str(self.startdate) + ']] -'
        if self.closedate:
            result += ' [[' + str(self.closedate) + ']]'
        result += ')'
        if self.result:
            result += " '''" + self.result + "'''"
        if self.rest:
            # keep the leftover text apart from what precedes it
            result += ' ' + self.rest
        return result

# end class CfdCategory(AfdArticle)

def readAfdlProjects(projpagename):
    """ Reads specifications of all AfdlProjects on input page.

    Every section with an "Afd List:" bullet describes one project. An
    AfdlProject is appended to afdlProjects for each one whose list page
    exists.
    """
    projPage = followRedirects(projpagename)
    # Afd List:, article templates:, article categories:, talk templates:
    # The Afd List one is mandatory, the rest are optional
    Rspec = re.compile(r'==[^=]*'
                       + r'^\*\s*Afd List:[^\[]*\[\[(?P<listpage>[^\]]+)\]\][^\*=$]*$'
                       + '[^=]*',
                       re.IGNORECASE | re.MULTILINE)
    # Note that the category includes the word 'Category:' but the template doesn't include the
    # word 'Template:'. This is to match the results of the Page methods.
    Rtemplate = re.compile(r'\[\[Template:(?P<template>[^\]]+)\]\]', re.IGNORECASE)
    Rcategory = re.compile(r'\[\[:(?P<category>Category:[^\]]+)\]\]', re.IGNORECASE)
    RartCat = re.compile(r'(^\*\s*Article categories:[^\*$]*$)', re.IGNORECASE)
    RartTem = re.compile(r'(^\*\s*Article templates:[^\*$]*$)', re.IGNORECASE)
    RtalkCat = re.compile(r'(^\*\s*Talk categories:[^\*$]*$)', re.IGNORECASE)
    RtalkTem = re.compile(r'(^\*\s*Talk templates:[^\*$]*$)', re.IGNORECASE)

    for match in Rspec.finditer(projPage.get()):
        listPage = followRedirects(match.group('listpage'))
        if not listPage.exists():
            continue
        articleTemplates = []
        articleCategories = []
        talkTemplates = []
        talkCategories = []
        # (line matcher, link matcher, group name, list to fill)
        readers = ((RartCat, Rcategory, 'category', articleCategories),
                   (RartTem, Rtemplate, 'template', articleTemplates),
                   (RtalkCat, Rcategory, 'category', talkCategories),
                   (RtalkTem, Rtemplate, 'template', talkTemplates))
        for line in match.group().splitlines():
            for Rline, Rlink, groupname, names in readers:
                if Rline.match(line):
                    for link in Rlink.finditer(line):
                        # capitalized, the way checkAfdArticle compares them
                        names.append(link.group(groupname).capitalize())

        afdlProjects.append(AfdlProject(listPage, articleCategories, articleTemplates,
                                        talkCategories, talkTemplates))

# Regular expression that matches an Afd debate included on a page as
# {{Wikipedia:Articles for deletion/<title>}}
Rafd = re.compile(r'\{\{\s*(?P<afd>Wikipedia:Articles for deletion/(?P<title>[^}]*))\}\}')


def processAfdList(afdListName):
    """ Searches input page name for Afds of pages matching project categories and templates.

    Every mainspace page linked from an included debate is offered to all
    projects in afdlProjects, provided it carries an AfDM template whose
    page parameter names that debate.
    """
    print('Processing %s' % afdListName)
    listpage = followRedirects(afdListName)
    if not listpage.exists():
        return
    listtext = listpage.get()
    for match in Rafd.finditer(listtext):
        afdname = match.group('afd')
        afdtitle = match.group('title')
        afd = followRedirects(afdname)
        # need to follow every link, to deal with multiple nominations in one AFD
        checked = []  # only follow a link once per Afd
        for article in afd.linkedPages():
            if article.section() != None:
                continue
            if ':' in article.title():
                continue  # mainspace pages only
            if article in checked:
                continue
            checked.append(article)
            article = followRedirects(article.title())
            if not article.exists():
                continue
            for (template, params) in article.templatesWithParams():
                if template == 'AfDM' and 'page=' + afdtitle in params:
                    talkpage = wikipedia.Page(site, 'Talk:' + article.title())
                    for proj in afdlProjects:
                        # check them all: even if listed in one, may be listed in many
                        proj.checkAfdArticle(afd, article, talkpage)
                    break  # assume only one AfDM template per article

def main():
    """Reads the project configuration, scans Afd log pages, updates the lists.

    Every command line argument left over by the framework is taken as the
    name of a log page to process. Without arguments, the daily Afd logs
    from tomorrow back to 12 days ago are processed. A project's list page
    is only written when there is something to add, close or archive.
    """
    args = wikipedia.handleArgs()  # take out the global params

    readAfdlProjects(afdlBotPage)

    if len(args) > 0:
        for arg in args:
            processAfdList(arg)
    else:
        today = date.today()
        for offset in range(1, -13, -1):
            checkdate = today + timedelta(days=offset)
            # Wikipedia:Articles_for_deletion/Log/2007_June_9
            checkpagename = 'Wikipedia:Articles_for_deletion/Log/' + dateToPageName(checkdate)
            processAfdList(checkpagename)

    for proj in afdlProjects:
        print('%s newOpen %s newClosed %s newArchived %s'
              % (proj, proj.newOpen, proj.newClosed, proj.newArchived))
        if proj.newOpen + proj.newClosed + proj.newArchived > 0:
            # Edit summary, such as "+2 open, +1 closed deletion discussions"
            changes = []
            if proj.newOpen > 0:
                changes.append('+' + str(proj.newOpen) + ' open')
            if proj.newClosed > 0:
                changes.append('+' + str(proj.newClosed) + ' closed')
            if proj.newArchived > 0:
                changes.append('-' + str(proj.newArchived) + ' archived')
            comment = ', '.join(changes) + ' deletion discussions'
            print(comment)
            text = proj.afdlsText()
            print(text)
            proj.listpage.put(text, comment, watchArticle = True, minorEdit = False)

if __name__ == "__main__":
    try:
        main()
    finally:
        # Always release the framework and close the log, even after an error
        wikipedia.stopme()
        logfile.close()