User:CobraBot/Code 3


#!/usr/bin/env python
# -*- coding: utf-8 -*-

import wikipedia
import pagegenerators
import re
import warnings
from time import sleep
from contextlib import closing
from sys import stdout
from json import dump, load
from itertools import ifilter

docuReplacements = { '&params;': pagegenerators.parameterHelp }
# This is required for the text that is shown when you run this script
# with the parameter -help.

SITE = wikipedia.getSite()

def pagesUsingTemplate(templateName):
    transclusionPageName = unicode(SITE.namespace(10)) + u":" + templateName
    transclusionPage = wikipedia.Page(SITE, transclusionPageName)
    gen = pagegenerators.ReferringPageGenerator(transclusionPage,
                                                onlyTemplateInclusion=True)
    return gen
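# Example (for illustration): pagesUsingTemplate(u"Dablink") yields the pages
# that transclude [[Template:Dablink]]; SITE.namespace(10) supplies the
# localized name of the Template namespace, and ReferringPageGenerator is
# restricted to template inclusions.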

def has_disambiguator(page):
    # A parenthesized disambiguator in the title, e.g. u"Foo (band)"
    return u'(' in page.title()

def list_redirects_to(page):
    return page.getReferences(follow_redirects=False, redirectsOnly=True)

def wordsRegex(words):
    return "(?:%s)" % ("|".join("(?:%s)" % word for word in words))
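# Example (for illustration): wordsRegex(["about", "dablink"]) returns
#     "(?:(?:about)|(?:dablink))"
# i.e. a non-capturing alternation over the given words, used below when
# building the DISAMBIGUATION pattern.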

class CobraBot(object):
    EDIT_SUMMARY = u'Superfluous disambiguation removed per WP:NAMB (assisted editing using CobraBot; User talk:Cybercobra)'
    PERSON_SUMMARY = u'Person disambiguation tweaked (assisted editing using CobraBot; User talk:Cybercobra)'
    DABLINK = u"Dablink"
    DISAMBIGUATION = re.compile(
        u"\\{\\{[ \t]*"
        + wordsRegex("about dablink otheruses for the redirect this twootheruses".split()
                     + ["other uses", "two other uses"])
        + u"[^}]*\\}\\}(\n?)",
        re.IGNORECASE)
    # NOTE: reconstructed wikitext; the exact template text is uncertain, but
    # treat() below requires a single "%s" placeholder for the page title.
    DB_MOVE = u"{{db-move|%s}}\n"
    OFFSET_FILE = 'N.json'
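    # Example (for illustration): DISAMBIGUATION matches hatnote transclusions
    # such as
    #     {{About|the 1994 film}}
    #     {{Otheruses|Foo (disambiguation)}}
    # including any trailing newline, so removing a match does not leave a
    # blank line behind.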

    def __init__(self, debug):
        """
        Constructor.

        Parameters:
            * debug - If True, doesn't do any real changes, but only shows
                      what would have been changed.
        """
        # Work through pages that transclude {{Dablink}} and whose titles
        # carry a parenthesized disambiguator.
        self.generator = ifilter(has_disambiguator,
                                 pagesUsingTemplate(self.DABLINK))
        self.debug = debug
        self.editCount = 0
        self.log = file("skipped.log", 'a')
        self.log.write("BEGIN NEW SESSION\n")
        wikipedia.setAction(self.EDIT_SUMMARY)

    def run(self):
        # Resume from the offset saved by the previous session.
        with closing(file(self.OFFSET_FILE, 'r')) as f:
            N = load(f)
        print "Advancing by %s..." % N
        stdout.flush()
        for i in xrange(N):
            next(self.generator)
        print "Done advancing!"
        stdout.flush()
        try:
            for pageIndex, page in enumerate(self.generator):
                # Set the edit summary message
                wikipedia.setAction(self.EDIT_SUMMARY)
                self.treat(page, pageIndex)
        finally:
            self.log.close()
            # Persist the offset (backing off a few pages) for the next session.
            with closing(file(self.OFFSET_FILE, 'w')) as f:
                dump(N + pageIndex - 5, f)
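    # Example (for illustration): OFFSET_FILE holds a single JSON integer, e.g.
    # N.json containing just
    #     120
    # which makes run() fast-forward past the first 120 generated pages before
    # resuming interactive treatment.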

    #########
    def treat(self, page, pageIndex):
        """
        Loads the given page, does some changes, and saves it.
        """
        print "=================================================================="
        print "PAGE TITLE:", page.title()
        print "PAGE#:", pageIndex+1
        print "EDIT COUNT:", self.editCount
        if page.namespace() != 0:
            wikipedia.output(u"SKIPPING: Non-article namespace!")
            return
        try:
            # Load the page
            text = page.get()
        except wikipedia.NoPage:
            wikipedia.output(u"Page %s does not exist; skipping." % page.aslink())
            return
        except wikipedia.IsRedirectPage:
            wikipedia.output(u"Page %s is a redirect; skipping." % page.aslink())
            return
        disams = list(re.finditer(self.DISAMBIGUATION, text))
        if not disams:
            self.log.write("FALSE POSITIVE: " + page.title().encode('utf8') + "\n")
            print "FALSE POSITIVE:", page.title().encode('utf8')
            return
        print "REDIRECTS:"
        redirects = list(list_redirects_to(page))
        print "  ", "\n    ".join([redirect.title() for redirect in redirects])
        norm_with_caps = page.title().split(u"(")[0].strip()
        normalized_title = norm_with_caps.lower()
        if any(redir.title().lower() == normalized_title for redir in redirects):
            print "***PRIMARY TOPIC REDIRECTS HERE***"
        person = False
        dbmove = False
        redo = False
        while True:
            print "Choose option:"
            print "[0] Skip page"
            for i, disamb in enumerate(disams):
                lineno = text[:disamb.start()].count("\n")
                print "[%s] (line %s): %s" % (i+1, lineno, disamb.group().strip())
            try:
                input = raw_input("Enter number of your choice: ")
                choice = int(input)
            except ValueError:
                if input == "person":
                    person = True
                    choice = 1
                    break
                if input == "dbmove":
                    dbmove = True
                    break
                print "Invalid input; try again."
            else:
                if choice <= len(disams):
                    break
                else:
                    print "Invalid input; try again."
        if dbmove:
            target = wikipedia.Page(SITE, norm_with_caps)
            text = self.DB_MOVE % page.title() + target.get()
            page = target
        elif choice == 0:
            print "SKIPPED"
            return
        else:
            # A negative choice means: remove that hatnote, then re-treat the page.
            redo = choice < 0
            if choice < 0:
                choice = -choice
            choice -= 1
            redact = disams[choice]
            if person:
                wikipedia.setAction(self.PERSON_SUMMARY)
                # NOTE: the exact replacement hatnote wikitext is not preserved
                # here; only the "%s" substitution of the undisambiguated title
                # and the trailing newline are certain.
                text = (text[:redact.start()]
                        + u"%s\n" % norm_with_caps
                        + text[redact.end():])
            else:
                text = text[:redact.start()] + text[redact.end():]
        # only save if something was changed
        if text != page.get():
            # Show the title of the page we're working on.
            # Highlight the title in purple.
            wikipedia.output(u"\n>>> \03{lightpurple}%s\03{default} <<<"
                             % page.title())
            # show what was changed
            wikipedia.showDiff(page.get(), text)
            # raw_input("Continue?")
            # sleep(3)
            if dbmove or self.debug:
                choice = wikipedia.inputChoice(
                    u'Do you want to accept these changes?',
                    ['Yes', 'No'], ['y', 'N'], 'N')
                if choice == 'n':
                    return
            try:
                # Save the page
                page.put(text)
            except wikipedia.LockedPage:
                wikipedia.output(u"Page %s is locked; skipping." % page.aslink())
            except wikipedia.EditConflict:
                wikipedia.output(u'Skipping %s because of edit conflict'
                                 % (page.title()))
            except wikipedia.SpamfilterError, error:
                wikipedia.output(u'Cannot change %s because of spam blacklist entry %s'
                                 % (page.title(), error.url))
            else:
                self.editCount += 1
                if redo:
                    self.treat(wikipedia.Page(SITE, page.title()), pageIndex)
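    # Example session (for illustration): for each page, treat() lists the
    # matched hatnotes and reads a choice from stdin, e.g.
    #     Choose option:
    #     [0] Skip page
    #     [1] (line 0): {{About|the album}}
    #     Enter number of your choice: 1
    # A negative number removes that hatnote and then re-treats the freshly
    # fetched page; entering "person" or "dbmove" instead of a number takes
    # the corresponding special-case branch.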

def main():
    DEBUG = False
    bot = CobraBot(DEBUG)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        bot.run()

if __name__ == "__main__":
    try:
        main()
    finally:
        wikipedia.stopme()