User:Drinibot/CapitalizationRedirects

The following is based on pywikipedia and was done by the kind es: user Yrithinnd.

""" This bot will make direct text replacements. It will retrieve information on which pages might need changes either from an XML dump or a text file, or only change a single page.
# -*- coding: utf-8  -*-

You can run the bot with the following commandline parameters:

-xml        - Retrieve information from a local XML dump (pages_current, see
              http://download.wikimedia.org).
              Argument can also be given as "-xml:filename".
-file       - Work on all pages given in a local text file.
              Will read any wiki link and use these articles.
              Argument can also be given as "-file:filename".
-cat        - Work on all pages which are in a specific category.
              Argument can also be given as "-cat:categoryname".
-page       - Only edit a single page.
              Argument can also be given as "-page:pagename". You can give this
              parameter multiple times to edit multiple pages.
-ref        - Work on all pages that link to a certain page.
              Argument can also be given as "-ref:referredpagename".
-start      - Work on all pages in the wiki, starting at a given page. Choose
              "-start:!" to start at the beginning.
              NOTE: You are advised to use -xml instead of this option; this is
              meant for cases where there is no recent XML dump.
-regex      - Make replacements using regular expressions. If this argument
              isn't given, the bot will make simple text replacements.
-except:XYZ - Ignore pages which contain XYZ. If the -regex argument is given,
              XYZ will be regarded as a regular expression.
-fix:XYZ    - Perform one of the predefined replacements tasks, which are given
              in the dictionary 'fixes' defined inside this file.
              The -regex argument and given replacements will be ignored if
              you use -fix.
              Currently available predefined fixes are:
                   * HTML - convert HTML tags to wiki syntax, and fix XHTML
-namespace:n - Number of namespace to process. The parameter can be used
              multiple times. It works in combination with all other
              parameters, except for the -start parameter. If you e.g. want to
              iterate over all user pages starting at User:M, use
              -start:User:M.
-always     - Don't prompt you for each replacement
other:      - First argument is the old text, second argument is the new text.
              If the -regex argument is given, the first argument will be
              regarded as a regular expression, and the second argument might
              contain expressions like \\1 or \g<name>.

NOTE: Only use either -xml or -file or -page, but don't mix them.

Examples:

If you want to change templates from the old syntax, e.g. {{msg:Stub}}, to the
new syntax, e.g. {{Stub}}, download an XML dump file (cur table) from
http://download.wikimedia.org, then use this command:

python replace.py -xml -regex "{{msg:(.*?)}}" "{{\\1}}"

If you have a dump called foobar.xml and want to fix typos, e.g. Errror -> Error, use this:

python replace.py -xml:foobar.xml "Errror" "Error"

If you have a page called 'John Doe' and want to convert HTML tags to wiki
syntax, use:

python replace.py -page:John_Doe -fix:HTML
"""
__version__='$Id: replace.py,v 1.87 2006/01/26 19:08:27 leogregianin Exp $'
#
# (C) Daniel Herding, 2004
#
# Distributed under the terms of the MIT license.
#

from __future__ import generators import sys, re import wikipedia, pagegenerators, catlib, config

# Summary messages in different languages
# NOTE: Predefined replacement tasks might use their own dictionary, see 'fixes'
# below.
msg = {
    # The accented character is written as an escape so the literal does not
    # depend on the source file's encoding declaration.
    'es': u'Robot: Borrado masivo de art\xedculos',
}

#
# (C) Yrithinnd
#
# Class licensed under terms of the MIT license
#
class Drinibot:
    """
    Bot that creates capitalization redirects.

    For every page yielded by the generator, the title is passed through
    capitalize() and, if no page exists under the resulting title, a redirect
    pointing back to the original page is saved there.
    """

    def __init__(self, generator, acceptall = False):
        """
        Arguments:
            * generator - iterable yielding the pages to process
            * acceptall - if True, redirects are created without asking for
                          confirmation
        """
        self.generator = generator
        # Honour the caller's choice (e.g. -always) instead of forcing False.
        self.acceptall = acceptall

    def run(self):
        """
        Starts the robot.
        """
        # Run the generator which will yield Pages which might need to be
        # changed.
        for page in self.generator:
            titulo = page.title()
            np = wikipedia.Page(wikipedia.getSite(), titulo.capitalize())
            if np.exists():
                wikipedia.output(u'%s ya existe\nNo se hace nada' % np.title())
                continue
            wikipedia.output(u'%s no existe' % np.title())
            # Default answer; only consulted when acceptall is not set.
            choice = 'N'
            if not self.acceptall:
                choice = wikipedia.inputChoice(u'Quieres crear la redireccion asociada?', ['Yes', 'No', 'All'], ['y', 'N', 'a'], 'N')
                if choice in ['a', 'A']:
                    # 'All' answers yes for this page and every later one.
                    self.acceptall = True
            if self.acceptall or choice in ['y', 'Y']:
                # def put(self, newtext, comment=None, watchArticle = None, minorEdit = True):
                # A redirect target must be written as a wiki link.
                np.put(u"#REDIRECT [[%s]]" % titulo, u"capitalization redirect")

def main():
    """
    Parse the command line, build the matching page generator and run the bot.

    A page source is selected with -file, -cat, -page, -ref or -start; if
    several are given, the last one wins. When no source is given, the help
    text from the top of this file is shown and the script exits.
    -namespace:n (repeatable) filters the generated pages, and -always is
    passed to Drinibot as its acceptall flag.
    """
    gen = None
    # How we want to retrieve information on which pages need to be changed.
    # One of 'textfile', 'category', 'singlepage', 'ref' or 'allpages'; stays
    # None until a source argument has been seen.
    source = None
    # Array which will collect commandline parameters that are not recognized
    # as options. It is only consulted when deciding whether to show the help
    # text.
    commandline_replacements = []
    # the name of the text file which will be used when source is 'textfile'.
    textfilename = None
    # the category name which will be used when source is 'category'.
    categoryname = None
    # pages which will be processed when the -page parameter is used
    pageNames = []
    # a page whose referrers will be processed when the -ref parameter is used
    referredPageName = None
    # title of the first page to process when the -start parameter is used
    firstPageTitle = None
    # will become True when the user uses the -always commandline parameter.
    acceptall = False
    # Which namespaces should be processed?
    # default to [] which means all namespaces will be processed
    namespaces = []
    # Load default summary message.
    wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), msg))

    # Read commandline parameters.
    for arg in sys.argv[1:]:
        arg = wikipedia.argHandler(arg, 'replace')
        if not arg:
            continue
        if arg.startswith('-file'):
            # A bare option prompts for its value; otherwise the value
            # follows the ':' separator.
            if len(arg) == 5:
                textfilename = wikipedia.input(u'Please enter the filename:')
            else:
                textfilename = arg[6:]
            source = 'textfile'
        elif arg.startswith('-cat'):
            if len(arg) == 4:
                categoryname = wikipedia.input(u'Please enter the category name:')
            else:
                categoryname = arg[5:]
            source = 'category'
        elif arg.startswith('-page'):
            if len(arg) == 5:
                pageNames.append(wikipedia.input(u'Which page do you want to chage?'))
            else:
                pageNames.append(arg[6:])
            source = 'singlepage'
        elif arg.startswith('-ref'):
            if len(arg) == 4:
                referredPageName = wikipedia.input(u'Links to which page should be processed?')
            else:
                referredPageName = arg[5:]
            source = 'ref'
        elif arg.startswith('-start'):
            if len(arg) == 6:
                firstPageTitle = wikipedia.input(u'Which page do you want to chage?')
            else:
                firstPageTitle = arg[7:]
            source = 'allpages'
        elif arg == '-always':
            acceptall = True
        elif arg.startswith('-namespace:'):
            namespaces.append(int(arg[11:]))
        else:
            commandline_replacements.append(arg)

    if source == 'textfile':
        gen = pagegenerators.TextfilePageGenerator(textfilename)
    elif source == 'category':
        cat = catlib.Category(wikipedia.getSite(), categoryname)
        gen = pagegenerators.CategorizedPageGenerator(cat)
    elif source == 'singlepage':
        pages = [wikipedia.Page(wikipedia.getSite(), pageName) for pageName in pageNames]
        gen = iter(pages)
    elif source == 'allpages':
        namespace = wikipedia.Page(wikipedia.getSite(), firstPageTitle).namespace()
        gen = pagegenerators.AllpagesPageGenerator(firstPageTitle, namespace)
    elif source == 'ref':
        referredPage = wikipedia.Page(wikipedia.getSite(), referredPageName)
        gen = pagegenerators.ReferringPageGenerator(referredPage)
    elif source is None or len(commandline_replacements) not in [0, 2]:
        # syntax error, show help text from the top of this file
        wikipedia.output(__doc__, 'utf-8')
        wikipedia.stopme()
        # Stop here: there is no generator to hand to the bot.
        sys.exit()
    if namespaces:
        gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
    preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber = 20)
    bot = Drinibot(preloadingGen, acceptall)
    bot.run()

if __name__ == "__main__":
    try:
        main()
    finally:
        # Runs whether main() returns, raises or exits.
        wikipedia.stopme()