User:AntiVandalBot/temp

<table' end_s = '</table' ibegin = returned_html.index(begin_s) iend = returned_html.index(end_s,ibegin + 3) except ValueError: raise ServerError('Couldn\'t extract allpages special page. Make sure you\'re using the MonoBook skin.') # remove the irrelevant sections returned_html = returned_html[ibegin:iend] if self.version=="1.2": R = re.compile('/wiki/(.*?)" *class=[\'\"]printable') else: R = re.compile('title ?="(.*?)"') # Count the number of useful links on this page n = 0 for hit in R.findall(returned_html): # count how many articles we found on the current page n = n + 1 if self.version=="1.2": yield Page(self, url2link(hit, site = self, insite = self)) else: yield Page(self, hit) # save the last hit, so that we know where to continue when we               # finished all articles on the current page. Append a '!' so that # we don't yield a page twice. start = Page(self,hit).titleWithoutNamespace + '!' # A small shortcut: if there are less than 100 pages listed on this # page, there is certainly no next. Probably 480 would do as well, # but better be safe than sorry. if n < 100: break

def __repr__(self):
    """Return '<family name>:<language code>' for this site."""
    parts = (self.family.name, self.lang)
    return ":".join(parts)

def linkto(self, title, othersite = None):
    """Return the link text for `title`, language-prefixed when needed.

    If `othersite` is given and its `lang` differs from this site's `lang`,
    the result is '<self.lang>:<title>'; otherwise it is the title alone.
    """
    if othersite and othersite.lang != self.lang:
        return '%s:%s' % (self.lang, title)
    return '%s' % title

def isInterwikiLink(self, s):
    """
    Try to check whether s is in the form "foo:bar" or ":foo:bar"
    where foo is a known language code or family. In such a case
    we are dealing with an interwiki link.

    Every colon-separated prefix is examined in turn, so "x:de:Foo" is
    also reported as an interwiki link when 'de' is a valid language link.
    """
    # Iterating instead of recursing keeps inputs with very many colons
    # from hitting the recursion limit.
    while True:
        s = s.lstrip(":")
        if ':' not in s:
            return False
        first, rest = s.split(':', 1)
        # interwiki codes are case-insensitive
        first = first.lower().strip()
        if first in self.validLanguageLinks():
            return True
        known = self.family.known_families
        # A prefix naming this site's own family is not an interwiki link.
        if first in known and known[first] != self.family.name:
            return True
        s = rest

def encoding(self):
    """Return the family's `code2encoding` result for this site's language."""
    family = self.family
    return family.code2encoding(self.lang)

def encodings(self):
    """Return the family's `code2encodings` result for this site's language."""
    family = self.family
    return family.code2encodings(self.lang)

def redirect(self, default = False):
    """
    Gives the localized redirect tag for the site.

    Looks up this site's language in `self.family.redirect`. When the
    language has no entry, returns "REDIRECT" if `default` is true and
    None otherwise.
    """
    if default:
        fallback = "REDIRECT"
    else:
        fallback = None
    return self.family.redirect.get(self.lang, fallback)

def redirectRegex(self):
    """
    Regular expression recognizing redirect pages.

    The compiled, case-insensitive pattern matches '#' followed by
    'redirect' or a localized keyword from `self.family.redirect`, then
    the opening '[[' of the target link. Group 1 is the link target up to
    the first ']' or '|'.
    """
    redirKeywords = [u'redirect']
    try:
        localized = self.family.redirect[self.lang]
    except KeyError:
        pass
    else:
        # The table entry may be a single keyword or a sequence of them
        # (NOTE(review): confirm against the family files). Extending a list
        # with a plain string would add each character as its own keyword.
        if isinstance(localized, (list, tuple)):
            redirKeywords.extend(localized)
        else:
            redirKeywords.append(localized)
    txt = '(?:' + '|'.join(redirKeywords) + ')'
    # Raw strings throughout, so the backslashes reach the regex engine as-is.
    return re.compile(r'\#' + txt + r'[^\[]*\[\[(.*?)(\]|\|)', re.IGNORECASE)

def category_namespace(self):
    """Return the family's `category_namespace` result for this site's language."""
    family = self.family
    return family.category_namespace(self.lang)

def category_namespaces(self):
    """Return the family's `category_namespaces` result for this site's language."""
    family = self.family
    return family.category_namespaces(self.lang)

def image_namespace(self, fallback = '_default'):
    """Return the family's `image_namespace` result for this site's language.

    `fallback` is handed through to the family unchanged.
    """
    family = self.family
    return family.image_namespace(self.lang, fallback)

def template_namespace(self, fallback = '_default'):
    """Return the family's `template_namespace` result for this site's language.

    `fallback` is handed through to the family unchanged.
    """
    family = self.family
    return family.template_namespace(self.lang, fallback)

def export_address(self):
    """Return the family's `export_address` result for this site's language."""
    family = self.family
    return family.export_address(self.lang)

def query_address(self):
    """Return the family's `query_address` result for this site's language."""
    family = self.family
    return family.query_address(self.lang)

def hostname(self):
    """Return the family's `hostname` result for this site's language."""
    family = self.family
    return family.hostname(self.lang)

def dbName(self):
    """Return the family's `dbName` result for this site's language."""
    family = self.family
    return family.dbName(self.lang)

def move_address(self):
    """Return the family's `move_address` result for this site's language."""
    family = self.family
    return family.move_address(self.lang)

def delete_address(self, s):
    """Return the family's `delete_address` result for this site's language and `s`."""
    family = self.family
    return family.delete_address(self.lang, s)

def protect_address(self, s):
    """Return the family's `protect_address` result for this site's language and `s`."""
    family = self.family
    return family.protect_address(self.lang, s)

def put_address(self, s):
    """Return the family's `put_address` result for this site's language and `s`."""
    family = self.family
    return family.put_address(self.lang, s)

def get_address(self, s):
    """Return the family's `get_address` result for this site's language and `s`."""
    family = self.family
    return family.get_address(self.lang, s)

def nice_get_address(self, s):
    """Return the family's `nice_get_address` result for this site's language and `s`."""
    family = self.family
    return family.nice_get_address(self.lang, s)

def edit_address(self, s):
    """Return the family's `edit_address` result for this site's language and `s`."""
    family = self.family
    return family.edit_address(self.lang, s)

def purge_address(self, s):
    """Return the family's `purge_address` result for this site's language and `s`."""
    family = self.family
    return family.purge_address(self.lang, s)

def checkCharset(self, charset):
    """Verify that `charset` agrees with what is known about this site.

    The first charset seen is remembered in `self.charset`. Comparisons are
    case-insensitive.

    Raises:
        AssertionError: `charset` differs from the remembered one.
        ValueError: `charset` differs from `self.encoding()`.
    """
    if not hasattr(self, 'charset'):
        self.charset = charset
    assert self.charset.lower() == charset.lower(), \
        "charset for %s changed from %s to %s" % (repr(self), self.charset, charset)
    if self.encoding().lower() != charset.lower():
        raise ValueError(
            "code2encodings has wrong charset for %s. It should be %s, but is %s"
            % (repr(self), charset, self.encoding()))

def allpages_address(self, s, ns = 0):
    """Return the family's `allpages_address` result for this site's language.

    `s` is passed as the `start` keyword and `ns` as the `namespace` keyword.
    """
    family = self.family
    return family.allpages_address(self.lang, start = s, namespace = ns)

def newpages_address(self, n=50):
    """Return the family's `newpages_address` result for this site's language and `n`."""
    family = self.family
    return family.newpages_address(self.lang, n)

def longpages_address(self, n=500):
    """Return the family's `longpages_address` result for this site's language and `n`."""
    family = self.family
    return family.longpages_address(self.lang, n)

def shortpages_address(self, n=500):
    """Return the family's `shortpages_address` result for this site's language and `n`."""
    family = self.family
    return family.shortpages_address(self.lang, n)

def categories_address(self, n=500):
    """Return the family's `categories_address` result for this site's language and `n`."""
    family = self.family
    return family.categories_address(self.lang, n)

def deadendpages_address(self, n=500):
    """Return the family's `deadendpages_address` result for this site's language and `n`."""
    family = self.family
    return family.deadendpages_address(self.lang, n)

def ancientpages_address(self, n=500):
    """Return the family's `ancientpages_address` result for this site's language and `n`."""
    family = self.family
    return family.ancientpages_address(self.lang, n)

def lonelypages_address(self, n=500):
    """Return the family's `lonelypages_address` result for this site's language and `n`."""
    family = self.family
    return family.lonelypages_address(self.lang, n)

def uncategorizedcategories_address(self, n=500):
    """Return the family's `uncategorizedcategories_address` result for this site's language and `n`."""
    family = self.family
    return family.uncategorizedcategories_address(self.lang, n)

def uncategorizedpages_address(self, n=500):
    """Return the family's `uncategorizedpages_address` result for this site's language and `n`."""
    family = self.family
    return family.uncategorizedpages_address(self.lang, n)

def unusedcategories_address(self, n=500):
    """Return the family's `unusedcategories_address` result for this site's language and `n`."""
    family = self.family
    return family.unusedcategories_address(self.lang, n)

def references_address(self, s):
    """Return the family's `references_address` result for this site's language and `s`."""
    family = self.family
    return family.references_address(self.lang, s)

def allmessages_address(self):
    """Return the family's `allmessages_address` result for this site's language."""
    family = self.family
    return family.allmessages_address(self.lang)

def upload_address(self):
    """Return the family's `upload_address` result for this site's language."""
    family = self.family
    return family.upload_address(self.lang)

def maintenance_address(self, sub, default_limit = True):
    """Return the family's `maintenance_address` result for this site's language.

    `sub` and `default_limit` are handed through to the family unchanged.
    """
    family = self.family
    return family.maintenance_address(self.lang, sub, default_limit)

def double_redirects_address(self, default_limit = True):
    """Return the family's `double_redirects_address` result for this site's language.

    `default_limit` is handed through to the family unchanged.
    """
    family = self.family
    return family.double_redirects_address(self.lang, default_limit)

def broken_redirects_address(self, default_limit = True):
    """Return the family's `broken_redirects_address` result for this site's language.

    `default_limit` is handed through to the family unchanged.
    """
    family = self.family
    return family.broken_redirects_address(self.lang, default_limit)

def __hash__(self):
    """Hash the site by the hash of its `repr()` string."""
    text = repr(self)
    return hash(text)

def version(self):
    """Return the family's `version` result for this site's language."""
    family = self.family
    return family.version(self.lang)

def __cmp__(self, other):
    """Pseudo method to be able to use equality and inequality tests on
    Site objects.

    Anything that is not a Site compares as smaller (returns 1). Sites of
    the same family are ordered by language code, all others by family name.
    """
    if not isinstance(other, Site):
        return 1
    if self.family == other.family:
        return cmp(self.lang, other.lang)
    return cmp(self.family.name, other.family.name)

def category_on_one_line(self):
    """Tell whether this site's language is listed in the family's
    `category_on_one_line` collection."""
    listed_languages = self.family.category_on_one_line
    return self.lang in listed_languages

def interwiki_putfirst(self):
    """Return the family's `interwiki_putfirst` entry for this site's
    language, or None when there is no entry."""
    table = self.family.interwiki_putfirst
    return table.get(self.lang, None)

def interwiki_putfirst_doubled(self, list_of_links):
    """Select the links that the family wants listed first.

    `self.family.interwiki_putfirst_doubled[self.lang]` is read as a pair:
    element 0 is the minimum number of links, element 1 the language codes
    in the wanted order.

    Returns the entries of `list_of_links` whose `language()` appears in
    that list, in that order (codes with no matching link are skipped), or
    False when this language has no entry or there are too few links.
    """
    table = self.family.interwiki_putfirst_doubled
    if self.lang not in table:
        return False
    minimum, wanted = table[self.lang][0], table[self.lang][1]
    if len(list_of_links) < minimum:
        return False
    # NOTE(review): the pasted source reads `lang.language` without a call;
    # empty parentheses are missing throughout this paste, so the call is
    # restored here.
    languages = [link.language() for link in list_of_links]
    selected = []
    for code in wanted:
        try:
            selected.append(list_of_links[languages.index(code)])
        except ValueError:
            # no link for this language; skip it
            pass
    return selected

def login_address(self):
    """Return the family's `login_address` result for this site's language."""
    family = self.family
    return family.login_address(self.lang)

def watchlist_address(self):
    """Return the family's `watchlist_address` result for this site's language."""
    family = self.family
    return family.watchlist_address(self.lang)

def getSite(self, code):
    """Return the site for language `code` in this site's family.

    Calls the module-level getSite() with this site's family and user.
    """
    return getSite(
        code = code,
        fam = self.family,
        user = self.user,
    )

def namespace(self, num):
    """Return the family's `namespace` result for this site's language and `num`."""
    family = self.family
    return family.namespace(self.lang, num)

def normalizeNamespace(self, value):
    """Return the family's `normalizeNamespace` result for this site's language and `value`."""
    family = self.family
    return family.normalizeNamespace(self.lang, value)

def namespaces(self):
    """Return the list of namespace names defined for this site.

    The list is built once per site from `self.family.namespaces` and kept
    in the module-level `_namespaceCache`; later calls return the cached
    list object.
    """
    if self in _namespaceCache:
        return _namespaceCache[self]
    nslist = []
    for n in self.family.namespaces:
        try:
            ns = self.family.namespace(self.lang, n)
        except KeyError:
            # No default namespace defined
            continue
        if ns is not None:
            # reuse the value just looked up instead of asking the family again
            nslist.append(ns)
    _namespaceCache[self] = nslist
    return nslist

def linktrail(self):
    """Return the family's `linktrail` result for this site's language."""
    family = self.family
    return family.linktrail(self.lang)

def language(self):
    """Return this site's language code (`self.lang`)."""
    code = self.lang
    return code

def fam(self):
    """Return this site's family object (`self.family`)."""
    family = self.family
    return family

def sitename(self):
    """Return '<family name>:<language code>' for this site."""
    parts = (self.family.name, self.lang)
    return ':'.join(parts)

def languages(self):
    """Return the language codes defined in this site's family
    (the keys of `self.family.langs`) as a list."""
    # list() gives the same result whether keys() returns a list or a view.
    return list(self.family.langs.keys())

def validLanguageLinks(self):
    """Return the stored `self._validlanguages` value."""
    return self._validlanguages

def disambcategory(self):
    """Return the catlib.Category for this site's disambiguation category.

    The title is built from namespace 14 and the family's `disambcatname`
    entry for this language.

    Raises:
        NoPage: the family has no `disambcatname` entry for this language.
    """
    import catlib
    try:
        name = self.family.disambcatname[self.lang]
    except KeyError:
        raise NoPage
    return catlib.Category(self, self.namespace(14) + ':' + name)

def getToken(self, getalways = True, getagain = False, sysop = False):
    """Return the stored edit token, fetching a page first if needed.

    A page is fetched when `getagain` is true, or when `getalways` is true
    and the token for the requested role (`sysop` or not) is not set yet.
    The fetch itself does not assign the token here; presumably Page.get()
    stores it through putToken() -- verify against Page.

    Returns the token for the requested role, or False if none is stored.

    Raises:
        UserBlocked: re-raised from the page fetch. Other Error
        subclasses raised by the fetch are ignored.
    """
    if sysop:
        missing = not self._sysoptoken
    else:
        missing = not self._token
    if getagain or (getalways and missing):
        output(u"Getting page to get a token.")
        try:
            sandbox = Page(self, "%s:Sandbox" % self.family.namespace(self.lang, 4))
            sandbox.get(force = True, get_redirect = True, sysop = sysop)
            #Page(self, "Non-existing page").get(force = True, sysop = sysop)
        except UserBlocked:
            raise
        except Error:
            pass
    if sysop:
        token = self._sysoptoken
    else:
        token = self._token
    if not token:
        return False
    return token

def putToken(self, value, sysop = False):
    """Store `value` as the sysop token if `sysop` is true, otherwise as
    the regular token."""
    if sysop:
        self._sysoptoken = value
    else:
        self._token = value

# Caches to provide faster access
_sites = {}
_namespaceCache = {}

def getSite(code = None, fam = None, user=None):
    """Return the Site for the given language code and family.

    `code` and `fam` default to the module-level `default_code` and
    `default_family`. Sites are cached in `_sites` under the key
    '<fam>:<code>'; `user` is only used when a new Site is created, so a
    cached site is returned regardless of the `user` passed.
    """
    if code is None:
        code = default_code
    if fam is None:
        fam = default_family
    key = '%s:%s' % (fam, code)
    if key not in _sites:
        _sites[key] = Site(code=code, fam=fam, user=user)
    return _sites[key]

def setSite(site):
    """Make `site` the default: its language and family become the
    module-level `default_code` and `default_family`."""
    # Without this declaration the assignments only create locals and the
    # function has no effect.
    global default_code, default_family
    default_code = site.language()
    default_family = site.family

def argHandler(arg, moduleName):
    '''
    DEPRECATED - use handleArgs instead

    Takes a commandline parameter, converts it to unicode, and returns it
    unless it is one of the global parameters as -lang or -log. If it is a
    global parameter, processes it and returns None.

    moduleName should be the name of the module calling this function. This
    is required because the -help option loads the module docstring and
    because the module name will be used for the filename of the log.
    '''
    global default_code, default_family, logfile
    if sys.platform == 'win32':
        # stupid Windows gives parameters encoded as windows-1252, but input
        # encoded as cp850
        arg = unicode(arg, 'windows-1252')
    else:
        # Linux uses the same encoding for both
        arg = unicode(arg, config.console_encoding)
    if arg == '-help':
        showHelp(moduleName)
        sys.exit(0)
    elif arg.startswith('-family:'):
        default_family = arg[8:]
    elif arg.startswith('-lang:'):
        default_code = arg[6:]
    elif arg.startswith('-putthrottle:'):
        put_throttle.setDelay(int(arg[13:]), absolute = True)
    elif arg == '-log':
        activateLog('%s.log' % moduleName)
    elif arg.startswith('-log:'):
        activateLog(arg[5:])
    elif arg == '-nolog':
        logfile = None
    else:
        return arg
    return None

def handleArgs():
    '''
    Takes the commandline arguments, converts them to Unicode, processes all
    global parameters such as -lang or -log. Returns a list of all arguments
    that are not global.
    '''
    import os.path
    global default_code, default_family, logfile
    # get commandline arguments
    args = sys.argv
    # get the name of the module calling this function. This is
    # required because the -help option loads the module's docstring and
    # because the module name will be used for the filename of the log.
    # splitext drops only a real extension and does not fail when the script
    # name contains no dot.
    moduleName = os.path.splitext(args[0])[0]
    nonGlobalArgs = []
    for arg in args[1:]:
        if sys.platform == 'win32':
            # stupid Windows gives parameters encoded as windows-1252, but
            # input encoded as cp850
            arg = unicode(arg, 'windows-1252')
        else:
            # Linux uses the same encoding for both
            arg = unicode(arg, config.console_encoding)
        if arg == '-help':
            showHelp(moduleName)
            sys.exit(0)
        elif arg.startswith('-family:'):
            default_family = arg[8:]
        elif arg.startswith('-lang:'):
            default_code = arg[6:]
        elif arg.startswith('-putthrottle:'):
            put_throttle.setDelay(int(arg[13:]), absolute = True)
        elif arg == '-log':
            activateLog('%s.log' % moduleName)
        elif arg.startswith('-log:'):
            activateLog(arg[5:])
        elif arg == '-nolog':
            logfile = None
        else:
            # the argument is not global. Let the specific bot script care
            # about it.
            nonGlobalArgs.append(arg)
    return nonGlobalArgs


 * 1) Interpret configuration
 * 1) Interpret configuration

import wikipediatools as _wt sys.path.append(_wt.absoluteFilename('userinterfaces')) exec "import %s_interface as uiModule" % config.userinterface ui = uiModule.UI
 * 1) search for user interface module in the 'userinterfaces' subdirectory

default_family = config.family
default_code = config.mylang
logfile = None
# Check that the default site can be created; a KeyError here is reported
# as an incomplete user-config.py.
try:
    getSite()
except KeyError:
    print(
u"""Please create a file user-config.py, and put in there:\n
One line saying \"mylang='language'\"
One line saying \"usernames['wikipedia']['language']='yy'\"\n
...filling in your username and the language code of the wiki you want to work
on.\n
For other possible configuration variables check config.py.
""")
    sys.exit(1)


# Languages to use for comment text after the actual language but before
# en:. For example, if for language 'xx', you want the preference of
# languages to be:
# xx:, then fr:, then ru:, then en:
# you let altlang return ['fr','ru'].
# This code is used by translate below.

# Each entry pairs a group of language codes with the alternative languages
# to try for them, in order of preference.
_ALTLANG_TABLE = (
    (('aa',), ['am']),
    (('fa', 'so'), ['ar']),
    (('ku',), ['ar', 'tr']),
    (('sk',), ['cs']),
    (('bar', 'hsb', 'ksh'), ['de']),
    (('als', 'lb'), ['de', 'fr']),
    (('io',), ['eo']),
    (('an', 'ast', 'ay', 'ca', 'gn', 'nah', 'qu'), ['es']),
    (('cbk-zam',), ['es', 'tl']),
    (('eu',), ['es', 'fr']),
    (('glk', 'mzn'), ['fa', 'ar']),
    (('gl',), ['es', 'pt']),
    (('lad',), ['es', 'he']),
    (('br', 'ht', 'ln', 'lo', 'nrm', 'vi', 'wa'), ['fr']),
    (('ie', 'oc'), ['ie', 'oc', 'fr']),
    (('co', 'frp'), ['fr', 'it']),
    (('yi',), ['he']),
    (('sa',), ['hi']),
    (('eml', 'lij', 'lmo', 'nap', 'pms', 'roa-tara', 'sc', 'scn', 'vec'),
     ['it']),
    (('rm',), ['it', 'de', 'fr']),
    (('bat-smg', 'ltg'), ['lt']),
    (('ia',), ['la', 'es', 'fr', 'it']),
    (('nds',), ['nds-nl', 'de']),
    (('nds-nl',), ['nds', 'nl']),
    (('fy', 'pap', 'vls', 'zea'), ['nl']),
    (('li',), ['nl', 'de']),
    (('csb',), ['pl']),
    (('fab', 'tet'), ['pt']),
    (('mo', 'roa-rup'), ['ro']),
    (('av', 'be', 'bxr', 'cv', 'hy', 'lbe', 'ru-sib', 'tt', 'udm', 'uk',
      'xal'), ['ru']),
    (('got',), ['ru', 'uk']),
    (('kk', 'ky', 'tk', 'ug', 'uz'), ['tr', 'ru']),
    (('diq',), ['tr']),
    (('ja', 'ko', 'minnan', 'zh', 'zh-cn'),
     ['zh', 'zh-tw', 'zh-classical', 'zh-cn']),
    (('bo', 'cdo', 'wuu', 'za', 'zh-cdo', 'zh-classical', 'zh-tw', 'zh-yue'),
     ['zh', 'zh-cn', 'zh-classical', 'zh-tw']),
    (('da',), ['nb', 'no']),
    (('is', 'no', 'nb', 'nn'), ['no', 'nb', 'nn', 'da', 'sv']),
    (('sv',), ['da', 'no', 'nb']),
    (('se',), ['no', 'nb', 'sv', 'nn', 'fi', 'da']),
    (('bug', 'id', 'jv', 'map-bms', 'ms', 'su'), ['id', 'ms', 'jv']),
    (('bs', 'hr', 'mk', 'sh', 'sr'), ['sh', 'hr', 'sr', 'bs']),
    (('ceb', 'pag', 'war'), ['tl']),
    (('bi',), ['tpi']),
    (('tpi',), ['bi']),
    (('new',), ['ne']),
    (('nov',), ['io', 'eo']),
)

def altlang(code):
    """Return the list of alternative language codes for `code`.

    The result is a new list on every call, in order of preference; it is
    empty when no alternatives are defined for `code`.
    """
    for codes, alternatives in _ALTLANG_TABLE:
        if code in codes:
            # copy, so callers cannot modify the table
            return list(alternatives)
    return []

def translate(code, dict):
    """
    Given a language code and a dictionary, returns the dictionary's value for
    key 'code' if this key exists; otherwise tries to return a value for an
    alternative language that is most applicable to use on the Wikipedia in
    language 'code'.
    The language itself is always checked first, then languages that
    have been defined to be alternatives, and finally English. If none of
    the options gives result, we just take the first language in the
    list.

    Raises IndexError if `dict` is empty.
    """
    # If a site is given instead of a code, use its language
    if hasattr(code, 'lang'):
        code = code.lang

    if code in dict:
        return dict[code]
    for alt in altlang(code):
        if alt in dict:
            return dict[alt]
    if 'en' in dict:
        return dict['en']
    # list() makes the result indexable whether values() returns a list or
    # a view.
    return list(dict.values())[0]

def showDiff(oldtext, newtext):
    """
    Prints a string showing the differences between oldtext and newtext.
    The differences are highlighted (only on Unix systems) to show which
    changes were made.

    Only lines that difflib.ndiff marks with '+' or '-' are shown. The
    text and a parallel list of per-character colors are passed to output().
    """
    # For information on difflib, see http://pydoc.org/2.3/difflib.html
    color = {
        '+': 10, # green
        '-': 12  # red
    }
    diff = u''
    colors = []
    # This will store the last line beginning with + or -.
    lastline = None
    for line in difflib.ndiff(oldtext.splitlines(), newtext.splitlines()):
        if line.startswith('?'):
            # A '?' line annotates the +/- line just before it.
            # initialize color vector with None, which means default color
            lastcolors = [None for c in lastline]
            # colorize the + or - sign
            lastcolors[0] = color[lastline[0]]
            # colorize changed parts in red or green
            for i in range(min(len(line), len(lastline))):
                if line[i] != ' ':
                    lastcolors[i] = color[lastline[0]]
            diff += lastline + '\n'
            # append one None (default color) for the newline character
            colors += lastcolors + [None]
        elif lastline:
            diff += lastline + '\n'
            # colorize the + or - sign only
            lastcolors = [None for c in lastline]
            lastcolors[0] = color[lastline[0]]
            colors += lastcolors + [None]
        lastline = None
        if line[0] in ('+', '-'):
            lastline = line
    # there might be one + or - line left that wasn't followed by a ? line.
    if lastline:
        diff += lastline + '\n'
        # colorize the + or - sign only
        lastcolors = [None for c in lastline]
        lastcolors[0] = color[lastline[0]]
        colors += lastcolors + [None]

    output(diff, colors = colors)

def activateLog(logname):
    """Open the log file `logname` in the 'logs' directory and store it in
    the module-level `logfile`.

    The file is opened as UTF-8 for appending; if that raises IOError it is
    opened for writing instead.
    """
    global logfile
    import wikipediatools as _wt
    logfn = _wt.absoluteFilename('logs', logname)
    try:
        logfile = codecs.open(logfn, 'a', 'utf-8')
    except IOError:
        logfile = codecs.open(logfn, 'w', 'utf-8')

def output(text, decoder = None, colors = None, newline = True):
    """
    Works like print, but uses the encoding used by the user's console
    (console_encoding in the configuration file) instead of ASCII.
    If decoder is None, text should be a unicode string. Otherwise it
    should be encoded in the given encoding.

    colors is a list of integers, one for each character of text. If a
    list entry is None, the default color will be used for the character
    at that position. Omitting colors is the same as passing an empty list.

    If newline is True, a linebreak will be added after printing the text.

    When a log file is active, the text plus a newline is also written to
    it, whatever the value of `newline`.
    """
    if colors is None:
        # a fresh list per call instead of one shared default object
        colors = []
    if decoder:
        text = unicode(text, decoder)
    elif type(text) != type(u''):
        print("DBG> BUG: Non-unicode passed to wikipedia.output without decoder!")
        traceback.print_stack()
        print("DBG> Attempting to recover, but please report this problem")
        try:
            text = unicode(text, 'utf-8')
        except UnicodeDecodeError:
            text = unicode(text, 'iso8859-1')
    if logfile:
        # save the text in a logfile (will be written in utf-8)
        logfile.write(text + '\n')
        logfile.flush()
    ui.output(text, colors = colors, newline = newline)

def input(question, colors = None):
    """Ask `question` through the user interface object `ui` and return
    its result. `colors` is handed through unchanged."""
    answer = ui.input(question, colors)
    return answer

def inputChoice(question, answers, hotkeys, default = None):
    """Ask a multiple-choice `question` through the user interface object
    `ui` and return its result. All arguments are handed through unchanged."""
    choice = ui.inputChoice(question, answers, hotkeys, default)
    return choice

def showHelp(moduleName = None):
    """Print the help for the global arguments, followed by the docstring
    of the module `moduleName`.

    If the module cannot be imported or its docstring cannot be shown, a
    short apology is printed instead.
    """
    # the parameter moduleName is deprecated and should be left out.
    import os.path
    # splitext does not fail when the script name contains no dot.
    moduleName = moduleName or os.path.splitext(sys.argv[0])[0]
    # Drop everything up to the last backslash; rfind gives -1 when there is
    # none, which leaves the name untouched.
    moduleName = moduleName[moduleName.rfind("\\")+1:]
    globalHelp =u'''
Global arguments available for all bots:

-lang:xx         Set the language of the wiki you want to work on, overriding
                 the configuration in user-config.py. xx should be the
                 language code.

-family:xyz      Set the family of the wiki you want to work on, e.g.
                 wikipedia, wiktionary, wikitravel, ...
                 This will override the configuration in user-config.py.

-log             Enable the logfile. Logs will be stored in the logs
                 subdirectory.

-log:xyz         Enable the logfile, using xyz as the filename.

-nolog           Disable the logfile (if it is enabled by default).

-putthrottle:nn  Set the minimum time (in seconds) the bot will wait between
                 saving pages.
'''
    output(globalHelp)
    try:
        # __import__ avoids executing a string built from the module name.
        module = __import__(moduleName)
        output(module.__doc__, 'utf-8')
    except Exception:
        # Deliberately broad: any failure to load or show the docstring ends
        # here, but SystemExit and KeyboardInterrupt still propagate.
        output(u'Sorry, no help available for %s' % moduleName)

def stopme():
    """This should be run when a bot does not interact with the Wiki, or
       when it has stopped doing so. After a bot has run stopme() it will
       not slow down other bots any more.
    """
    get_throttle.drop()

def debugDump(name, site, error, data):
    """Write `data` to a dump file and report the error through output().

    The file is created in the current directory and named from `name`,
    the repr of `site` and the current time, with spaces and colons
    replaced. It starts with an 'Error reported' header.
    """
    import time
    name = unicode(name)
    error = unicode(error)
    site = unicode(repr(site).replace(u':',u'_'))
    filename = '%s_%s__%s.dump' % (name, site, time.asctime())
    filename = filename.replace(' ','_').replace(':','-')
    f = open(filename, 'wb') #trying to write it in binary
    #f = codecs.open(filename, 'w', 'utf-8')
    try:
        # The file is binary, so encode the header explicitly instead of
        # relying on an implicit ASCII conversion.
        f.write((u'Error reported: %s\n\n' % error).encode('utf8'))
        try:
            f.write(data.encode("utf8"))
        except UnicodeDecodeError:
            # data is a byte string that is not plain ASCII; write it as is
            f.write(data)
    finally:
        f.close()
    output( u'ERROR: %s caused error %s. Dump %s created.' % (name,error,filename) )