# User:Qwerfjkl/scripts/cleanup.py

r'<pre.*? ',	r' ', r'', ]), re.I | re.S)

# Placeholder that stands in for a hidden span; the number is the span's key
# in ``hideTokens``.
# NOTE(review): the delimiter characters look like mis-decoded UTF-8 (possibly
# U+230A / U+230B). They are kept byte-for-byte here in case code elsewhere
# matches on them -- confirm before repairing.
_HIDE_MARKER = u'âŒŠâŒŠâŒŠâŒŠ%06dâŒ‹âŒ‹âŒ‹âŒ‹'
# Matches any marker (six or more digits) that is still present in a text.
_HIDE_MARKER_PATTERN = u'âŒŠâŒŠâŒŠâŒŠ\\d{6,}âŒ‹âŒ‹âŒ‹âŒ‹'


def hideText(text):
    """Replace every ``hideRegex`` match in *text* with a numbered marker.

    The matched text is stored in the module-level ``hideTokens`` dict under
    the marker's number so that ``showText`` can put it back later.

    Numbering restarts at 112 on every call, so calling this again before
    ``showText`` overwrites the entries stored by the previous call.

    Returns the text with all matches replaced by their markers.
    """
    global hideTokens
    n = 111
    for m in hideRegex.finditer(text):
        n += 1
        hidden = m.group()
        hideTokens[n] = hidden
        # str.replace swaps every occurrence of the matched text, so an
        # identical later match finds nothing left to replace.
        text = text.replace(hidden, _HIDE_MARKER % n)
    return text


def showText(text):
    """Restore the spans that ``hideText`` replaced with markers.

    Every entry of ``hideTokens`` is substituted back into *text*; on success
    ``hideTokens`` is reset to an empty dict and the restored text returned.

    Raises:
        RuntimeError: a marker is still present after all known tokens were
            substituted. ``hideTokens`` is left untouched in that case.
    """
    global hideTokens
    for key, value in hideTokens.items():
        text = text.replace(_HIDE_MARKER % key, value)
    if re.search(_HIDE_MARKER_PATTERN, text):
        wikipedia.output("WARNING: Unable to replace all hidden tokens")
        # String exceptions are rejected by Python 2.6+, so raise a real
        # exception carrying the same message.
        raise RuntimeError("Please report this problem at User talk:Dispenser")
    hideTokens = {}  # Empty
    return text

def main():
    """Apply the general fixes to pages selected on the command line.

    ``-test`` or ``-test:<name>`` reads a sample from ``../cgi-bin/text/``
    (``Tests.html`` when no name is given), shows the diff produced by
    ``fix`` and the diff of a second pass over that result, then returns
    without touching any page.

    Every other argument is handed to the page generator factory. Each
    generated page is run through ``fix`` and saved when the text changed;
    missing pages and redirects are reported and skipped.
    """
    genFactory = pagegenerators.GeneratorFactory()
    summary = "Applying general fixes for links, HTML, and/or references"

    for arg in wikipedia.handleArgs():
        if arg == '-test' or arg.startswith('-test:'):
            # arg[6:] is empty for a bare '-test', which selects Tests.html.
            name = arg[6:].replace('/', '|') or 'Tests.html'
            # Read bytes and decode explicitly; the file is closed on exit.
            with open('../cgi-bin/text/%s' % name, 'rb') as f:
                test = f.read().decode('utf-8')
            site = wikipedia.getSite()
            page = wikipedia.Page(site, 'Special:Snippet')
            page._namespace = 0
            # Disable cgitb disk logging
            import cgitb
            cgitb.enable()
            wikipedia.output("Default site: %s" % site.sitename())
            result = fix(text=test, page=page)
            wikipedia.showDiff(test, result)
            import parser
            # NOTE(review): the format string of this print was missing from
            # the source; the two renderings are emitted one after the other.
            # Restore the intended wrapper text if it is known.
            print(b'%s\n%s' % (parser.parser(test).encode('utf-8'),
                               parser.parser(result).encode('utf-8')))
            wikipedia.output('\n\n== Double pass text ==')
            wikipedia.showDiff(result, fix(text=result, page=page))
            return
        else:
            genFactory.handleArg(arg)

    gen = genFactory.getCombinedGenerator()
    if not gen:
        wikipedia.showHelp('commonfixes')
        return

    for page in gen:
        try:
            original = page.get()
        except wikipedia.NoPage:
            wikipedia.output('%s does not exist!' % page.aslink())
            continue
        except wikipedia.IsRedirectPage:
            wikipedia.output(u'Page %s is a redirect' % page.aslink())
            continue
        text = fix(page=page)
        if text != original:
            wikipedia.showDiff(original, text)
            wikipedia.setAction(summary)
            page.put(text)
        else:
            print(b'No changes necessary')


if __name__ == "__main__" and wikipedia.handleUrlAndHeader():
    try:
        wikipedia.startContent()
        main()
    finally:
        # Always close the output and release the framework, even on error.
        wikipedia.endContent()
        wikipedia.stopme()