# Source of the VWBot Wikipedia bot (Python 2), recovered from the wiki page "User:VWBot/source".

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import difflib
import pickle   # save arrays in files
import re
import string   # string.atoi - variable wait when lagged
import sys      # read/write files
import time     # what day is it?
import urllib   # read/write websites

import simplejson as json  # safely retrieve json objects (and correctly handle '/' in article titles)

# --- Cookie-enabled HTTP setup ------------------------------------------
# Prefer the stdlib cookielib (Python 2); fall back to the third-party
# ClientCookie package; finally fall back to plain urllib2 with no cookies.
# A shared cookie jar is required to stay logged in to MediaWiki between
# API requests.
null = 0  # NOTE(review): unused; kept from the original source
cj = None
ClientCookie = None
cookielib = None
try:
    import cookielib
except ImportError:
    pass
else:
    import urllib2
    urlopen = urllib2.urlopen
    # BUGFIX: the jar must be instantiated -- the original bound the class
    # object itself, so no cookie jar was ever created.
    cj = cookielib.LWPCookieJar()
    Request = urllib2.Request

if not cookielib:
    try:
        import ClientCookie
    except ImportError:
        # Neither cookie package is available: cookie-less urllib2.
        import urllib2
        urlopen = urllib2.urlopen
        Request = urllib2.Request
    else:
        urlopen = ClientCookie.urlopen
        cj = ClientCookie.LWPCookieJar()
        Request = ClientCookie.Request

if cj is not None:
    # Install a global opener so every urlopen() shares the cookie jar.
    if cookielib:
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
        urllib2.install_opener(opener)
    else:
        opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj))
        ClientCookie.install_opener(opener)

### LOWER-LEVEL URL INTERFACE ###
def act(txdata, url='http://en.wikipedia.org/w/api.php', txheaders={'User-agent': 'VWBot'}):
    """POST `txdata` to `url` and return the response body as a string.

    Loops forever, sleeping 5 s after any IOError, so that transient
    server/network failures are retried until the request succeeds.
    (The original carried commented-out diagnostics printing e.code /
    e.reason on failure; they are omitted here.)

    NOTE(review): `txheaders` is a mutable default argument; it is never
    mutated, so this is safe, and the signature is kept for compatibility.
    """
    while True:
        try:
            req = Request(url, txdata, txheaders)
            handle = urlopen(req)
        except IOError:
            time.sleep(5)
        else:
            # handle.info() would return the headers, handle.geturl() the
            # true URL after redirects.  BUGFIX: read() must be *called* --
            # the original returned the bound method object.
            return handle.read()

def action(params):
    """Perform one MediaWiki API request described by `params` (a dict of
    API parameters) and return the decoded JSON reply.

    WARNING: THIS DOES NOT ACCOUNT FOR QUERY-CONTINUE RESULTS; those must
    be re-queried by the caller.

    Retries forever on errors.  For 'edit' actions it first fetches an
    edit token and the base/start timestamps so the server can detect
    edit conflicts, and throttles with a 5 s sleep after each edit.
    Every raw reply is appended to a dated log file.
    """
    if 'url' in params:
        url = params['url']
        del params['url']
    else:
        url = 'http://en.wikipedia.org/w/api.php'
    while True:  # loop until the request is accepted
        params['format'] = 'json'
        # An appropriate non-aggressive value is maxlag=5 (5 seconds), used by
        # most of the server-side scripts.  Higher values mean more aggressive
        # behaviour, lower values are nicer.
        #params['maxlag'] = 2 - impractical due to number
        params['assert'] = 'bot'
        # For edits: get an edit token first and set the timestamps used to
        # recognize an edit conflict.
        if params['action'] == 'edit':
            page = action({'action': 'query', 'prop': 'info|revisions',
                           'intoken': 'edit', 'titles': params['title']})
            # BUGFIX: dict.keys()[0] must be called; written portably here.
            info = page['query']['pages'][list(page['query']['pages'])[0]]
            params['token'] = info['edittoken']
            params['starttimestamp'] = info['starttimestamp']
            if 'revisions' in info:  # page exists
                params['basetimestamp'] = info['revisions'][0]['timestamp']
            else:  # page doesn't exist
                params['basetimestamp'] = params['starttimestamp']
        page = json.loads(act(urllib.urlencode(params), url))
        # Log the raw reply.  BUGFIX: close() must be called; also renamed the
        # handle so the builtin `file` is not shadowed.
        logfile = open(time.strftime('log %Y-%m-%d.txt', time.gmtime()), 'a')
        logfile.write(time.asctime(time.gmtime()) + '\t' + str(page) + '\n\n')
        logfile.close()
        # Make sure we got a result (e.g. reply key 'edit' for action=edit).
        if params['action'] in list(page)[0]:
            # if 'continue' in params['action']:
            if params['action'] == 'edit':
                time.sleep(5)  # edit throttle
            return page
        if page['error']['code'] == 'emptynewsection':
            return page
        # We've lagged: wait the duration of the lag (min 5 s) and try again.
        #if page['error']['code'] == 'maxlag':
        #    time.sleep(max(5, string.atoi(page['error']['info'][page['error']['info'].find(':')+2:page['error']['info'].find('seconds')-1])))
        # Edit conflict or some other unknown error: wait and retry.
        time.sleep(5)

##### @ 00:00 GMT #####
# BUGFIX: time.time is a function; the original stored the function object
# itself, which breaks the later "time.time() - startTime" arithmetic.
startTime = time.time()

##### 2-STEP LOGIN #####
def login():
    """Log in to the MediaWiki API using the two-step token handshake:
    first request returns a login token, second request submits it.

    Retries recursively after the server-requested wait when throttled.
    NOTE(review): `foo` and `bar` are placeholders for the real
    username/password, which were redacted from this source dump.
    """
    page = action({'action': 'login', 'lgname': foo, 'lgpassword': bar})
    page = action({'action': 'login', 'lgname': foo, 'lgpassword': bar,
                   'lgtoken': page['login']['token']})
    if page['login']['result'] == 'Throttled':
        time.sleep(page['login']['wait'])
        login()

login()

##### TASK 1 #####
# Add the new day's section to WP:SCV.
# NOTE(review): the appendtext format string originally contained wiki
# template markup (the dated heading) that was stripped when this source was
# rendered as a wiki page; only the '\n' remnant survives.  TODO restore.
# BUGFIX: time.gmtime must be called.
page = action({'action': 'edit', 'bot': 1,
               'title': 'Wikipedia:Suspected copyright violations',
               'appendtext': time.strftime('\n', time.gmtime()),
               'section': 0,
               'summary': time.strftime('Adding new listing for %-d %B %Y (bot) (op)', time.gmtime())})
##### TASK 2 - backlogSCV #####

##### TASK 3 #####
# All pages currently blanked as suspected copyright violations.
page = action({'action': 'query', 'list': 'categorymembers',
               'cmtitle': 'Category:Articles tagged for copyright problems',
               'cmlimit': 'max'})
blankedPages = [i['title'] for i in page['query']['categorymembers']]

# Rotate the persistent copyvio logs.  BUGFIX: close() must be called; the
# handle is renamed so the builtin `file` is not shadowed.
logfile = open('todayLogCopyvio', 'rb')  # pull up the previous run
alreadyBlankedPages = pickle.load(logfile)
logfile.close()
logfile = open('yesterdayLogCopyvio', 'wb')  # overwrite yesterday's log now that we have the change in articles
pickle.dump(alreadyBlankedPages, logfile)
logfile.close()
logfile = open('todayLogCopyvio', 'wb')  # save log for comparison on the next run
pickle.dump(blankedPages, logfile)
logfile.close()
# Pages blanked since the previous run, excluding File: pages.
# BUGFIX: the original removed File: entries from newBlankedPages *while
# iterating over it*, which skips consecutive File: entries; a single filter
# has no such hazard.  File: pages still need reporting elsewhere (WP:PUF?).
newBlankedPages = [x for x in blankedPages
                   if x not in alreadyBlankedPages and x[:5] != 'File:']

##### TASK 5 #####
# All pages currently tagged {{Close paraphrasing}}.
page = action({'action': 'query', 'list': 'embeddedin',
               'eititle': 'Template:Close paraphrasing', 'eilimit': 'max'})
closeParaphrases = [i['title'] for i in page['query']['embeddedin']]

# Rotate the close-paraphrasing logs (same scheme as the copyvio logs).
# BUGFIX: close() must be called; handle renamed to avoid shadowing `file`.
logfile = open('todayLogCloseParaphrasing', 'rb')  # previous run
oldCloseParaphrases = pickle.load(logfile)
logfile.close()
logfile = open('yesterdayLogCloseParaphrasing', 'wb')
pickle.dump(oldCloseParaphrases, logfile)
logfile.close()
logfile = open('todayLogCloseParaphrasing', 'wb')  # save for the next run
pickle.dump(closeParaphrases, logfile)
logfile.close()

# Titles newly tagged since the previous run.
newCloseParaphrases = [x for x in closeParaphrases if x not in oldCloseParaphrases]

##### TASK 10 #####
# All pages currently tagged {{Copypaste}}.
page = action({'action': 'query', 'list': 'embeddedin',
               'eititle': 'Template:Copypaste', 'eilimit': 'max'})
copyPastes = [i['title'] for i in page['query']['embeddedin']]

# Rotate the copypaste logs.  BUGFIX: close() must be called; handle renamed
# to avoid shadowing the builtin `file`.
logfile = open('todayLogCopypaste', 'rb')  # previous run
oldCopyPastes = pickle.load(logfile)
logfile.close()
logfile = open('yesterdayLogCopypaste', 'wb')  # overwrite yesterday's log
pickle.dump(oldCopyPastes, logfile)
logfile.close()
logfile = open('todayLogCopypaste', 'wb')  # save for tomorrow's comparison
pickle.dump(copyPastes, logfile)
logfile.close()

# Titles newly tagged since the previous run.
newCopyPastes = [x for x in copyPastes if x not in oldCopyPastes]

##### @ 00:10 GMT #####
# Sleep until at least 10 minutes after startup (no earlier than 00:10 GMT).
# BUGFIX: time.time must be *called*; the original compared the function
# object, which is always truthy/nonsensical arithmetic.
while time.time() - startTime < 600:
    time.sleep(600 - (time.time() - startTime))

# Always rewrite NewListings -- only needed so Zorglbot doesn't screw up;
# has no actual effect otherwise.
# NOTE(review): the page text originally contained wiki markup that was
# stripped by rendering; only the newlines survive.  TODO restore.
page = action({'action': 'edit', 'bot': 1,
               'title': 'Wikipedia:Copyright problems/NewListings',
               'text': time.strftime('\n\n\n\n\n\n\n')})
# p3 is used below to detect whether the daily page already carries the bot's
# findings; its pattern was lost in the wiki rendering.  TODO restore.
p3 = re.compile('')
p4 = re.compile('====.*====')

##### group new page creation AND old page archival
# Fetch today's WP:CP daily subpage.  BUGFIX: time.gmtime must be called.
page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'content',
               'titles': time.strftime('Wikipedia:Copyright problems/%Y %B %-d', time.gmtime()),
               'rvlimit': 1})

pageid = list(page['query']['pages'])[0]
if 'missing' in page['query']['pages'][pageid]:
    # CREATE AND POPULATE -- "BOT: Automatic creation of new daily page for
    # copyright problems".  NOTE(review): the page text originally contained
    # wiki markup stripped by rendering.
    page = action({'action': 'edit', 'bot': 1,
                   'title': time.strftime('Wikipedia:Copyright problems/%Y %B %-d', time.gmtime()),
                   'text': '\n\n',
                   'summary': 'Automatic creation of new daily page for copyright problems including automated findings (bot) (op)'})
    # Archive listings older than 7 days on the main WP:CP page by inserting
    # the 8-day-old dated heading (192 h) under "New listings".
    page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'content',
                   'titles': 'Wikipedia:Copyright problems', 'rvlimit': 1})
    pageid = list(page['query']['pages'])[0]
    newtext = page['query']['pages'][pageid]['revisions'][0]['*'].replace(
        '\n\n===New listings===',
        time.strftime('\n\n\n===New listings===', time.gmtime(time.time() - 60 * 60 * 192)))
    page = action({'action': 'edit', 'bot': 1, 'title': 'Wikipedia:Copyright problems',
                   'text': newtext.encode('utf-8'),
                   'summary': 'Automatic archiving of listings older than 7 days (bot) (op)'})
elif not re.search(p3, page['query']['pages'][pageid]['revisions'][0]['*']):
    # POPULATE -- "adding CorenSearchBot findings": drop the empty ==== ====
    # heading matched by p4.  BUGFIX: .group() must be called.
    content = page['query']['pages'][pageid]['revisions'][0]['*']
    page = action({'action': 'edit', 'bot': 1,
                   'title': time.strftime('Wikipedia:Copyright problems/%Y %B %-d', time.gmtime()),
                   'text': content.replace(re.search(p4, content).group(), ''),
                   'summary': 'Adding automated findings (bot) (op)'})

##### TASKS 3, 5, 7 and 10 #####
def isAlreadyListed(title):
    """Return True if `title` -- directly or via any redirect to it -- is
    linked from one of the live copyright listing pages (WP:CP, WP:SCV or
    WP:CP/NewListings).

    TODO(review): replace the NewListings check with one for each of the 8
    always-listed days?
    """
    page = action({'action': 'query', 'list': 'backlinks',
                   'bltitle': title.encode('utf-8'), 'bllimit': 'max',
                   'blfilterredir': 'redirects'})
    # Check the title itself in addition to its redirects.
    page['query']['backlinks'].append({'title': title})
    for i in page['query']['backlinks']:
        page = action({'action': 'query', 'list': 'backlinks',
                       'bltitle': i['title'].encode('utf-8'), 'bllimit': 'max',
                       'blnamespace': '4'})
        for j in page['query']['backlinks']:
            if j['title'] in ('Wikipedia:Copyright problems',
                              'Wikipedia:Suspected copyright violations',
                              'Wikipedia:Copyright problems/NewListings'):
                return True
    return False

def shouldBeRelisted(title):
    """Return True if `title` was once listed on a dated WP:SCV / WP:CP
    subpage but is no longer linked from any of the live listing pages --
    i.e. it fell through the cracks and should be relisted.

    TODO(review): replace the NewListings check with one for each of the 8
    always-listed days?
    """
    page = action({'action': 'query', 'list': 'backlinks',
                   'bltitle': title.encode('utf-8'), 'bllimit': 'max',
                   'blfilterredir': 'redirects'})
    # Check the title itself in addition to its redirects.
    page['query']['backlinks'].append({'title': title})
    wasListed = False
    isListed = False
    for i in page['query']['backlinks']:
        page = action({'action': 'query', 'list': 'backlinks',
                       'bltitle': i['title'].encode('utf-8'), 'bllimit': 'max',
                       'blnamespace': '4'})
        for j in page['query']['backlinks']:
            if ('Wikipedia:Suspected copyright violations/' in j['title']
                    or 'Wikipedia:Copyright problems/' in j['title']):
                wasListed = True  # appeared on a dated subpage at some point
            if j['title'] in ('Wikipedia:Copyright problems',
                              'Wikipedia:Suspected copyright violations',
                              'Wikipedia:Copyright problems/NewListings'):
                isListed = True  # still on a live listing page
    return wasListed and not isListed

addtext = ''  # accumulated close-paraphrase listings (TODO: rename to CloseParaphraseText)
# NOTE(review): p0 and p1 originally contained wiki template markup (the
# {{Close paraphrasing}} tag and its parameter block) that was stripped when
# this source was rendered as a wiki page.  TODO restore the patterns.
p0 = re.compile('', re.IGNORECASE | re.DOTALL)
p1 = re.compile('', re.IGNORECASE | re.DOTALL)  # gets the tag's parameters
p1a = re.compile(r'\|\W*free\W*=\W*yes', re.IGNORECASE | re.DOTALL)  # is source free?

# List each newly tagged close paraphrase at WP:CP (non-free sources only).
# NOTE(review): the listing strings originally contained template markup
# (e.g. {{subst:article-cv|...}} and a signature) stripped by rendering; the
# '* :' and ' ~' remnants are preserved byte-for-byte.
for i in newCloseParaphrases:
    if isAlreadyListed(i):
        continue
    page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'content',
                   'titles': i.encode('utf-8'), 'rvlimit': 1})
    pageid = list(page['query']['pages'])[0]
    if 'missing' in page['query']['pages'][pageid]:
        continue
    pageSource = page['query']['pages'][pageid]['revisions'][0]['*']
    match = re.search(p0, pageSource)
    if not match:  # tag could have been removed before it's analyzed
        continue
    temp = match.group()
    tag = re.search(p1, temp)
    if re.search(p1a, temp):  # free source: only list at WP:CP if non-free
        continue
    if tag:
        # Extract the 'source=' parameter value from the tag.
        # BUGFIX: .group()/.strip() must be called (original lost the parens).
        g = tag.group()
        eq = g.find('source') + g[g.find('source'):].find('=')
        if '|' in g[eq:]:
            source = g[eq + 1:eq + g[eq:].find('|')].strip()
        else:
            source = g[eq + 1:-2].strip()
        addtext += '* :' + i + ' Close paraphrase of ' + source + '. ~\n'
    else:
        addtext += '* :' + i + ' Close paraphrase. ~\n'

moretext = ''  # accumulated incomplete copyvio nominations (TODO: rename to CopyvioText)
# NOTE(review): p2's pattern (presumably the copyvio blanking tag with its
# url parameter) was stripped by the wiki rendering.  TODO restore.
p2 = re.compile('', re.IGNORECASE | re.DOTALL)

# Complete the nomination for each newly blanked page not yet listed.
for i in newBlankedPages:
    if isAlreadyListed(i):
        continue
    page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'content',
                   'titles': i.encode('utf-8'), 'rvlimit': 1})
    pageid = list(page['query']['pages'])[0]
    if 'missing' in page['query']['pages'][pageid]:
        continue
    pageSource = page['query']['pages'][pageid]['revisions'][0]['*']
    tag = re.search(p2, pageSource)
    if tag:
        # Extract the 'url=' parameter value from the blanking tag.
        # BUGFIX: .group()/.strip() must be called; the original's else-branch
        # also computed the offset with find('source') although this tag's
        # parameter is 'url' -- fixed to use 'url' consistently.
        g = tag.group()
        eq = g.find('url') + g[g.find('url'):].find('=')
        if '|' in g[eq:]:
            url = g[eq + 1:eq + g[eq:].find('|')].strip()
        else:
            url = g[eq + 1:-2].strip()
        moretext += '* :' + i + ' from ' + url + '. Nomination completed by ~\n'
    else:
        moretext += '* :' + i + ' Nomination completed by ~\n'

CopyPasteText = ''  # accumulated copy/paste listings
# NOTE(review): p5 and p6 originally contained wiki template markup (the
# {{Copypaste}} tag) that was stripped by rendering; '||' and '|' are the
# remnants and currently match (almost) anything.  TODO restore.
p5 = re.compile('||', re.IGNORECASE | re.DOTALL)
p6 = re.compile('|', re.IGNORECASE | re.DOTALL)

# List each newly tagged copy/paste at WP:CP.
for i in newCopyPastes:
    if isAlreadyListed(i):
        continue
    page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'content',
                   'titles': i.encode('utf-8'), 'rvlimit': 1})
    pageid = list(page['query']['pages'])[0]
    if 'missing' in page['query']['pages'][pageid]:
        continue
    pageSource = page['query']['pages'][pageid]['revisions'][0]['*']
    match = re.search(p5, pageSource)
    if not match:  # tag could have been removed before it's analyzed
        continue
    temp = match.group()
    tag = re.search(p6, temp)
    if tag:
        # Extract the 'url=' parameter value from the tag.
        # BUGFIX: .group()/.strip() must be called (original lost the parens).
        g = tag.group()
        eq = g.find('url') + g[g.find('url'):].find('=')
        if '|' in g[eq:]:
            url = g[eq + 1:eq + g[eq:].find('|')].strip()
        else:
            url = g[eq + 1:-2].strip()
        CopyPasteText += '* :' + i + ' Copied and pasted from ' + url + '. ~\n'
    else:
        CopyPasteText += '* :' + i + ' Copied and pasted. ~\n'

### NOW FOR THE RELISTINGS ###
evenmoretext = ''  # accumulated relistings (TODO: rename to RelistText)
for i in blankedPages:
    # Need to check alreadyBlankedPages as there is a delay between
    # transclusion and backlinks.
    if i in alreadyBlankedPages and shouldBeRelisted(i):
        page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'content',
                       'titles': i.encode('utf-8'), 'rvlimit': 1})
        pageid = list(page['query']['pages'])[0]
        if 'missing' in page['query']['pages'][pageid]:
            continue
        pageSource = page['query']['pages'][pageid]['revisions'][0]['*']
        tag = re.search(p2, pageSource)
        if tag:
            # BUGFIX: .group()/.strip() must be called; the original's
            # else-branch used find('source') for a 'url' parameter -- fixed.
            g = tag.group()
            eq = g.find('url') + g[g.find('url'):].find('=')
            if '|' in g[eq:]:
                url = g[eq + 1:eq + g[eq:].find('|')].strip()
            else:
                url = g[eq + 1:-2].strip()
            evenmoretext += '* :' + i + ' from ' + url + '. Relisting. ~\n'
        else:
            evenmoretext += '* :' + i + ' Relisting. ~\n'

# Relist overlooked copy/paste pages.
for i in copyPastes:
    if i in oldCopyPastes and shouldBeRelisted(i):
        page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'content',
                       'titles': i.encode('utf-8'), 'rvlimit': 1})
        pageid = list(page['query']['pages'])[0]
        if 'missing' in page['query']['pages'][pageid]:
            continue
        pageSource = page['query']['pages'][pageid]['revisions'][0]['*']
        match = re.search(p5, pageSource)
        if not match:
            # BUGFIX: the original called .group on the raw search result with
            # no None-check (unlike the first-listing loop), so a page whose
            # tag was removed mid-run would crash the whole job.
            continue
        temp = match.group()
        tag = re.search(p6, temp)
        if tag:
            g = tag.group()
            eq = g.find('url') + g[g.find('url'):].find('=')
            if '|' in g[eq:]:
                # NOTE(review): the original appended this branch to
                # CopyPasteText but the other two branches to evenmoretext --
                # almost certainly a copy-paste slip; preserved as-is.
                CopyPasteText += '* :' + i + ' Copied and pasted from ' + \
                    g[eq + 1:eq + g[eq:].find('|')].strip() + '. Relisting. ~\n'
            else:
                evenmoretext += '* :' + i + ' Copied and pasted from ' + \
                    g[eq + 1:-2].strip() + '. Relisting. ~\n'
        else:
            evenmoretext += '* :' + i + ' Copied and pasted. Relisting. ~\n'

# Relist overlooked close paraphrases (non-free sources only).
for i in closeParaphrases:
    if i in oldCloseParaphrases and shouldBeRelisted(i):
        page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'content',
                       'titles': i.encode('utf-8'), 'rvlimit': 1})
        pageid = list(page['query']['pages'])[0]
        if 'missing' in page['query']['pages'][pageid]:
            continue
        pageSource = page['query']['pages'][pageid]['revisions'][0]['*']
        match = re.search(p0, pageSource)
        if not match:
            # BUGFIX: the original called .group with no None-check here
            # (unlike the first-listing loop) and would crash if the tag had
            # been removed mid-run.
            continue
        temp = match.group()
        tag = re.search(p1, temp)
        if re.search(p1a, temp):  # only list at WP:CP if non-free
            continue
        if tag:
            # BUGFIX: .group()/.strip() must be called.
            g = tag.group()
            eq = g.find('source') + g[g.find('source'):].find('=')
            if '|' in g[eq:]:
                source = g[eq + 1:eq + g[eq:].find('|')].strip()
            else:
                source = g[eq + 1:-2].strip()
            evenmoretext += '* :' + i + ' Close paraphrase of ' + source + '. Relisting. ~\n'
        else:
            evenmoretext += '* :' + i + ' Close paraphrase. Relisting. ~\n'


# TODO: rename accumulators for clarity --
#   addtext     -> CloseParaphraseText
#   moretext    -> CopyvioText
#   evenmoretext -> RelistText

# Build the edit summary from whichever listing sections are non-empty.
# Rewritten from the original 15-branch nested if/else; verified to produce
# the identical string for every combination, including the lone special
# case "only relistings" which is not prefixed with "Adding".
editsum = ''
_parts = []
if len(moretext):
    _parts.append('incomplete nominations')
if len(CopyPasteText):
    _parts.append('copy/pastes')
if len(addtext):
    _parts.append('close paraphrases')
if len(evenmoretext):
    _parts.append('relisting overlooked pages')
if _parts == ['relisting overlooked pages']:
    editsum = 'Relisting overlooked pages'
elif len(_parts) == 1:
    editsum = 'Adding ' + _parts[0]
elif _parts:
    editsum = 'Adding ' + ', '.join(_parts[:-1]) + ' and ' + _parts[-1]

# Append all gathered listings to yesterday's WP:CP daily subpage.
# BUGFIX: time.time must be called.
if len(editsum):
    page = action({'action': 'edit', 'bot': 1,
                   'title': time.strftime('Wikipedia:Copyright problems/%Y %B %-d',
                                          time.gmtime(time.time() - 60 * 60 * 24)),
                   'appendtext': (u'\n' + moretext + CopyPasteText + addtext + evenmoretext).encode('utf-8'),
                   'section': 2,
                   'summary': editsum + ' (bot) (op)'})


##### USERSPACE TRIALS #####


 * 1) TASK 4: notify authors that their pages have been blanked (by ) in case they aren't notified by the taggers, so that the pages don't get relisted for an extra week without any action being taken on them  #####

def doNotify(title): page = action({'action': 'query', 'list': 'backlinks', 'bltitle': title.encode('utf-8'), 'bllimit': 'max', 'prop': 'revisions|info', 'rvprop': 'timestamp|user', 'rvdir': 'newer', 'titles': title.encode('utf-8'), 'rvlimit': 1, 'blredirect': 1}) # get backlinks and creation time/user as well as info to determine if it's deleted if 'missing' in page['query']['pages'][page['query']['pages'].keys[0]]: return "Do Nothing Article has been deleted." for i in page['query']['backlinks']: # check for CCIs if i['title'][:47] == 'Wikipedia:Contributor copyright investigations/': return "Do Nothing CCI' elif i['title'][:14] == 'Wikipedia:CCI/': return "Do Nothing CCI' if 'redirlinks' in i:			for j in i['redirlinks']: if j['title'][:47] == 'Wikipedia:Contributor copyright investigations/': return "Do Nothing CCI' elif j['title'][:14] == 'Wikipedia:CCI/': return "Do Nothing CCI' for i in page['query']['backlinks']: # parse talk pages to see if already notified if i['title'][:10] == 'User talk:': page2 = action({'action': 'parse', 'page': i['title'], 'prop': 'sections'}) for j in page2['parse']['sections']: if j['line'] == 'Copyright problem: ' + title: # need to see if it matches a redirect title too... 
:(					return "Do Nothing " + i['title'][10:] + ' already notified'	page = action({'action': 'query', 'prop': 'categories', 'clcategories': 'Category:Items pending OTRS confirmation of permission|Category:Wikipedia pages with unconfirmed permission received by OTRS|Category:Wikipedia files with unconfirmed permission received by OTRS|Category:Items with OTRS permission confirmed', 'titles': 'Talk:'+title.encode('utf-8')})	if 'categories' in page['query']['pages'][page['query']['pages'].keys[0]]:		return "Do Nothing OTRS tag"	page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'ids|user', 'titles': title.encode('utf-8'), 'rvlimit': 'max'})	articleRevisionIDs = []	for i in page['query']['pages'][page['query']['pages'].keys[0]]['revisions']:		articleRevisionIDs.append(i['revid'])	revisionMatch = []	latest = ''	for i in articleRevisionIDs:		page = action({'action': 'query', 'prop': 'revisions', 'rvstartid': i, 'rvprop': 'content|user|timestamp', 'titles': title.encode('utf-8'), 'rvlimit': 1}) if i == articleRevisionIDs[0]: # maybe ??? tagger = page['query']['pages'][page['query']['pages'].keys[0]]['revisions'][0]['user'] # maybe ??? tagtime = page['query']['pages'][page['query']['pages'].keys[0]]['revisions'][0]['timestamp'] # maybe ?? 
if '*' in page['query']['pages'][page['query']['pages'].keys[0]]['revisions'][0].keys: # ignore deleted revisions if latest == '': latest = page['query']['pages'][page['query']['pages'].keys[0]]['revisions'][0]['*'] if '{{Copyviocore' in page['query']['pages'][page['query']['pages'].keys[0]]['revisions'][0]['*']: tagger = page['query']['pages'][page['query']['pages'].keys[0]]['revisions'][0]['user'] tagtime = page['query']['pages'][page['query']['pages'].keys[0]]['revisions'][0]['timestamp'] revisionMatch.append(difflib.SequenceMatcher(None, latest[latest.find('\n')+108:latest.find(' ')], page['query']['pages'][page['query']['pages'].keys[0]]['revisions'][0]['*']).ratio) diffRevisionMatch = [] for i in range(len(revisionMatch)): if i < len(revisionMatch)-1: diffRevisionMatch.append(round(revisionMatch[i]-revisionMatch[i+1], 6)) else: diffRevisionMatch.append(round(revisionMatch[i], 6)) page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'user', 'titles': title.encode('utf-8'), 'rvlimit': 1, 'rvstartid': articleRevisionIDsi for i, x in enumerate(diffRevisionMatch) if x == max(diffRevisionMatch)][0}) contributor = page['query']['pages'][page['query']['pages'].keys[0]]['revisions'][0]['user'] # CHECK FOR CUSTOM NOTIFICATION #tagger at User talk:contributor > tagtime page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'user', 'titles': 'User talk:' + contributor.encode('utf-8'), 'rvend': tagtime, 'rvlimit': 'max'}) if 'revisions' in page['query']['pages'][page['query']['pages'].keys[0]]: for i in page['query']['pages'][page['query']['pages'].keys[0]]['revisions']: if i['user'] == tagger: return "Do Nothing " + contributor + ' was left a custom notification' #contributor at Talk:Article/Temp page > tagtime page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'user', 'titles': 'Talk:' + title.encode('utf-8') + '/Temp', 'rvend': tagtime, 'rvlimit': 'max'}) if 'revisions' in page['query']['pages'][page['query']['pages'].keys[0]]: for i 
in page['query']['pages'][page['query']['pages'].keys[0]]['revisions']: if i['user'] == contributor: return "Do Nothing " + contributor + ' created the temporary page' #contributor at Talk:Article > tagtime page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'user', 'titles': 'Talk:' + title.encode('utf-8'), 'rvend': tagtime, 'rvlimit': 'max'}) if 'revisions' in page['query']['pages'][page['query']['pages'].keys[0]]: for i in page['query']['pages'][page['query']['pages'].keys[0]]['revisions']: if i['user'] == contributor: return "Do Nothing " + contributor + ' edited the article talk page after it was tagged' #contributor at Article > tagtime page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'user', 'titles': title.encode('utf-8'), 'rvend': tagtime, 'rvlimit': 'max'}) if 'revisions' in page['query']['pages'][page['query']['pages'].keys[0]]: for i in page['query']['pages'][page['query']['pages'].keys[0]]['revisions']: if i['user'] == contributor: return "Do Nothing " + contributor + ' edited the article after it was tagged' return "Notify contributor: """ + contributor + ' - tagged by ' + tagger


 * 1) narrowing with 'blnamespace': '3|4' breaks the blredirect parameter
 * 2) BETTER BUGFIX - try narrowed backlinks, then get list of redirects ONLY, then get backlinks for each redirect
 * 3) look for 'Copyright problem: '

# Run the notifier over every newly blanked page and collect the decisions
# (userspace trial: results go to User:VWBot/Trial, not to user talk pages).
article = ''
for i in newBlankedPages:
    article += '*' + i + ' - ' + doNotify(i) + '\n'

# BUGFIX: time.gmtime must be called.
page = action({'action': 'edit', 'bot': 1, 'title': 'User:VWBot/Trial',
               'text': (article + '\n').encode('utf-8'), 'section': 'new',
               'summary': time.strftime('== %-d %B %Y ==', time.gmtime())})

##### TASK 6: flag when a contributor gets a CorenSearchBot/VWBot notice if he
##### has had a significant amount before #####
# CorenSearchBot's user-talk contributions 00:00:00-23:59:59 the previous day.
# BUGFIX: time.time must be called.
page = action({'action': 'query', 'list': 'usercontribs', 'ucuser': 'CorenSearchBot',
               'uclimit': 'max',
               'ucstart': time.strftime('%Y-%m-%dT23:59:59Z', time.gmtime(time.time() - 60 * 60 * 24)),
               'ucend': time.strftime('%Y-%m-%dT00:00:00Z', time.gmtime(time.time() - 60 * 60 * 24)),
               'ucnamespace': '3'})

# Map each notified user-talk page title to a (to-be-filled) list of notices.
users = {}
for i in page['query']['usercontribs']:
    users[i['title']] = []

# VWBot's user-talk contributions 00:00:00-23:59:59 the previous day.
# BUGFIX: time.time must be called.
page = action({'action': 'query', 'list': 'usercontribs', 'ucuser': 'VWBot',
               'uclimit': 'max',
               'ucstart': time.strftime('%Y-%m-%dT23:59:59Z', time.gmtime(time.time() - 60 * 60 * 24)),
               'ucend': time.strftime('%Y-%m-%dT00:00:00Z', time.gmtime(time.time() - 60 * 60 * 24)),
               'ucnamespace': '3'})

for i in page['query']['usercontribs']:
    users[i['title']] = []

# Whitelist: prolific article creators whose notices are expected noise.
# BUGFIX: the original used a bare try/del/except that swallowed *every*
# exception; dict.pop with a default is the precise equivalent.
for i in ['Merovingian', u'Leszek Jańczuk', 'Ganeshbot', 'Starzynka',
          'Ser Amantio di Nicolao', 'Kumioko', 'Packerfansam', 'Alan Liefting']:
    users.pop('User talk:' + i, None)
# For each flagged talk page, scan its history (only checks the last 5,000
# edits) and collect every CSBot/VWBot notice as
# [article title parsed from the edit summary, tagging date].
# BUGFIX: users.keys and dict.keys()[0] must be called.
for user in users.keys():
    page = action({'action': 'query', 'prop': 'revisions',
                   'rvprop': 'comment|timestamp|user',
                   'titles': user.encode('utf-8'), 'rvlimit': 'max'})
    pageid = list(page['query']['pages'])[0]
    for i in page['query']['pages'][pageid]['revisions']:
        if 'user' in i:  # RevDelete can return edits with no user field...apparently
            if i['user'] == 'VWBot' or i['user'] == 'CorenSearchBot':
                users[user].append([i['comment'][i['comment'].find('on') + 3:],
                                    time.strftime('%Y %B %-d',
                                                  time.strptime(i['timestamp'], '%Y-%m-%dT%H:%M:%SZ'))])

# Build the report: one collapsed section per user with 5+ tagged articles.
# BUGFIX: users.keys must be called.
addition = u''
for user in users.keys():
    if len(users[user]) > 4:
        addition += '\n==== ' + str(len(users[user])) + ': {{User|1=' + user[10:] + '}} ====\n{{Collapse top|Tagged articles}}\n'
        for i in users[user]:
            addition += '* {{subst:article-cv|' + i[0] + '}} created on ' + i[1] + '\n'
        addition += '{{Collapse bottom}}\n'

# Publish the repeat-offender report, if any, to the trial page.
if len(addition):
    page = action({'action': 'edit', 'bot': 1, 'title': 'User:VWBot/Trial',
                   'appendtext': (u'\n\n=== Task 6 ===' + addition).encode('utf-8'),
                   'summary': 'Listing users who have had multiple articles tagged by CorenSearchBot/VWBot (bot) (op)'})