User:Demi/wikicontribs

The following script can be used to summarize an editor's contributions to Wikipedia, for example mine as of 2005-10-18 (the small numbers in parentheses count the edits that have an edit summary):

Note that the script is slow because of the X(HT)ML parsing going on. Also, as written, the script will usually time out when requesting the URL from MediaWiki. The workaround is to download that URL with wget (or a similar tool) and then run wikicontribs on the resulting file.

The maximum number of contributions MediaWiki will serve up per page is 5000, so you may need to download several contribution pages by hand (adjusting the URL each time) and run them all through this script (e.g. "wikicontribs file1 file2 file3 ...").

require 'rexml/document'
require 'open-uri'
require 'tempfile'
require 'getoptlong'

# Namespace prefixes recognised at the start of a page title. A title whose
# "Prefix:" part is not listed here is counted in the 'Main' namespace.
$NAMESPACES = [
  'Media', 'Special', 'Talk', 'User', 'User talk', 'Wikipedia',
  'Wikipedia talk', 'Image', 'Image talk', 'MediaWiki', 'MediaWiki talk',
  'Template', 'Template talk', 'Help', 'Help talk', 'Category',
  'Category talk'
]

# One <li> entry of a contributions list, reduced to the namespace and title
# of the edited page plus a flag telling whether an edit summary is present.
class Contrib
  attr_reader :namespace, :title, :editsummary

  # @param li [REXML::Element] the <li> element of a single contribution
  def initialize(li)
    @namespace, @title = classify(page_title(li))
    # An edit summary is present when the entry has a <span class="comment">.
    @editsummary = !li.get_elements("span[@class='comment']").empty?
  end

  private

  # Returns the title attribute of the last <a> child of +li+, or nil when
  # there is no such element or it has no title attribute. The last anchor is
  # picked in Ruby rather than with an XPath predicate: the former 'a[last]'
  # is a child-name test, not the last() position function.
  def page_title(li)
    anchor = li.get_elements('a').last
    anchor && anchor.attributes['title']
  end

  # Splits +title+ into [namespace, title-without-prefix].
  # A missing title yields ['Nil', 'nil']; the 'Talk' prefix is reported as
  # 'Main talk'; an unknown or absent prefix leaves the title untouched and
  # puts it in 'Main'.
  def classify(title)
    return ['Nil', 'nil'] if title.nil?

    m = title.match(/^([\s\w]+)\:(.*)$/)
    return ['Main', title] unless m

    prefix = m[1]
    if prefix == 'Talk'
      ['Main talk', m[2]]
    elsif $NAMESPACES.include?(prefix)
      [prefix, m[2]]
    else
      ['Main', title]
    end
  end
end

# Defaults for the command-line options; '--user' has no default.
$opt = {
  'limit' => 5000,
  'url' => 'http://en.wikipedia.org/w/index.php',
  'page' => 'Special:Contributions',
  'output' => 'plain'
}

# Builds the contributions-page URL from an options hash with the keys
# 'url', 'page', 'user' and 'limit'. All query values are form-encoded so
# that user names containing spaces or punctuation give a valid URL.
def contributions_url(opt)
  query = URI.encode_www_form([
    ['title', opt['page']],
    ['target', opt['user']],
    ['namespace', ''],
    ['offset', 0],
    ['limit', opt['limit']]
  ])
  "#{opt['url']}?#{query}"
end

# Returns the full text of +source+, which is either an http(s)/ftp URL
# (fetched through open-uri) or the path of a local file. Kernel#open is
# avoided on purpose: it no longer fetches URLs on current Ruby versions and
# it would execute arguments that start with '|'.
def read_source(source)
  if source =~ %r{\A(?:https?|ftp)://}i
    URI.parse(source).open { |io| io.read }
  else
    File.open(source) { |io| io.read }
  end
end

# Parses one contributions page and adds its entries to +contributions+
# (namespace => { 'count' => n, 'summaries' => n }).
#
# Only the first <ul> below <div id="bodyContent"> is examined.
#
# @param xml [String] the page markup
# @param contributions [Hash] per-namespace counters, updated in place
# @return [Array(Integer, Integer), nil] edits and edits-with-summary found
#   on this page, or nil when the page contains no such list
# @raise [REXML::ParseException] when +xml+ cannot be parsed
def tally_page(xml, contributions)
  doc = REXML::Document.new(xml)
  list = doc.get_elements("//div[@id='bodyContent']//ul").first
  return nil if list.nil?

  edits = 0
  summarised = 0
  list.each_element('li') do |li|
    contrib = Contrib.new(li)
    row = (contributions[contrib.namespace] ||= { 'count' => 0, 'summaries' => 0 })
    row['count'] += 1
    edits += 1
    next unless contrib.editsummary

    row['summaries'] += 1
    summarised += 1
  end
  [edits, summarised]
end

# Moves every "<X> talk" entry of +contributions+ into the entry of "<X>" as
# the keys 'talk' and 'talksummaries', and removes the talk entry. The "<X>"
# entry is created with zero counts when only the talk namespace was edited,
# so talk edits are never reported as article edits.
#
# @return [Array(Integer, Integer)] total talk edits and talk edits with
#   a summary
def fold_talk_namespaces(contributions)
  talk_edits = 0
  talk_summaries = 0
  # Iterate over a snapshot of the keys: the hash is modified in the loop.
  contributions.keys.each do |name|
    next unless name.end_with?(' talk')

    talk = contributions.delete(name)
    base = (contributions[name.sub(/ talk\z/, '')] ||= { 'count' => 0, 'summaries' => 0 })
    base['talk'] = talk['count']
    base['talksummaries'] = talk['summaries']
    talk_edits += talk['count']
    talk_summaries += talk['summaries']
  end
  [talk_edits, talk_summaries]
end

# Sums the 'count' and 'summaries' values over +rows+ ([name, counters] pairs).
def article_totals(rows)
  rows.inject([0, 0]) do |acc, (_name, v)|
    [acc[0] + v['count'], acc[1] + v['summaries']]
  end
end

# Renders the plain-text report as an array of lines. Each pair of numbers
# is "edits (edits with summary)"; the columns are article / talk; combined.
def plain_lines(rows, talk_totals, grand_totals)
  lines = rows.map do |name, v|
    if v.key?('talk')
      '%s: %i (%i) / %i (%i); %i (%i)' %
        [name, v['count'], v['summaries'], v['talk'], v['talksummaries'],
         v['count'] + v['talk'], v['summaries'] + v['talksummaries']]
    else
      '%s: %i (%i)' % [name, v['count'], v['summaries']]
    end
  end
  # Six values need six slots; the total line uses the same layout as the rows.
  lines << ('Total: %i (%i) / %i (%i); %i (%i)' %
            (article_totals(rows) + talk_totals + grand_totals))
end

# Renders the report as wikitable markup, returned as an array of lines.
def wikitable_lines(rows, talk_totals, grand_totals)
  head = '| style="text-align: center; font-weight: bold; background: #eee; border-bottom: 1px solid" | '
  cell = '| style="text-align: right" | %i (%i) '

  lines = ['{| cellpadding="0" style="border-top: 2px solid; border-bottom: 2px solid; font-size: 90%"']
  %w[Namespace Article Talk Total].each { |title| lines << (head + title) }

  rows.each do |name, v|
    own = [v['count'], v['summaries']]
    lines << '|-' << "|#{name}" << (cell % own)
    if v.key?('talk')
      lines << (cell % [v['talk'], v['talksummaries']])
      lines << (cell % [v['count'] + v['talk'], v['summaries'] + v['talksummaries']])
    else
      # No talk edits: empty talk cell, and the total equals the article cell.
      lines << '|' << (cell % own)
    end
    lines << '|-'
  end

  lines << '|-' << '|Total' << (cell % article_totals(rows)) <<
    (cell % talk_totals) << (cell % grand_totals) << '|}'
end

# Command-line entry point: parses the options into $opt, scans every source
# named on the command line (preceded by the generated URL when --user is
# given) and prints the report selected by --output ('plain' or 'wikitable';
# any other value prints no report).
def main
  begin
    GetoptLong.new(
      ['--limit',  GetoptLong::REQUIRED_ARGUMENT],
      ['--url',    GetoptLong::REQUIRED_ARGUMENT],
      ['--page',   GetoptLong::REQUIRED_ARGUMENT],
      ['--output', GetoptLong::REQUIRED_ARGUMENT],
      ['--user',   GetoptLong::REQUIRED_ARGUMENT]
    ).each { |name, value| $opt[name.sub(/^--/, '')] = value }
  rescue GetoptLong::InvalidOption
    # Unknown options are deliberately not fatal; continue with what was parsed.
  end

  ARGV.unshift(contributions_url($opt)) if $opt.key?('user')

  contributions = {}
  ct = 0
  summaries = 0
  ARGV.each do |source|
    puts 'Scanning %s' % source
    begin
      page_totals = tally_page(read_source(source), contributions)
    rescue REXML::ParseException => e
      # Report the problem and go on with the next source instead of
      # continuing with a document that was never built.
      puts e
      next
    end
    if page_totals.nil?
      warn "No contribution list found in #{source}"
      next
    end
    ct += page_totals[0]
    summaries += page_totals[1]
  end

  talk_totals = fold_talk_namespaces(contributions)
  rows = contributions.sort { |a, b| b[1]['count'] <=> a[1]['count'] }

  if $opt['output'] == 'plain'
    puts plain_lines(rows, talk_totals, [ct, summaries])
  elsif $opt['output'] == 'wikitable'
    puts wikitable_lines(rows, talk_totals, [ct, summaries])
  end
end

main if __FILE__ == $0
#!/usr/bin/ruby