User:HighInBC/MCP/RecoverTalkArchive

# Package preamble for the RecoverTalkArchive plugin.
#
# Package variables declared here:
#   $self   - the plugin object. It is assigned in new() and then read by
#             every other sub in this package directly, instead of being
#             passed as an argument.
#   $nowiki - the tag name 'nowiki'. scan_talk_page interpolates it as
#             "<$nowiki>" when it wraps template markup found in a heading.
#             NOTE(review): presumably kept in a variable so the literal tag
#             never appears in this source when it is hosted on a wiki page -
#             confirm.
package HBCPlugins::RecoverTalkArchive; use    strict; use    Algorithm::Diff qw(diff); use    HTTP::Request; use    HTTP::Request::Common; use    LWP::UserAgent; use    XML::Simple; use    URI::Escape; use    Data::Dumper; our    $self; our    $nowiki = ('nowiki');

# Constructor called by the plugin host.
#
# Arguments: the invocant (discarded) followed by a hash reference that
# becomes the plugin object. This sub reads {shared}{add_job} from it, and
# register_method() reads {params}{command_parser_label} and {shared}.
#
# Returns the blessed hash reference. The same reference is stored in the
# package variable $self, which every other sub in this file relies on.
sub new {
    shift;                        # invocant is not used
    $self = shift;
    bless $self, __PACKAGE__;     # same class the one-argument bless() chose

    # Call with explicit parentheses: register_method is defined further down
    # the file, so a bare "register_method;" is an undeclared bareword and is
    # rejected under "use strict".
    register_method();

    # HTTP client shared by login() and scan_talk_page(); the cookie jar lets
    # cookies set by the login request be sent on later requests.
    $self->{ua} = LWP::UserAgent->new;
    $self->{ua}->default_headers->push_header('Accept-Encoding' => 'gzip');
    $self->{ua}->cookie_jar({});

    # Hand login() to the host's add_job callback. The meaning of the second
    # argument (0) is defined by the host and is not visible in this file.
    $self->{shared}{add_job}->(\&login, 0);

    return $self;
}

# Read one credential file and return its content with a single trailing
# line ending (LF or CRLF) removed, so a file saved by an editor that appends
# a newline still yields the bare credential.
# Dies with the path and the OS error when the file cannot be opened.
sub _read_credential {
    my ($path) = @_;

    open(my $fh, '<', $path) or die "Cannot open $path: $!";
    my $value = do { local $/; <$fh> };    # slurp the whole file
    close($fh);

    $value = '' unless defined $value;
    $value =~ s/\r?\n\z//;
    return $value;
}

# Log the shared user agent in to the Wikipedia API.
#
# Reads the account name and password from adminCredentials/username and
# adminCredentials/password, posts them to the API login action and records
# the outcome in $self->{settings}{loggedIn} (true only when the API reports
# the result 'Success').
#
# Dies when a credential file is unreadable or empty. When the response
# cannot be parsed as XML it warns and returns without touching
# $self->{settings}{loggedIn}.
sub login {
    my $admin_username = _read_credential('adminCredentials/username');
    my $admin_password = _read_credential('adminCredentials/password');
    die "Admin username or password is empty"
        unless (length($admin_username) && length($admin_password));

    print "Fetching account cookies...\n";
    my $response = $self->{ua}->request(
        POST('http://en.wikipedia.org/w/api.php?action=login&format=xml',
             [lgname => $admin_username, lgpassword => $admin_password])
    );

    my $xml = $response->decoded_content;
    if (!defined $xml) {
        warn "Login response had no decodable content\n";
        return;
    }

    # The original "(warn $@ && return) if ($@)" parsed as
    # warn($@ && return): the return ran first and the warning was never
    # printed. Warn and return as two separate statements.
    my $parsed = eval { XMLin($xml) };
    if ($@) {
        warn $@;
        return;
    }

    my $result;
    if (ref($parsed) eq 'HASH' && ref($parsed->{login}) eq 'HASH') {
        $result = $parsed->{login}{result};
    }
    $self->{settings}{loggedIn} = (defined($result) && $result eq 'Success');

    print(($self->{settings}{loggedIn}) ? ("Logged in.\n") : ("Log in failed.\n"));
}

# Look up the command parser that the host shares under the label given in
# $self->{params}{command_parser_label}, remember it in
# $self->{CommandParser}, and register the RecoverTalkArchive sub with it
# under the method name 'RecoverTalkArchive'.
# Dies (with no message) when no parser is found under that label.
sub register_method {
    my $parser_label = $self->{params}{command_parser_label};
    my $parser       = $self->{shared}{$parser_label};
    die unless $parser;

    $self->{CommandParser} = $parser;
    $parser->methodHandler('RecoverTalkArchive', \&RecoverTalkArchive);
}

# Handler registered with the command parser by register_method().
#
# Arguments:
#   $rh_extras   - hash reference; its 'mw' entry is stored in $self->{mw}
#                  and later used by make_report to fetch and edit pages.
#   $ra_settings - array reference of [key, value] pairs, converted to a
#                  hash by parse_settings().
#
# Replaces $self->{settings}, clears $self->{result} and hands
# scan_talk_page to the host's add_job callback.
sub RecoverTalkArchive {
    my ($rh_extras, $ra_settings) = @_;

    $self->{settings} = parse_settings($ra_settings);
    $self->{mw}       = $rh_extras->{'mw'};
    $self->{result}   = '';

    $self->{shared}{add_job}->(\&scan_talk_page, 0);
}

# Convert a list of [key, value] pairs into a hash.
#
# Argument: array reference whose elements are array references; element 0
# of each is used as the key and element 1 as the value.
# Returns a reference to a new hash. When a key occurs more than once the
# last pair wins.
sub parse_settings {
    my ($ra_pairs) = @_;

    my %setting_for;
    for my $ra_pair (@{$ra_pairs}) {
        $setting_for{ $ra_pair->[0] } = $ra_pair->[1];
    }

    return \%setting_for;
}

# Make the text of a removed section heading safe to list in the report.
sub _report_safe_heading {
    my ($heading) = @_;

    # NOTE(review): the source had a substitution with an empty pattern here
    # ("s|||g"), which in Perl re-uses the last successful pattern instead of
    # matching anything useful. Removing embedded tags is a reconstruction of
    # the apparent intent - confirm.
    $heading =~ s|<[^>]*>||g;

    # Wrap template markup so it is shown literally instead of being
    # transcluded into the report. One wrap covers both "{{a:b}}" and "{{a}}";
    # wrapping twice would nest the tags.
    $heading =~ s|(\{\{.*\}\})|<$nowiki>$1</$nowiki>|;

    # Add a leading colon to the first image:/file: prefix.
    # NOTE(review): presumably so the report links to the file instead of
    # embedding it - confirm.
    $heading =~ s|image:|:Image:|i;
    $heading =~ s|file:|:file:|i;

    return $heading;
}

# Fetch one batch of revisions of the target page (oldest first) and record
# every section heading that a revision removed.
#
# Reads  $self->{settings}{target} and {start_pos}.
# Writes $self->{settings}{headings}, {lastpage}, {totalRevisions},
#        {start_pos}, {prev_revid} and {prev_timestamp}.
#
# Each recorded heading is a hash with the keys revid (the revision that
# still contained the section), timestamp, comment, user (all three taken
# from the removing revision) and heading.
#
# When the API response names a continuation revision, this sub re-queues
# itself through add_job; otherwise it appends a {lastRevid, lastTimestamp}
# end marker to the headings list and queues make_report.
# Dies when the API response is not parseable XML.
sub scan_talk_page {
    my $batch_size = 500;

    my $target  = uri_escape($self->{settings}{target});
    my $rvstart = $self->{settings}{start_pos}
        ? "&rvstartid=$self->{settings}{start_pos}"
        : '';

    # Build the query directly. The source tried to fill the limit, start id
    # and title in with substitutions whose patterns were empty, so the
    # values never ended up in the right place.
    my $url = 'http://en.wikipedia.org/w/api.php?action=query&format=xml'
            . '&prop=revisions&rvdir=newer'
            . "&rvlimit=$batch_size$rvstart"
            . '&rvprop=timestamp|user|comment|content|ids'
            . "&titles=$target";

    print "Fetching $batch_size revisions for: $self->{settings}{target}\n";
    my $xml = $self->{ua}->get($url)->decoded_content;
    eval { $xml = XMLin($xml); };
    if ($@) {
        die "Failed to parse XML: $@\n$xml\n";
    }

    # XML::Simple yields a hash for a single <rev> and an array for several;
    # normalise to an array, and to an empty one when there is no <rev>.
    my $ra_revisions = $xml->{'query'}{'pages'}{'page'}{'revisions'}{'rev'};
    if (!defined $ra_revisions) {
        $ra_revisions = [];
    }
    elsif (ref($ra_revisions) ne 'ARRAY') {
        $ra_revisions = [$ra_revisions];
    }

    print "Got " . scalar(@{$ra_revisions}) . " revisions...\n";
    $self->{settings}{totalRevisions} += scalar(@{$ra_revisions});
    $self->{settings}{headings} ||= [];

    # Carry the previous revision over from the preceding batch: {lastpage}
    # already persists between batches, so the id that goes with it must too,
    # otherwise a removal found in the first revision of a batch is recorded
    # with an undefined revid.
    my $prev_id        = $self->{settings}{prev_revid};
    my $last_timestamp = $self->{settings}{prev_timestamp};

    foreach my $rh_revision (@{$ra_revisions}) {
        my $content = $rh_revision->{'content'};
        $content = '' unless defined $content;
        my @lines = split(/\n/, $content);

        if ($self->{settings}{lastpage}) {
            my @diffs = diff($self->{settings}{lastpage}, \@lines);
            foreach my $ra_hunk (@diffs) {
                foreach my $ra_diff (@{$ra_hunk}) {
                    # Each diff entry is [action, position, line text].
                    my ($action, $line) = @{$ra_diff}[0, 2];
                    next unless $action eq '-';
                    next unless $line =~ m|^==+\s*([^=]*)\s*==+\s*$|;

                    my $heading = _report_safe_heading("$1");
                    push(@{ $self->{settings}{headings} }, {
                        revid     => $prev_id,
                        timestamp => $rh_revision->{'timestamp'},
                        comment   => $rh_revision->{'comment'},
                        user      => $rh_revision->{'user'},
                        heading   => $heading,
                    });
                }
            }
        }

        # @lines is a fresh array on every iteration, so keeping a reference
        # to it is safe and avoids splitting the same content a second time.
        $self->{settings}{lastpage} = \@lines;
        $prev_id        = $rh_revision->{'revid'};
        $last_timestamp = $rh_revision->{'timestamp'};
    }

    $self->{settings}{prev_revid}     = $prev_id;
    $self->{settings}{prev_timestamp} = $last_timestamp;

    my $next_start = $xml->{'query-continue'}{'revisions'}{'rvstartid'};
    if ($next_start) {
        $self->{settings}{start_pos} = $next_start;
        print "Next batch starts at: $self->{settings}{start_pos} ($last_timestamp)\n";
        $self->{shared}{add_job}->(\&scan_talk_page, 1);
    }
    else {
        push(@{ $self->{settings}{headings} },
             { lastRevid => $prev_id, lastTimestamp => $last_timestamp });
        print "No more revisions left. Writing report.\n";
        $self->{shared}{add_job}->(\&make_report, 1);
    }
    print "\n";
}

# Build the wikitext report from $self->{settings}{headings} and save it to
# the page named in $self->{settings}{reportTo}.
#
# The report groups removed sections under one "== YYYY-MM-DD ==" heading per
# day, and under one entry per revision giving links to that revision, the
# user and edit summary of the removal, and the removed section headings.
# Processing stops at the end marker (an entry with a lastRevid key) that
# scan_talk_page appends.
#
# Appends to $self->{result}. Dies with the API error code and details when
# the edit fails.
sub make_report {
    my $last_date   = '';
    my $last_rev_id = '';

    # NOTE(review): in the source both external links had no target and
    # $title was never used. The two URLs below use the standard index.php
    # "oldid" and "diff" parameters and are a reconstruction - confirm.
    my $title    = uri_escape($self->{settings}{target});
    my $base_url = "http://en.wikipedia.org/w/index.php?title=$title";

    HEADING:
    foreach my $rh_heading (@{ $self->{settings}{headings} }) {
        # Test for the key instead of a true value, so the end marker is
        # still recognised when its revision id is undefined.
        if (exists $rh_heading->{lastRevid}) {
            $self->{result} .= "\n";
            last HEADING;
        }

        my $timestamp = defined $rh_heading->{timestamp} ? $rh_heading->{timestamp} : '';

        # Capture in list context: reading $1 after an unchecked match would
        # pick up a stale value whenever the timestamp does not match.
        my ($printable_date) = $timestamp =~ m|^(\d\d\d\d-\d\d-\d\d)|;
        $printable_date = '' unless defined $printable_date;

        if ($printable_date ne $last_date) {
            $self->{result} .= "== $printable_date ==\n";
        }

        # Reduce the edit summary to plain text: drop braces, newlines and
        # __...__ sequences, and add a leading colon to image:/file: prefixes.
        my $summary = defined $rh_heading->{comment} ? $rh_heading->{comment} : '';
        $summary =~ s|[{}]||g;
        $summary =~ s|\n||g;
        $summary = 'None' unless length $summary;
        $summary =~ s|image:|:Image:|i;
        $summary =~ s|file:|:file:|i;
        $summary =~ s|__.*?__||g;

        # String comparison keeps an undefined revision id from being used in
        # a numeric comparison.
        my $rev_id = defined $rh_heading->{revid} ? $rh_heading->{revid} : '';
        if ($rev_id ne $last_rev_id) {
            my $permalink = "$base_url&oldid=$rev_id";
            my $diff_link = "$base_url&diff=next&oldid=$rev_id";
            $self->{result} .= ";[$permalink Archive link for $timestamp] [$diff_link (diff)]\n";
            $self->{result} .= ":* Removed by: User:$rh_heading->{user}\n";
            $self->{result} .= ":* Summary: \"$summary\"\n";
            $self->{result} .= ":* Sections removed:\n";
        }

        $self->{result} .= "::* $rh_heading->{heading}\n";
        $self->{result} .= "\n";

        $last_rev_id = $rev_id;
        $last_date   = $printable_date;
    }

    print "Sending report to $self->{settings}->{'reportTo'}\n";
    my $page = $self->{mw}->get_page({ title => $self->{settings}->{'reportTo'} });

    # basetimestamp is the timestamp of the page version fetched just above.
    $self->{mw}->edit({
        action        => 'edit',
        title         => $self->{settings}->{'reportTo'},
        basetimestamp => $page->{'timestamp'},
        text          => $self->{result},
        summary       => 'Posting talk page archive report.',
    }) or die $self->{mw}->{error}->{code} . ': ' . $self->{mw}->{error}->{details};

    print "Done.\n\n";
}

1;