User:Plastikspork/tfd helper.pl


 * 1) !/usr/bin/perl

use strict;
 * 1) Insist that all variables be declared

use utf8;
 * 1) Allow for utf8 characters

use MediaWiki::Bot;
 * 1) Use the MediaWiki::Bot library

use Encode;
 * 1) Use the Encode library

my ($user, $pass) = ("",""); my ($task, $edit_summary, $is_minor) = ("","",""); my (@pagelist, @tasklist, @templatelist, @regexplist); my ($namespace_string, $templates) = ("","");
 * 1) Declare some global variables

$is_minor = 1; # Label edits as minor $user = 'SporkBot'; $edit_summary = 'Orphan per TFD outcome'; $task = 0;
 * 1) Set default values

my @tasklist = ( "Remove a template and trailing whitespace and newline",  "Replace a template and whitespace with a single space",  "Replace a template with nothing",  "Substitute a template and use subst=subst:",  "Substitute a template" );

print "What task is being performed?\n"; for( my $i=0; $i <= $#tasklist; $i++) { print $i .") ". $tasklist[$i] ."\n"; } print "Press enter for default [".$task."]\n"; print "Task: "; $task = &my_get_response($task);
 * 1) Do we want to orphan or substitute?

print "Which template(s) would you like to orphan?\n"; print "For a list, delimit with commas\n"; print "Template: "; $templates = &my_get_response("BLANK RESPONSE");
 * 1) Get the name of the template to orphan

$templates =~ s/[_ ]+/ /g; $templates =~ s/^ [ ]*//; $templates =~ s/ [ ]*$//;
 * 1) Replace underscores and excess spaces

$templates =~ s/[ ]*[Tt]emplate[ ]*:[ ]*//;
 * 1) Strip off leading "Template:" and

@templatelist = split /[ ]*,[ ]*/, $templates;
 * 1) Create the template orphan list

$namespace_string = &my_get_namespace_list;
 * 1) Set the namespace

print "Username [".$user."]: "; $user = &my_get_response($user);
 * 1) Get username

print "Password [".$pass."]: "; $pass = &my_get_response($pass);
 * 1) Get password

my $editor=MediaWiki::Bot->new($user); $editor->set_wiki('en.wikipedia.org','w');
 * 1) Create an editor object on the English language WP

$editor->{debug} = 1;
 * 1) Turn debugging on, to see what the script is doing

if ( $editor->login($user, $pass) ) { print "Failed to login!\n"; exit; }
 * 1) Log in

my @plist = $editor->what_links_here_ns("Template:".$templatelist[0],4); foreach my $pagestruct (@plist) { # Only want links, not transclusions if ( $pagestruct->{'type'} =~ /^$/ ) { # Get the actual page name from the page structure my $page = $pagestruct->{'title'}; if ( $page =~ /\/log\//i ) { if ( ($task == 3) or ($task == 4) ) { $edit_summary = "Replace template per ". "TFD outcome; no change in content"; } else { $edit_summary = "Remove template per ". "TFD outcome"; }   }  } }
 * 1) Attempt to get the link to the TFD in question, using first on list
 * 1) Loop over all pages in the page list

print "The current edit summary is set to:\n$edit_summary\n"; print "Provide a new edit summary, or press enter to use the default\n"; print "Summary: "; $edit_summary = &my_get_response($edit_summary);
 * 1) Allow user to override edit summary

foreach (@templatelist) { my $str = $_; my @rlist = $editor->what_links_here_ns("Template:".$str, 10); # Escape any characters used in regular expressions $str =~ s/([\-\.\[\]])/\\$1/gi; # First character is case insensitive $str =~ s/^([A-Za-z])(.*)$/[\u$1\l$1]$2/; # Wikipedia allows for underscores and spaces in template references $str =~ s/ /[_ ]+/gi; # Add template to the list push @regexplist, $str;
 * 1) Create a list of regular expressions

# Add redirects to the list foreach (@rlist) { my $tstruct = $_; # Only want redirects, not links or transclusions if( $tstruct->{'type'} =~ /redirect/ ) { $str = $tstruct->{'title'}; # Remove leading Template: in name $str =~ s/^Template://; # Escape any characters used in regular expressions $str =~ s/([\-\.\[\]])/\\$1/gi; # First character is case insensitive $str =~ s/^([A-Za-z])(.*)$/[\u$1\l$1]$2/; # Wikipedia allows for underscores and spaces in template references $str =~ s/ /[_ ]+/gi; push @regexplist, $str; } } }

foreach my $template (@templatelist) { if ($namespace_string) { # Loop over each namespace and get the list of transclusions foreach my $ns (split /\&/, $namespace_string) { if ( $ns =~ /ns([0-9]+)[=]1/ ) { my @alist = $editor->what_links_here_ns("Template:".$template, $1); foreach my $pagestruct (@alist) { # Only want transclusions, not links if ( $pagestruct->{'type'} =~ /transclusion/ ) { push @pagelist, $pagestruct->{'title'}; }    }      }    }  } else { # Get the list of transclusions in all namespaces my @alist = $editor->what_links_here_ns("Template:".$template, ''); foreach my $pagestruct (@alist) { # Only want transclusions, not links if ( $pagestruct->{'type'} =~ /transclusion/ ) { push @pagelist, $pagestruct->{'title'}; }   }  } }
 * 1) Get the list of all transclusions in the specified namespaces

@pagelist = sort(@pagelist); my $prev = "not ".$pagelist[0]; @pagelist = grep($_ ne $prev && ($prev = $_, 1), @pagelist);
 * 1) Remove duplicates from the page list

if( &my_confirm_job ) { # Job not confirmed, so exit exit; }
 * 1) Confirm job paramters before starting

foreach my $page (@pagelist) { # Show which pages are being processed print "Processing: ".$page."\n"; # Pull the wikitext for the page my $text=$editor->get_text($page); # Save the old wikitext for diff my $old = $text;
 * 1) Loop over all pages in the page list

# Split the page into chunks to isolate comments my @chunks = split(//s, $text);

# Loop over sections foreach $text (@chunks) { if ($text =~ /<!--/) { # Skip chunks consisting of only comments } else { foreach my $str (@regexplist) { if ($task == 0) { # Remove the template and trailing spaces/newlines $text =~ s/)[ ]*[\r\n]?//g;       } elsif ($task == 1) {          # Replace the template with whitespace          $text =~ s/[ ]*)[ ]*/ /g; } elsif ($task == 2) { # Replace the template with nothing $text =~ s/)//g;       } elsif ($task == 3) {          # Substitute the template with subst=subst:          $text =~ s/)/{{subst:$1|subst=subst:$2/g; } elsif ($task == 4) { # Substitute the template $text =~ s/)/{{subst:$1$2/g;       }      }    }  }  $text = join('', @chunks);

&my_simple_diff($old,$text);

print "Press enter to submit, s to skip, or ctrl-c to quit\n"; my $response = &my_get_response("");

if ($response =~ /s/i) { print "Skipping...\n"; } else { print "Submitting...\n"; # Submit to Wikipedia. # Warning: This does not warn of edit conflicts. $editor->edit($page, $text, $edit_summary, $is_minor); }

# Take a break (frequent edits are forbidden per bot policy) print "Sleep 10\n"; sleep 10; }

sub my_get_namespace_list { my $all_ns  = '&ns0=1&ns1=1&ns2=1&ns3=1&ns4=1&ns5=1&ns6=1&ns7=1' .'&ns8=1&ns9=1&ns10=1&ns11=1&ns12=1&ns13=1&ns14=1' .'&ns15=1&ns100=1&ns101=1&ns108=1&ns109=1'; my $no_ns   = '&ns0=0&ns1=0&ns2=0&ns3=0&ns4=0&ns5=0&ns6=0&ns7=0' .'&ns8=0&ns9=0&ns10=0&ns11=0&ns12=0&ns13=0&ns14=0' .'&ns15=0&ns100=0&ns101=0&ns108=0&ns109=0'; my ($num,$list) = ("","");

print "Provide a list of namespaces in which the script will operate\n"; print "-- --- -- -\n"; print " 0 Article      1 Talk              2 User         3 User talk\n"; print " 4 Wikipedia    5 Wikipedia talk    6 File         7 File talk\n"; print " 8 MediaWiki    9 MediaWiki talk   10 Template    11 Template talk\n"; print " 12 Help       13 Help talk        14 Category    15 Category talk\n"; print "100 Portal    101 Portal talk     108 Book       109 Book talk\n"; print "-- --- -- -\n"; print "* Delimit list with commas for multiple namespaces\n"; print "* Start the list with 'no' for the compliment of the list\n"; print "* Leave blank for everything\n"; print "Namespace: "; my $ns = ;

# Remove spaces and newlines $ns =~ s/[\t\r\n ]//g;

if( $ns =~ /^$/ ) { # Everything $list = ""; } elsif( $ns =~ /^no[t ]*([0-9].*)$/ ) { # Set compliment $ns = $1; $list = $all_ns; foreach $num (split /[ ]*,[ ]*/, $ns) { $list =~ s/ns($num)=1/ns$1=0/g; } } else { # Set $list = $no_ns; foreach $num (split /[ ]*,[ ]*/, $ns) { $list =~ s/ns($num)[=]0/ns$num=1/g; } }  $list =~ s/\&ns[0-9]+=0//g;

return $list; }

sub my_confirm_job { my $opt = 4; while( ($opt < 5) && ($opt > 0) ) { if ( $opt == 1 ) { print "\n"; print "                        Template list                          \n"; print "\n"; foreach (@templatelist) { print $_."\n"; } } elsif ( $opt == 2 ) { print "\n"; print "                    Regular expression list                    \n"; print "\n"; foreach (@regexplist) { print $_."\n"; } } elsif ( $opt == 3 ) { print "\n"; print "                          Page list                            \n"; print "\n"; foreach (@pagelist) { print $_."\n"; } } elsif ( $opt == 4 ) { print "\n"; print "                        Edit Summary                           \n"; print "\n"; print "\n".$edit_summary."\n\n"; print "\n"; print "                         Job Summary                           \n"; print "\n"; print "\n"; print " Username:                       ". $user ."\n"; print " Task:                           ". $tasklist[$task]. "\n"; print " Number of templates:            ". scalar @templatelist ."\n"; print " Number of templates + redirects ". scalar @regexplist ."\n"; print " Number of pages to process      ". scalar @pagelist ."\n"; print " Namespace string                ". $namespace_string ."\n"; print "\n"; print "\n"; } else { print " Unknown option\n"; $opt = 5; } print " 0) Start job\n";  print " 1) Show list of templates\n"; print " 2) Show list of regular expressions\n"; print " 3) Show list of pages\n"; print " 4) Redisplay summary\n"; print " 5) Quit\n"; print "Option: "; $opt = ; # Remove newlines, underscores, and excess spaces $opt =~ s/[_ \r\n]+//g;

if ( $opt == 5 ) { return 1; } }

return 0; }

sub my_get_response { my $val = shift; my $in = ; # Replace newlines and tabs with spaces $in =~ s/[\t\r\n]/ /g; # Remove leading spaces $in =~ s/^ [ ]*//; # Remove trailing spaces $in =~ s/ [ ]*$//; if( $in =~ /^$/ ) { return $val; } else { return $in; } }
 * 1) Simple keyboard response

sub my_simple_diff { my $a = shift; my $b = shift;
 * 1) Very basic module for showing diffs

my @alines = split /[\r\n]/, $a; my @blines = split /[\r\n]/, $b;

my $i = 0; my $j = 0;

while( $i < scalar @alines and $j < scalar @blines ) { if( $alines[$i] ne $blines[$j] ) { if( $alines[$i+1] eq $blines[$j] ) { print "**** Diff ($i,-) ****\n"; print "- ".$alines[$i]."\n"; $i++; } else { print "**** Diff ($i,$j) ****\n"; print "+ ". $blines[$j]."\n"; print "- ". $alines[$i]."\n"; }   }    $i++; $j++; } }

sub MediaWiki::Bot::what_links_here_ns { my $self   = shift; my $article = shift; my $ns     = shift; my @links;
 * 1) Custom module for namespace restricted "what links here" list

$article = MediaWiki::Bot::uri_escape_utf8( $article );

my $res = $self->_get( 'Special:Whatlinkshere', 'view',     "&target=$article&limit=5000&namespace=$ns" ); unless (ref($res) eq 'HTTP::Response' && $res->is_success) { return 1; } my $content = $res->decoded_content; while (   $content =~ m{[^<]+([^<]*)}g ) { my $title = $1; my $type = $2; if ( $type !~ /\(redirect page\)/ && $type !~ /\(transclusion\)/ ) { $type = ""; }   if ( $type =~ /\(redirect page\)/ ) { $type = "redirect"; } if ( $type =~ /\(transclusion\)/ ) { $type = "transclusion"; }

push @links, { title => $title, type => $type }; }

return @links; }