User:Philosobot/Source code/afd/afd.cgi

use POSIX; # the strftime function use CGI::Carp qw(fatalsToBrowser); use strict; undef $/;
 * 1) !/usr/bin/perl

use lib $ENV{HOME}. '/public_html/cgi-bin/wp/modules'; # absolute path to perl modules use lib '/home/philosobot/public_html/cgi-bin/wp/modules'; # absolute path to perl modules use lib '../wp/modules'; # relative path to perl modules

require 'bin/perlwikipedia_utils.pl'; require 'bin/get_html.pl';


 * 1) Count and list the pages containing Wikipedia articles for deletion discussions (AfD).
 * 2) Archive the pages on which all deletion discussions are closed.
 * 3) Initialize pages for the upcoming days.

my $afd_cutoff = 7;
 * 1) Discussions more than this number of days in the past are considered old and must be listed at AfD

my $gEditor;

MAIN: {

print "Content-type: text/html\n\n";
 * 1) This line must be the first to print in a cgi script

$| = 1; # flush the buffer each line

print "Please be patient, this script can take a minute or two " . "if Wikipedia is slow ... \n";

my $cur_dir = $0; # $0 stands for the executable, with or without full path if ($cur_dir =~ /^\//){ $cur_dir =~ s/^(.*)\/.*?$/$1/g; chdir $cur_dir; }else{ }
 * 1) If the full path to the script is known (such as when running this
 * 2) script from crontab), go to that directory first

$gEditor=wikipedia_login;
 * 1) The log in process must happen after we switched to the right directory as done above

my $attempts = 10; my $sleep = 1;

my $summary_file = "Wikipedia:Articles_for_deletion/Old.wiki"; my $detailed_file = "Wikipedia:Articles_for_deletion/Old/Open AfDs.wiki";

my $combined_stats = &count_and_list_open_AfDs($summary_file, $detailed_file, $attempts, $sleep);
 * 1) Display the number of open afd discussions in the pages listed in
 * 2) in $summary_file and put links to those those discussions in $detailed_file.
 * 3) Return the list of pages in $combined_stats, we'll need that to decide which
 * 4) pages to archive.

my $archive_file = "Wikipedia:Archived deletion discussions.wiki"; &update_archived_discussions($archive_file, $combined_stats, $attempts, $sleep);
 * 1) Update the list of archived discussions, the ones that are no longer at AfD/Old,
 * 2) which is the text in $combined_stats

&initialize_new_afd_days($attempts, $sleep);
 * 1) Initialize afd pages for the next several days

print " Finished! One may now go back to " . "" . "Wikipedia:Articles for deletion/Philosophy/Old. \n"; }

sub count_and_list_open_AfDs {


 * 1) Display the number of open afd discussions in $summary_file and list them in $detailed_file.

my $summary_file = shift; my $detailed_file = shift; my $attempts = shift; my $sleep = shift;

my ($stats, $detailed_stats, $detailed_combined_stats); $detailed_combined_stats = "\n";

my ($text, $edit_summary, $error);

$text = wikipedia_fetch($gEditor, $summary_file, $attempts, $sleep);
 * 1) Fetch the summary file

($text, $edit_summary) = &add_another_day ($text);
 * 1) add the discussion from $afd_cutoff+1 days ago, if it is not already in $text

my @lines = split("\n", $text); my $line; my ($brief_afd_link, $link, %stats_hash); foreach $line (@lines) {
 * 1) Find the number of open discussions for each listed day

next unless ($line =~ /^\*\s*\[\[(Wikipedia:(?:Pages|Votes|Articles) for deletion\/Log\/\d+.*?)\]\]/); $link=$1; if ($link =~ /^(.*?)\|(.*?)$/){ $link = $1; $brief_afd_link = $2; }else { $brief_afd_link = $link; } print "Now doing $link ... ";
 * 1) extract the links to the open discussions

my $full_link = $link; $full_link =~ s/ /_/g; $full_link = 'http://en.wikipedia.org/wiki/'. $full_link;

($text, $error) = &get_html ($full_link);

($stats, $detailed_stats) = &see_open_afd_discussions ($link, $text, $detailed_file); $detailed_combined_stats = $detailed_combined_stats. $detailed_stats;
 * 1) see which AfD debates are not closed yet, and put that info in the link.
 * 2) Get both a brief and a complete list, to put in different places.

$line = '* ' . $brief_afd_link . ' '. $stats; $stats_hash{$link} = "$line"; }

$text = wikipedia_fetch($gEditor, $summary_file, $attempts, $sleep); ($text, $edit_summary) = &add_another_day ($text); @lines = split("\n", $text);
 * 1) The file might have changed while we were doing the calculation above.
 * 2) Get a new copy.

my $num_days = 0; # cout how many days are listed, ... my $num_open_disc = 0; # ... and how open many discussions my $combined_stats=""; foreach $line (@lines){ if ($line =~ /^\*\s*\[\[(Wikipedia:(?:Pages|Votes|Articles) for deletion\/Log\/\d+.*?)\s*(?:\||\]\])/) { $link=$1; $num_days++; if (exists $stats_hash{$link}) { $line = $stats_hash{$link}; # Overwite this line with the stats we found above } if ( $line =~ /\((\d+) open/ ){ $num_open_disc = $num_open_disc + $1; } } $combined_stats = $combined_stats . "$line\n"; }
 * 1) gather all the info in $text

my $utc_time=strftime("%H:%M, %B %d, %Y (UTC)", gmtime(time)); $combined_stats =~ s/(\/afd\/afd\.cgi.*?\]).*?\n/$1 \(last update at $utc_time\)\n/;

$edit_summary = "There are $num_open_disc open discussion(s) in $num_days day(s)." . $edit_summary; if ($num_open_disc > 200){ $edit_summary = "Big Backlog: ". $edit_summary; }

wikipedia_submit($gEditor, $summary_file, $edit_summary, $combined_stats, $attempts, $sleep); wikipedia_submit($gEditor, $detailed_file, $edit_summary, $detailed_combined_stats, $attempts, $sleep);

return $combined_stats; }

sub add_another_day{

my ($text, $afd_link, $hour_now, $thresh, $edit_summary, $SECONDS_PER_DAY, $brief_afd_link, $seconds);

$text = shift;

$hour_now=strftime("%H", localtime(time)); $thresh = 0; # midnight on gmt
 * 1) If beyond certain hour of the day (midnight GMT time),
 * 2) add a link for the Afd discussion six days ago if not here yet

if ($hour_now < $thresh){ return ($text, ""); }

($afd_link, $brief_afd_link) = &get_afd_link(-$afd_cutoff-1); # Older than $afd_cutoff

my $tag=''; $edit_summary=""; if ($text !~ /\n\*\s*\[\[\Q$afd_link\E/){ $text =~ s/$tag/$tag\n\* \[\[$afd_link\|$brief_afd_link\]\]/g; $edit_summary=" Link to \[\[$afd_link\]\]."; }

return ($text, $edit_summary); }

sub get_afd_link {

my $days_from_now = shift;

my $SECONDS_PER_DAY = 60 * 60 * 24;

my $seconds = time + $days_from_now * $SECONDS_PER_DAY; my $afd_link = strftime("Wikipedia:Articles for deletion/Log/%Y %B %d", localtime($seconds)); $afd_link =~ s/ 0(\d)$/ $1/g; # make 2005 April 01 into 2005 April 1

my $brief_afd_link = strftime("%d %B (%A)", localtime($seconds)); $brief_afd_link =~ s/^0//g;

return ($afd_link, $brief_afd_link); }

sub fmt_date {

my ($link, $date); $link = shift;

if ($link =~ /^.*\/(\d+)\s+(\w+)\s+(\d+)/){ return "$3 $2"; }else{ return ""; }
 * 1) 'Wikipedia:Articles for deletion/Log/2006 December 16' --> '16 December'

}

sub extract_links{


 * 1) Extract links to afd discussions and put them into a hash

my $text = shift; my @links_arr = ($text =~ /\[\[(Wikipedia:Articles for deletion\/Log\/\d+.*?)(?:\||\]\])/g);

my ($link, %links_hash); foreach $link(@links_arr){ $links_hash{$link} = 1; }

return %links_hash; }

sub update_archived_discussions {


 * 1) Daily Afd pages that are at least six days old and that are no longer
 * 2) at Afd/Old (where they are closed) are considered archived, and
 * 3) should be added to the list of archived Afd pages.

my $archive_file = shift; # The name of the file containing the archives my $afd_text = shift; # What is at AfD/Old, those things should not yet be archived my $attempts = shift; my $sleep = shift;

my $archived_text = wikipedia_fetch($gEditor, $archive_file, $attempts, $sleep);
 * 1) The current text on the archive. We'll add to it.

my ($curr_year, $prev_year);

my %skip_archive = &extract_links($afd_text);
 * 1) Identify the discussions in AfD/Old, which won't be added to the archive

$curr_year = strftime("%Y", gmtime(time)); $prev_year = $curr_year - 1;

if ($archived_text !~ /==+\s*$curr_year\s*==+/){

if ($archived_text !~ /^(.*?)(==+\s*)$prev_year(\s*==+)(.*?)$/s){ print "Previous year section is missing, don't know what to do \n"; return; }
 * 1) Add section for current year if missing

$archived_text = $1. $2 . $curr_year. $3 . "\n". $2 . $prev_year. $3 . $4; }
 * 1) Add current year above previous year

my $start = -$afd_cutoff-2; my $stop = -366; my $day;
 * 1) Any day in the current year up no earlier than
 * 2) $afd_cutoff+2 days ago is a candidate to be in the archive
 * 3) (unless, again, that page is still at AfD/Old). Days 0, -1,
 * -2, -3, -4, -5,, ... -$afd_cutoff are still open, while
 * 1) day -$afd_cutoff-1 is now in the process of being closed.

my ($new_links, $afd_link, $prev_afd_link, $link_sans_day, $prev_link_sans_day); my (@new_links_array);

@new_links_array = ; $new_links = ""; $prev_afd_link = "";

my $first_day = 1; # mark that this is the first day in the list for ($day = $start ; $day >= $stop ; $day--){
 * 1) Add only the days from the current year to the archive.
 * 2) Go in reverse, from the most recent date towards the past.

my ($afd_link, $brief_afd_link) = &get_afd_link($day);

next if (exists $skip_archive{$afd_link});
 * 1) Pages which are still at Afd/Old should not be archived yet.
 * 2) Eventually after all discussions in such page are closed, the users will
 * 3) remove the page from AfD/Old, and then the bot will get its hand on it.

next unless ($afd_link =~ /\/$curr_year/); # deal only with the current year

$link_sans_day = $afd_link; $link_sans_day =~ s/\s*\d+$//g; $prev_link_sans_day = $prev_afd_link; $prev_link_sans_day =~ s/\s*\d+$//g;
 * 1) See if to add a section separating two months

if ( $first_day || ($link_sans_day ne $prev_link_sans_day && $prev_link_sans_day ne "") ){
 * 1) Add a section heading only if we are between months or we arrived
 * 2) at the most recent day

$link_sans_day =~ s/^(.*)\/(.*?)$/Deletion discussions\/$2/g; $new_links = $new_links. "\n===$link_sans_day===\n\n";

$first_day = 0; # First day passed }

$new_links = $new_links. "* $afd_link\n"; push(@new_links_array, $afd_link);

$prev_afd_link = $afd_link; }
 * 1) Prepare for the next loop

if ($archived_text !~ /^(.*?==+\s*$curr_year\s*==+)(.*?)(==+\s*$prev_year\s*==.*?)$/s) { print "Previous year section is missing, don't know what to do \n"; return; }
 * 1) Before updating $archived_text, see what is there currently, so that
 * 2) we can see what changed and put that in the edit summary.

my $p1 = $1; my $existing_text = $2; my $p3 = $3;

my %existing_links = &extract_links($existing_text);
 * 1) See what links are in @new_links_array and are not in %existing_links.
 * 2) Put those in the edit summary.

my $edit_summary = ""; foreach $afd_link (@new_links_array){ if (!exists $existing_links{$afd_link}){ $edit_summary = $edit_summary. "$afd_link "; } }
 * 1) This is a link which will be added to the archive now and which was
 * 2) not there before

if ($edit_summary eq ""){ print "No new pages to archive \n"; return; }

$archived_text = $p1. "\n". $new_links. "\n". $p3;
 * 1) Replace in $archived_text the portion corresponding to the links for this year
 * 2) with $new_links which contains the newly archived links

$edit_summary = "Archiving ". $edit_summary;

wikipedia_submit($gEditor, $archive_file, $edit_summary, $archived_text, $attempts, $sleep); }

sub see_open_afd_discussions { my $link = shift; my $text = shift; my $detailed_file = shift;

my $stats = "";

$text =~ s/\n//g; # rm newlines

$text =~ s/^.*?\//sg;
 * 1) strip the top part, as otherwise it confuses the parser below

$text =~ s/\"boilerplate[_\s]+metadata[_\s+][avp]fd.*?\"/\"boilerplate metadata vfd\"/ig;
 * 1) some processing to deal with AfD ambiguity recently

$text =~ s/(\)/$1p$3/sgi;

my @all = ($text =~ /\\[\\[\\[\<a href\s*=\s*\"\/w\/index.php\?title\s*=\s*(Wikipedia:\w+[_\s]for[_\s]deletion.*?)\"/g );

my $openc=0; foreach (@open) {

next if (/Wikipedia:\w+[_\s]for[_\s]deletion\/Log/i); next unless (/\&amp;section=(T-|)1/); s/\&.*?$//g; $openc++;

$stats = "$stats ". "\[\[$_\|$openc]]"; } print "($openc open / ";

my $closedc=0; foreach (@closed) { next if (/Wikipedia:\w+[_\s]for[_\s]deletion\/Log/i); next unless (/\&amp;section=(T-|)1/); s/\&.*?$//g; $closedc++;

} print "$closedc closed / ";
 * 1) print "$closedc: $_\n";

my $allc=0; foreach (@all) { next if (/Wikipedia:\w+[_\s]for[_\s]deletion\/Log/i); next unless (/\&amp;section=(T-|)1/); s/\&.*?$//g; $allc++; } print "$allc total discussions) \n";
 * 1) print "$allc: $_\n";

my $detailed_stats = $stats; my $short_link = $link; $short_link =~ s/^.*\///g; $detailed_file =~ s/\.wiki$//g;
 * 1) some gimmickry, to list to sections in $detailed_file.

if ($openc == 0 ){ $stats = "($openc open / $closedc closed / $allc total discussions)"; }elsif ( $openc > 20 ){ $stats = "($openc open / $closedc closed / $allc total discussions; see open)"; }else{ $stats = "($openc open / $closedc closed / $allc total discussions; open: $stats)"; }
 * 1) if there are too many open afds, link to the file listing them. Otherwise, list them right here.

my $http_link = $link; $http_link =~ s/ /_/g; $http_link = '(. $http_link . '&action=edit edit this day\'s list)';

$detailed_stats =~ s/\s*\[\[(.*?)\|\d+\]\]/\* \[\[$1\]\]\n/g; $detailed_stats =~ s/_/ /g; $detailed_stats = "==$short_link==\n". $http_link. "\n". $detailed_stats;
 * 1) text to add to a subpage listing all open discussions

return ($stats, $detailed_stats); }

sub initialize_new_afd_days {


 * 1) Initialize afd pages for the next several days by putting in a
 * 2) preamble for each day. When such a future day becomes today, the
 * 3) users will edit that afd page page and will add afd listings below
 * 4) the preamble.

print "\n\n Initializing AfD pages for the next several days \n";

my $attempts = shift; my $sleep = shift;
 * 1) Parameters related to fetching/submitting data to Wikipedia

my ($day);

for ($day = 1 ; $day < 5 ; $day++){

my ($prev_afd_link, $prev_afd_name) = &get_afd_link($day - 1); my ($curr_afd_link, $curr_afd_name) = &get_afd_link($day + 0); my ($next_afd_link, $next_afd_name) = &get_afd_link($day + 1);

my $days_page = $curr_afd_link. ".wiki"; my $days_text = wikipedia_fetch($gEditor, $days_page, $attempts, $sleep);

if ($days_text !~ /^\s*$/){ print "Page exists \n\n"; next; }
 * 1) This day's page is not empty, so it was already initialized. Skip it.

$days_text = &get_page_text($prev_afd_link, $prev_afd_name, $next_afd_link, $next_afd_name);
 * 1) Form the page for the current day

print "\n Initializing $curr_afd_link \n"; my $edit_summary = "Initializing a new AfD day"; wikipedia_submit($gEditor, $days_page, $edit_summary, $days_text, $attempts, $sleep);
 * 1) Initialize the page for the day

}

}

sub get_page_text {

my ($prev_afd_link, $prev_afd_name, $next_afd_link, $next_afd_name) = @_;

$prev_afd_name =~ s/\s*\(.*?\)\s*//g; $next_afd_name =~ s/\s*\(.*?\)\s*//g;
 * 1) Strip the text in parentheses from the text "1 February (Sunday)"

return ' \'\'\'Guide to deletion\'\'\'

';

}