# User:Polbot/source/Find altnames.pl

use strict; use Perlwikipedia;

# Create the wiki client and set the user-agent string it sends.
my $pw = Perlwikipedia->new;
$pw->{mech}->agent('Bot/WP/EN/Quadell/polbot');
# Disabled: sets the client's debug field.
#$pw->{debug} = 1;

print "\nStarting polbot, logging in.\n";
# The script treats a login result equal to 0 as success and aborts otherwise.
my $login_status = $pw->login('bot name', 'bot password');
die "I can't log in." unless ($login_status eq 0);

# Character class (as regex source text) for the characters allowed inside
# one word of a name; interpolated into the name-matching patterns below.
my $namechar = "[A-Za-z'._-]";
# Keyed by article name; each entry holds 'job', 'main' and 'alt' sub-entries
# that are filled in while the todo list is processed.
my %pol_names = ();
my @lines = ();

print "\nReading todo file\n";
my $todo_list = $pw->get_text('User:Polbot/altnames/todo');
@lines = split(/\n/, $todo_list);

#my $wiki_text = '';

# Walk the todo list. Every line of the form "* [[Article name]]*job text"
# contributes one entry to %pol_names:
#   {'job'}  - the text after the second '*'
#   {'main'} - normalised forms of the article name
#   {'alt'}  - alternative spellings derived from the 'main' forms
# In both 'main' and 'alt' only the keys matter; each value repeats its key.
foreach my $line (@lines) {
    # Ignore non-listed lines
    # And use just the name, no foolishness
    if ($line =~ s/^\*\s*\[\[([^]]*)\]\]\*(.*)$/$1/) {
        # Copy the captures right away, before any later match overwrites them.
        my $article_name = $1;
        my $job_description = $2;
        print "Finding altnames for '$article_name'\n";
        #$wiki_text = wikiread($article_name, $pw);

        # Populate the hash.
        $pol_names{$article_name}{'job'} = $job_description;

        # First, put in its own name
        {
            my $main_name = $article_name;

            # Format suffixes properly
            $main_name =~ s/([^,]) (Jr\.|Sr\.)$/$1, $2/;
            $main_name =~ s/, (II|III|IV)$/ $1/;

            # change "Van Eyk" into "Van_Eyk"
            $main_name =~ s/\b(Van|van|De|de|de la|La|la|St\.) /$1_/;

            # change "Jones III" to "Jones_III"
            if ($main_name =~ m/^(.*) (II$|III$|IV$)/) {
                # Also keep the name without its numeral as a main form.
                $pol_names{$article_name}{'main'}{$1} = $1;
                $main_name =~ s/ (II$|III$|IV$)/_$1/;
            }

            $pol_names{$article_name}{'main'}{$main_name} = $main_name;
        }

        # Form altnames from these
        foreach my $main_name_i (keys %{$pol_names{$article_name}{'main'}}) {
            # Work on a copy: the substitutions below edit the name in place.
            my $main_name = $main_name_i;
            $pol_names{$article_name}{'alt'}{$main_name} = $main_name;

            if ($main_name =~ s/ \(.+\)$//) {
                # e.g. John Smith (politician)
                $pol_names{$article_name}{'alt'}{$main_name} = $main_name;
            }
            if ($main_name =~ s/^(.+) "(.+)" (.+)$/$1 $3/) {
                #e.g. William S. "Bill" Fulton
                $pol_names{$article_name}{'alt'}{$main_name} = $main_name;
                $pol_names{$article_name}{'alt'}{"$2 $3"} = "$2 $3";
            }
            if ($main_name =~ s/^(.+) '(.+)' (.+)$/$1 $3/) {
                #e.g. William S. 'Bill' Fulton
                $pol_names{$article_name}{'alt'}{$main_name} = $main_name;
                $pol_names{$article_name}{'alt'}{"$2 $3"} = "$2 $3";
            }
            if ($main_name =~ s/^(.+) \((.+)\) (.+)$/$1 $3/) {
                #e.g. William S. (Bill) Fulton
                $pol_names{$article_name}{'alt'}{$main_name} = $main_name;
                $pol_names{$article_name}{'alt'}{"$2 $3"} = "$2 $3";
            }

            # Only the first matching shape below contributes altnames.
            if ($main_name =~ m/^[A-Z]\.? ?[A-Z]\. [A-Zdv]$namechar+$/) {
                #e.g. C. S. Lewis
                # Do nothing
                # The space between the initials is optional so that both
                # "C.S. Lewis" and "C. S. Lewis" stop here; without it the
                # spaced form fell through to the "William S. Fulton" branch
                # and produced "C. Lewis".
            } elsif ($main_name =~ m/^([A-Z]$namechar+) ([A-Z]\.)([A-Z]\.) ([A-Zdv]$namechar+)$/) {
                #e.g. William S.P. Fulton
                $pol_names{$article_name}{'alt'}{"$1 $2 $3 $4"} = "$1 $2 $3 $4";
                $pol_names{$article_name}{'alt'}{"$1 $4"} = "$1 $4";
            } elsif ($main_name =~ m/^([A-Z]$namechar+) ([A-Z]\.) ([A-Z]\.) ([A-Zdv]$namechar+)$/) {
                #e.g. William S. P. Fulton
                $pol_names{$article_name}{'alt'}{"$1 $2$3 $4"} = "$1 $2$3 $4";
                $pol_names{$article_name}{'alt'}{"$1 $4"} = "$1 $4";
            } elsif ($main_name =~ m/^([A-Z]$namechar+) [A-Z]\. ([A-Zdv]$namechar+)$/) {
                #e.g. William S. Fulton
                $pol_names{$article_name}{'alt'}{"$1 $2"} = "$1 $2";
            } elsif ($main_name =~ m/^[A-Z]\. ([A-Z]$namechar+) ([A-Zdv]$namechar+)$/) {
                #e.g. C. Michael Thompson
                $pol_names{$article_name}{'alt'}{"$1 $2"} = "$1 $2";
            } elsif ($main_name =~ m/^([A-Z]$namechar+) ([A-Z])$namechar+ ([A-Zdv]$namechar+)$/) {
                #e.g. William Savin Fulton
                $pol_names{$article_name}{'alt'}{"$1 $3"} = "$1 $3";
                #$pol_names{$article_name}{'alt'}{"$1 $2. $3"} = "$1 $2. $3";
            } elsif ($main_name =~ m/^([A-Z]$namechar+) [A-Zdv]$namechar+ [A-Zdv]$namechar+ ([A-Zdv]$namechar+)$/) {
                #e.g. William Savin Edward Fulton
                $pol_names{$article_name}{'alt'}{"$1 $2"} = "$1 $2";
            } elsif ($main_name =~ m/^([A-Z]$namechar+) ([A-Z])($namechar+) ([A-Zdv]$namechar+), (Jr\.|Sr\.)$/) {
                #e.g. William Savin Fulton, Jr.
                $pol_names{$article_name}{'alt'}{"$1 $2$3 $4"} = "$1 $2$3 $4";
                #$pol_names{$article_name}{'alt'}{"$1 $2. $4"} = "$1 $2. $4";
                $pol_names{$article_name}{'alt'}{"$1 $4"} = "$1 $4";
                #$pol_names{$article_name}{'alt'}{"$1 $2. $4, $5"} = "$1 $2. $4, $5";
            } elsif ($main_name =~ m/^([A-Z]$namechar+) ([A-Z])\. ([A-Zdv]$namechar+), (Jr\.|Sr\.)$/) {
                #e.g. William S. Fulton, Jr.
                $pol_names{$article_name}{'alt'}{"$1 $2. $3"} = "$1 $2. $3";
                $pol_names{$article_name}{'alt'}{"$1 $3"} = "$1 $3";
            } elsif ($main_name =~ m/^([A-Z]$namechar+) ([A-Zdv]$namechar+), (Jr\.|Sr\.)$/) {
                #e.g. William Fulton, Jr.
                $pol_names{$article_name}{'alt'}{"$1 $2"} = "$1 $2";
            }
        }
    }
}

print "\n\nDone reading.\n\n";

# Must start from an empty list: assigning {} would store a hash reference
# as a single bogus key with an undefined value.
my %inprocess_names = ();

# Convert to the inprocess hash
# Each entry becomes "* Article|alt1|alt2*job". Articles whose only altname
# is the article name itself are left out.
for my $article_name (sort keys %pol_names) {
    my $line_out = join '',
        map  { "|$_" }
        grep { $_ ne $article_name }
        sort keys %{$pol_names{$article_name}{'alt'}};

    if ($line_out) {
        $inprocess_names{$article_name} = "* $article_name$line_out*" . $pol_names{$article_name}{'job'};
    }
}


 * 1) Update my lists

print "Writing in-process list\n";

my $wiki_code = "";

foreach my $inprocess_key (sort keys %inprocess_names) { $wiki_code .= $inprocess_names{$inprocess_key}. "\n"; }

$pw->edit('User:Polbot/altnames/inprocess', $wiki_code, "Auto-updating based on input at todo list");

open(outfile, ">inprocess.txt"); print outfile "$wiki_code\n"; close(outfile);

print "Done!\n";

# Fetch the text of $article through $connection->get_text.
#
# A result equal to the string "0" is treated as a signal to try again:
# up to five further attempts are made, pausing 1, 2, ... 5 seconds before
# each one and printing a retry notice. If the result is still "0" after
# the last attempt an empty string is returned; otherwise the fetched value
# is returned unchanged.
sub wikiread {
    my ($article, $connection) = @_;

    my $text = $connection->get_text($article);

    for my $attempt (1 .. 5) {
        last if $text ne "0";
        sleep $attempt;
        print "  retry. . .\n";
        $text = $connection->get_text($article);
    }

    return $text eq "0" ? '' : $text;
}