# User:Polbot/source/Judges.pl

use strict; use Perlwikipedia; use LWP::UserAgent;

# Script-wide state.
# The first two command-line arguments are taken in order; everything else
# starts from a fixed default.
my $firstletter     = shift @ARGV;   # first argument: letter used in the FJC surname search URL
my $startat         = shift @ARGV;   # second argument (NOTE(review): presumably where to resume -- confirm against the main loop)
my $test            = 0;             # test-mode flag, off by default
my $soonest_next_op = time();        # earliest moment the next wiki operation may run

# Announce start-up, build the wiki client, and identify the bot through
# the user-agent string of the client's underlying mech object.
print "\nStarting polbot\n";
my $pw = Perlwikipedia->new();
$pw->{mech}->agent('Bot/WP/EN/Quadell/polbot');
# $pw->{debug} = 1;

# Log in as the bot account.  The status returned by login() is compared
# as a string against 0; any other value aborts the run.
print "Logging in\n";
my $login_status = $pw->login('Polbot', '(bot password)');
if ($login_status ne 0) {
    die "I can't log in.";
}

# Get exceptions (to skip).
# Reads the "finished" project page and collects the link target of every
# bulleted wiki link ("* [[Title]]") into @exceptions.
my @exceptions = ();
print "Getting list of completed judges to skip.\n";
my $todo_list = $pw->get_text('Wikipedia:WikiProject Law/United States federal judges - finished');
my @lines = split(/\n/, $todo_list);
foreach my $line (@lines) {
    # Ignore non-listed lines: only "* [[...]]" bullets name a judge.
    if ($line =~ /^\*\s*\[\[([^]]*)\]\]/) {
        push @exceptions, $1;
    }
}

print "Getting list of all judges starting with $firstletter\n"; my @judge_ids = ; my $url = 'http://www.fjc.gov/servlet/tAsearch?lname='. $firstletter; print " $url\n"; my $ua = LWP::UserAgent->new; $ua->agent("Mozilla/6.0"); my $res = $ua->get($url); die "could not connect" unless ($res->is_success); my $html = $res->content; while ($html =~ m/([^<]*)get($url); die "could not connect" unless ($res->is_success); $html = $res->content; $html =~ s/\`/'/g; my @eds = ; my @jus = ; my @pcs = ; my @jcats = ; my $rev_name; my $name; my $last_name; my $art_name; my $persondata_name; my $birth_date; my $birth_year; my $birth_loc; my $death_date; my $death_year; my $death_loc; my $pronoun = "He"; my $active = 0; my $wiki_out; # initial change $html =~ s/(\d)\-\/$1-the present/; # extract name $html =~ m/\\([^\n]*?) *\<\/B\>\<\/FONT\>/m; $rev_name = $1; $rev_name =~ s/ +/ /g; $rev_name =~ s/\[//g; $rev_name =~ s/\]//g; $rev_name =~ m/^(.*?)\, (.*?)( Jr\.| II| III| IV)?$/; $last_name = $1; $name = "$2 $last_name$3"; if ($pw->get_text("$name") =~ /\w/) { $art_name = "User:Polbot/fjc/". 
$name; } else { $art_name = $name; }		print "==$rev_name at $art_name==\n"; # extract gender if ($html =~ m/Gender:<\/B> Female/) {	$pronoun = "She";	} # extract birth and death info if ($html =~ m/Born +(\w+) +(\d+), +(\d+)(, +in +[^<]*)?/) { $birth_date = "$1 $2"; $birth_year = $3; $birth_loc = $4; $birth_loc =~ s/^, +in +//; } elsif ($html =~ m/Born +(\d+)( +in +[^<]*)?/) { $birth_year = $1; $birth_loc = $2; $birth_loc =~ s/^ +in +//; }		if ($html =~ m/Died +(\w+) +(\d+), +(\d+)(, +in +[^<]*)?/) { $death_date = "$1 $2"; $death_year = $3; $death_loc = $4; $death_loc =~ s/^, +in +//; } elsif ($html =~ m/Died +(\d+)( +in +[^<]*)?/) { $death_year = $1; $death_loc = $2; $death_loc =~ s/^ +in +//; }		$birth_loc = Expand_states($birth_loc); $death_loc = Expand_states($death_loc); #print "birth: '$birth_date', '$birth_year', '$birth_loc'\n"; #print "death: '$death_date', '$death_year', '$death_loc'\n";

# Extract education
# The "Education:" section of the page is split on double <BR> tags and each
# entry is rewritten in place (the loop variable aliases the array element)
# into a sentence fragment starting with $pronoun.  Entries that match none
# of the patterns are left as they were.
if ($html =~ m/<BR>\s*<BR><B>Education:<\/B><BR>(.*?)<BR>\s*<BR><B>/i) {
    my $ed_string = $1;
    @eds = split(/<[Bb][Rr]><[Bb][Rr]>/, $ed_string);
    foreach my $ed (@eds) {
        if ($ed =~ m/^(.*), (.*), (\d+)$/) {
            # "<school>, <degree>, <year>"
            $ed = "$pronoun received a $2 from $1 in $3";
        }
        elsif ($ed =~ m/^Read law, (\d+)$/) {
            $ed = "$pronoun read law in $1";
        }
        elsif ($ed =~ m/^(.*), (\d+)$/) {
            # "<school>, <year>" with no degree given
            $ed = "$pronoun graduated from $1 in $2";
        }
        #print " ED: $ed\n";
    }
}

# Extract Professional Career
# Same approach: one entry per <BR>, each rewritten in place.  The more
# specific patterns (private practice, judge, military) are tried before the
# generic "<position>, <years>" fallbacks.
if ($html =~ m/<B>Professional Career:<\/B><BR>(.*?)<BR>\s*<BR><B>/i) {
    my $pc_string = $1;
    @pcs = split(/ *<[Bb][Rr]> */, $pc_string);
    foreach my $pc (@pcs) {
        if ($pc =~ m/^Private practice, (.*?), (\d+)\-(\d+|the present)$/) {
            $pc = "$pronoun was in private practice of law in $1 from $2 to $3";
        }
        elsif ($pc =~ m/^Private practice, (.*?), (\d+)$/) {
            $pc = "$pronoun was in private practice of law in $1 in $2";
        }
        elsif ($pc =~ m/^Judge, (.*?), (\d+)\-(\d+|the present)$/) {
            $pc = "$pronoun was a judge to the $1 from $2 to $3";
        }
        elsif ($pc =~ m/^Judge, (.*?), (\d+)$/) {
            $pc = "$pronoun was a judge to the $1 in $2";
        }
        # The dots in "U.S." are escaped so they match literal periods only.
        elsif ($pc =~ m/^U\.S\. (Army|Navy)(.*?), (\d+)\-(\d+|the present)$/) {
            $pc = "$pronoun was in the United States $1$2 from $3 to $4";
        }
        elsif ($pc =~ m/^U\.S\. (Army|Navy)(.*?), (\d+)$/) {
            $pc = "$pronoun was in the United States $1$2 in $3";
        }
        elsif ($pc =~ m/^(.*), (\d+)\-(\d+|the present)$/) {
            $pc = "$pronoun was a $1 from $2 to $3";
        }
        elsif ($pc =~ m/^(.*), (\d+)$/) {
            $pc = "$pronoun was a $1 in $2";
        }
        #print "PC: $pc\n";
    }
}

# Extract judgeships
# The "Federal Judicial Service:" section is split on double <BR> tags into
# one entry per judgeship.  For each entry the leading court title is
# rewritten into a sentence about $last_name, and a matching category name
# is appended to @jcats.  An entry that does not mention "Service
# terminated" marks the judge as still active.
if ($html =~ m/<B>Federal Judicial Service:<\/B><BR>(.*?)<BR>\s*<BR>\s*<B>/si) {
    my $ju_string = $1;
    @jus = split(/ *<[Bb][Rr]><[Bb][Rr]> */, $ju_string);
    foreach my $ju (@jus) {
        # District courts
        if ($ju =~ s/Judge, U\. S\. District Court, ([^<]*)<[Bb][Rr]>/$last_name was a federal judge to the United States District Court for the $1. /) {
            push @jcats, "Judges of the United States District Court for the $1";
        }
        # District of Columbia district court under its historical title
        if ($ju =~ s/Justice, U\. S\. District Court for the District of Columbia \[Supreme Court of the District of Columbia\]\s*<[Bb][Rr]>/$last_name was a federal judge to the United States District Court for the District of Columbia. /) {
            push @jcats, "Judges of the United States District Court for the District of Columbia";
        }
        # Circuit courts
        if ($ju =~ s/Judge, U\. S\. Circuit Courts ([^<]*)<[Bb][Rr]>/$last_name was a federal judge to the United States circuit court $1. /) {
            push @jcats, "Judges of the United States circuit courts";
        }
        # D.C. Circuit: handled before the generic Court of Appeals pattern.
        # Once rewritten, the text no longer starts with "Judge, U. S. ...",
        # so the generic pattern below cannot match it a second time.
        if ($ju =~ s/Judge, U\. S\. Court of Appeals for District of Columbia Circuit<[Bb][Rr]>/$last_name was a federal judge to the United States Court of Appeals for the D.C. Circuit. /) {
            push @jcats, "Judges of the United States Court of Appeals for the D.C. Circuit";
        }
        # Any other Court of Appeals
        if ($ju =~ s/Judge, U\. S\. Court of Appeals ([^<]*)<[Bb][Rr]>/$last_name was a federal judge to the United States Court of Appeals $1. /) {
            push @jcats, "Judges of the United States Court of Appeals $1";
        }
        if ($ju !~ m/Service terminated/i) {
            $active = 1;
        }
    }
}
# Mash together.
$wiki_out = "\n$name "; if ($birth_date) { if ($death_date) { $wiki_out .= "($birth_date, $birth_year \&ndash\; $death_date, $death_year) "; } elsif ($death_year) { $wiki_out .= "($birth_date, $birth_year \&ndash\; $death_year) "; } else { $wiki_out .= "(born $birth_date, $birth_year) "; }		} elsif ($birth_year) { if ($death_date) { $wiki_out .= "($birth_year \&ndash\; $death_date, $death_year) "; } elsif ($death_year) { $wiki_out .= "($birth_year\&ndash\;$death_year) "; } else { $wiki_out .= "(born $birth_year) "; }		} else { if ($death_date) { $wiki_out .= "(died $death_date, $death_year) "; } elsif ($death_year) { $wiki_out .= "(died $death_year) "; }		}		if ($death_year) { $wiki_out .= "was a "; } else { if ($active) { $wiki_out .= "is a "; } else { $wiki_out .= "is a former "; }		}		$wiki_out .= "United States federal judge.\n\n"; if ($birth_loc) { $wiki_out .= "$last_name was born in $birth_loc. "; }		foreach my $ed (@eds) { $wiki_out .= "$ed. "; }		foreach my $pc (@pcs) { $wiki_out .= "$pc. "; }		$wiki_out .= "\n\n"; foreach my $ju (@jus) { $wiki_out .= "$ju\n\n"; }		if ($death_loc) { $wiki_out .= "$pronoun died in $death_loc.\n\n"; }		$persondata_name = $rev_name; $persondata_name =~ s/\'//g; $persondata_name =~ s/\b(\w+)\b/\u\L$1/g; $wiki_out .= "==External links==\n* \n\n"; $wiki_out .= ''. 
"\n\n\n"; if ($birth_year) { $wiki_out .= "\n"; }		if ($death_year) { $wiki_out .= "\n"; } else { $wiki_out .= "\n"; }		foreach my $jcat (@jcats) { $wiki_out .= "\n"; }		# Final substitutions - multiple $wiki_out =~ s/Nominated by /$last_name was nominated by /g; $wiki_out =~ s/Received a recess appointment from /$last_name received a recess appointment from /g; $wiki_out =~ s/Confirmed by the Senate/$pronoun was confirmed by the United States Senate/g; $wiki_out =~ s/vacated by (.*?);/vacated by $1./g; $wiki_out =~ s/Reassigned /$pronoun was reassigned on /g; $wiki_out =~ s/Service terminated on /$last_name's service was terminated on /g; $wiki_out =~ s/He was a State attorney general, ([^\.\;]*?) from/$pronoun was the state attorney general of $1 from/g; $wiki_out =~ s/ was a Member of the faculty, / was a member of the faculty of /g; $wiki_out =~ s/ was a Faculty, / was a member of the faculty of /g; $wiki_out =~ s/on (\w+ \d+, \d+), and received commission on \1/on $1, and received commission the same day/g; $wiki_out =~ s/(attorney|general|treasurer|secretary|senator), /$1 of /g; $wiki_out =~ s/ a ([AEIO])/ an $1/g; $wiki_out =~ s/, (\d+)\-(\d+) from / from $1 to $2 and from /g; $wiki_out =~ s/, (\d+) from / in $1 and from /g; # Final substitutions - single $wiki_out =~ s/recess appointment/recess appointment/; $wiki_out =~ s/senior status/senior status/; $wiki_out =~ s/U.S. Attorney(,| from)/United States Attorney$1/; $wiki_out =~ s/United States Senate/United States Senate/; $wiki_out =~ s/\[\[J\.D\.\]\]/J.D./; $wiki_out =~ s/Law clerk/law clerk/; # Presidents $wiki_out =~ s/(from|by) (George W. Bush)/$1 President $2/; $wiki_out =~ s/(from|by) (William J. Clinton)/$1 President $2/; $wiki_out =~ s/(from|by) (George H.W. Bush)/$1 President $2/; $wiki_out =~ s/(from|by) (Ronald Reagan)/$1 President $2/; $wiki_out =~ s/(from|by) (Jimmy Carter)/$1 President $2/; $wiki_out =~ s/(from|by) (Gerald Ford)/$1 President $2/; $wiki_out =~ s/(from|by) (Richard M. 
Nixon)/$1 President $2/; $wiki_out =~ s/(from|by) (Lyndon B. Johnson)/$1 President $2/; $wiki_out =~ s/(from|by) (John F. Kennedy)/$1 President $2/; $wiki_out =~ s/(from|by) (Dwight D. Eisenhower)/$1 President $2/; $wiki_out =~ s/(from|by) (Harry S Truman)/$1 President $2/; $wiki_out =~ s/(from|by) (Franklin D. Roosevelt)/$1 President $2/; $wiki_out =~ s/(from|by) (Herbert Hoover)/$1 President $2/; $wiki_out =~ s/(from|by) (Calvin Coolidge)/$1 President $2/; $wiki_out =~ s/(from|by) (Warren G. Harding)/$1 President $2/; $wiki_out =~ s/(from|by) (Woodrow Wilson)/$1 President $2/; $wiki_out =~ s/(from|by) (William H. Taft)/$1 President $2/; $wiki_out =~ s/(from|by) (Theodore Roosevelt)/$1 President $2/; $wiki_out =~ s/(from|by) (William McKinley)/$1 President $2/; $wiki_out =~ s/(from|by) (Benjamin Harrison)/$1 President $2/; $wiki_out =~ s/(from|by) (Grover Cleveland)/$1 President $2/; $wiki_out =~ s/(from|by) (Chester A. Arthur)/$1 President $2/; $wiki_out =~ s/(from|by) (James A. Garfield)/$1 President $2/; $wiki_out =~ s/(from|by) (Rutherford B. Hayes)/$1 President $2/; $wiki_out =~ s/(from|by) (Ulysses Grant)/$1 President $2/; $wiki_out =~ s/(from|by) (Andrew Johnson)/$1 President $2/; $wiki_out =~ s/(from|by) (Abraham Lincoln)/$1 President $2/; $wiki_out =~ s/(from|by) (James Buchanan)/$1 President $2/; $wiki_out =~ s/(from|by) (Franklin Pierce)/$1 President $2/; $wiki_out =~ s/(from|by) (Millard Fillmore)/$1 President $2/; $wiki_out =~ s/(from|by) (Zachary Taylor)/$1 President $2/; $wiki_out =~ s/(from|by) (James K. 
Polk)/$1 President $2/; $wiki_out =~ s/(from|by) (John Tyler)/$1 President $2/; $wiki_out =~ s/(from|by) (Martin Van Buren)/$1 President $2/; $wiki_out =~ s/(from|by) (Andrew Jackson)/$1 President $2/; $wiki_out =~ s/(from|by) (John Quincy Adams)/$1 President $2/; $wiki_out =~ s/(from|by) (James Monroe)/$1 President $2/; $wiki_out =~ s/(from|by) (James Madison)/$1 President $2/; $wiki_out =~ s/(from|by) (Thomas Jefferson)/$1 President $2/; $wiki_out =~ s/(from|by) (John Adams)/$1 President $2/; $wiki_out =~ s/(from|by) (George Washington)/$1 President $2/;

# Write
# In test mode, dump the generated wikitext to judges.txt and stop the
# script instead of editing the wiki.
if ($test) {
    print "Output to file\n";
    # Three-argument open with a lexical handle; failures are reported
    # rather than silently producing no file.
    open(my $outfile, '>', 'judges.txt')
        or die "Cannot open judges.txt for writing: $!";
    print {$outfile} $wiki_out;
    close($outfile)
        or die "Cannot close judges.txt: $!";
    die;
}

$|=1;		print "Waiting ". ($soonest_next_op - time). " secs... "; $|=1;		while (time < $soonest_next_op) {}; $soonest_next_op = time + 9; if ($name eq $art_name) { $pw->edit($art_name, $wiki_out, "Auto-generating new article based on $url"); my $talkmessage = "\n\nThis article was automatically created by a perl script. It could use a human's loving touch. ~"; $pw->edit("Talk:$art_name", $talkmessage, "Auto-adding WPbiography template"); my $listsofar = $pw->get_text("User:Polbot/fjc"); $listsofar .= "|-\n| $rev_name || yes || $art_name\n"; $pw->edit("User:Polbot/fjc", $listsofar, "Adding $art_name"); } else { $wiki_out =~ s/\[\[Category/Category/g;			$pw->edit($art_name, $wiki_out, "Auto-generating subpage based on $url");			my $otherpage = $pw->get_text("$name");			if ($otherpage =~ m/\ was automatically created by a perl script, based on [$url this article] at the Biographical Directory of Federal Judges. The subpage should either be merged into this article, or moved and disambiguated. ~\n";			$pw->edit("Talk:$name", $talksofar, "Auto-adding link to subpage at $art_name");			my $listsofar = $pw->get_text("User:Polbot/fjc");			$listsofar .= "|-\n| $rev_name || no || $art_name\n";			$pw->edit("User:Polbot/fjc", $listsofar, "Adding $art_name");		}		print "Article created.\n"; }

# Expand two-letter U.S. postal abbreviations in a place string.
#
# Argument: $place - location text such as "Albany, NY"; may be undef.
# Returns:  the text with every stand-alone, upper-case abbreviation from
#           the table below replaced by the full name.  An undefined
#           argument is returned unchanged.
#
# Only whole words are expanded, so letters inside a longer word (for
# example the "AL" in "DeKALB") are left alone, and every occurrence in the
# string is expanded in a single pass.
sub Expand_states {
    my $place = shift;
    return $place unless defined $place;

    # Built inside the sub on purpose: the script calls Expand_states
    # before execution reaches this point in the file, so a file-level
    # lexical initialised here would still be empty at call time.
    my %state_name_for = (
        AL => 'Alabama',        AK => 'Alaska',
        AZ => 'Arizona',        AR => 'Arkansas',
        CA => 'California',     CO => 'Colorado',
        CT => 'Connecticut',    DE => 'Delaware',
        DC => 'District of Columbia',
        FL => 'Florida',        GA => 'Georgia',
        HI => 'Hawaii',         ID => 'Idaho',
        IL => 'Illinois',       IN => 'Indiana',
        IA => 'Iowa',           KS => 'Kansas',
        KY => 'Kentucky',       LA => 'Louisiana',
        ME => 'Maine',          MD => 'Maryland',
        MA => 'Massachusetts',  MI => 'Michigan',
        MN => 'Minnesota',      MS => 'Mississippi',
        MO => 'Missouri',       MT => 'Montana',
        NE => 'Nebraska',       NV => 'Nevada',
        NH => 'New Hampshire',  NJ => 'New Jersey',
        NM => 'New Mexico',     NY => 'New York',
        NC => 'North Carolina', ND => 'North Dakota',
        OH => 'Ohio',           OK => 'Oklahoma',
        OR => 'Oregon',         PA => 'Pennsylvania',
        PR => 'Puerto Rico',    RI => 'Rhode Island',
        SC => 'South Carolina', SD => 'South Dakota',
        TN => 'Tennessee',      TX => 'Texas',
        UT => 'Utah',           VT => 'Vermont',
        VA => 'Virginia',       WA => 'Washington',
        WV => 'West Virginia',  WI => 'Wisconsin',
        WY => 'Wyoming',
    );

    # One alternation over all codes, anchored on word boundaries.
    my $codes = join '|', sort keys %state_name_for;
    $place =~ s/\b($codes)\b/$state_name_for{$1}/g;

    return $place;
}