User:OrphanBot/libBot.pl


 * 1) !/usr/bin/perl


 * 1) libBot: A library of useful routines for running a bot

use strict; use warnings;

require "libPearle2.pl";

my $test_only = 0; my $username = "";

sub config {	my %params = @_; $test_only = $params{test_only} if(defined($params{test_only})); $username = $params{username} if(defined($params{username})); }

sub userwarnlog {	my ($text, $editTime, $startTime, $token, $user, $summary, $session); $user = $_[1]; $user = $username if(!defined($user)); $summary = $_[2]; $summary = "Logging warning message" if(!defined($summary)); $session = $_[3]; if(defined($session)) {		# We've been handed an editing session ($text, $editTime, $startTime, $token) = @{$session}; Pearle::myLog("Warning with existing edit session\n"); }	else {		($text, $editTime, $startTime, $token) = Pearle::getPage("User talk:$user"); }	if($test_only) {		print STDERR $_[0]; return; }	if($text =~ /^#redirect/i) {		userwarnlog("*User talk page User talk:$user is a redirect\n"); return; }	$text .= $_[0]; Pearle::postPage("User talk:$user", $editTime, $startTime, $token, $text, $summary, "no"); print STDERR $_[0]; }
 * 1) Log a warning on the talk page of the bot

sub notelog {	print STDERR @_; }
 * 1) Log a notification message to the console

sub FixupLinks {	my $link = shift; $link =~ s/\[\[(Category|Image)/[[:$1/g;	return $link; }
 * 1) Fix all wikilinks in a string so that they shows as a link, not inline, if it's for a category or image

sub MakeWikiRegex {	my $string = shift; # Escape metacharacters $string =~ s/\\/\\\\/g; $string =~ s/\./\\\./g; $string =~ s/\(/\\\(/g; $string =~ s/\)/\\\)/g; $string =~ s/\[/\\\[/g; $string =~ s/\]/\\\]/g; $string =~ s/\+/\\\+/g; $string =~ s/\*/\\\*/g; $string =~ s/\?/\\\?/g; $string =~ s/\^/\\\^/g; $string =~ s/\$/\\\$/g; # Process the string to match both with spaces and with underscores $string =~ s/[ _]/[ _]+/g;
 * 1) Make a string into a Wikipedia-compatible regex

# Process the string to match both upcase and lowercase first characters if($string =~ /^[A-Za-z]/) {      		$string =~ s/^(.)/"[$1".lc($1)."]"/e; }	return $string; }

sub DoIHaveMessages {	my $text = shift; if($text =~ / You have/) {		return 1; }	else {		return 0; } }
 * 1) Check for new talk page messages

sub GetPageList {	my $image = shift; my $image_text = shift; my @pages = ; # Extract the page links # Lee Hyori # Daesung Entertainment #  while($image_text =~ //g) {		my $title; $title = $2; # Unescape any HTML entities in the title $title =~ s/&lt;//g; $title =~ s/&quot;/"/g;		$title =~ s/&amp;/&/g;

notelog("Matched article $title\n");

# Filter out bad namespaces if($title =~ /^(User:|Talk:|User talk:|Template talk:|Image:|Image talk:|Category talk:|Wikipedia:|Wikipedia talk:|Portal talk:)/)	# Leave these alone {			notelog("Ignoring $title due to namespace\n"); }		elsif($title =~ /^Special:/) {			# Ignore Special: pages completely }		elsif($title =~ /^(MediaWiki:|MediaWiki talk:|Template:|Help:|Help talk:)/)		# Log a warning about these, but otherwise leave them alone {			userwarnlog("*Found image $image in $title\n"); }		else	# Good namespaces: article, Category:, Portal: {			push @pages, $title; }	}	return @pages; }

sub GetFullPageList {	my $image = shift; my $image_text = shift; my @pages = ; # Extract the page links # Lee Hyori</a></li> # <li><a href="/wiki/Daesung_Entertainment" title="Daesung Entertainment">Daesung Entertainment</a></li> # </ul> while($image_text =~ /<li><a href="(\/wiki\/[^"]+)" title="([^"]+)">/g) {		my $title; $title = $2; # Unescape any HTML entities in the title $title =~ s/&lt;/</g; $title =~ s/&gt;/>/g; $title =~ s/&quot;/"/g;		$title =~ s/&amp;/&/g;
 * 1) Get all pages.  Don't filter for bad namespaces.

notelog("Matched article $title\n");

push @pages, $title; }	return @pages; }

sub SaveImage {	my $image = shift; my $image_text = shift; my $image_path = shift; my $image_url; ($image_url) = $image_text =~ /<a href="(http:\/\/upload\.wikimedia\.org\/wikipedia\/en\/[^"]+)"/;	if(defined($image_url))	{		my $filename;		my $image_data;		notelog("Fetching image $image_url\n");		($filename) = $image_url =~ /(\/[^\/]+)$/;		$filename = $image_path . $filename;		if(! -e $filename)		{			if($test_only)			{				notelog("Would save to $filename...");			}			else			{				$image_url = Pearle::urlDecode($image_url);				$image_data = Pearle::getURL($image_url);				notelog("Saving to $filename...");				if(defined($filename) and $filename)				{					open OUTFILE, ">", $filename;					print OUTFILE $image_data;					close OUTFILE;					notelog("Image saved\n");					Pearle::myLog("Image $image saved as $filename\n");				}				else				{					notelog("Failed\n");				}			}		}		else		{			notelog("File already exists\n");		}	}			}

sub RemoveImageFromPage {	my $image = shift; my $page = shift; my $image_regex = shift; my $removal_prefix = shift; my $removal_comment = shift;

my ($text, $editTime, $startTime, $token); my ($match1, $match2); my $old_length; my $new_length; my $change_len; my $match_len;

# Fetch an article page ($text, $editTime, $startTime, $token) = Pearle::getPage($page); if(!defined($text)) {		Pearle::myLog("Error: Bad edit page $page\n"); userwarnlog(FixupLinks("*Error: Bad edit page $page\n")); sleep(300); return 0; }	if($text =~ /^\s*$/) {		# Might be protected instead of empty Pearle::myLog("Error: Empty page $page\n"); userwarnlog(FixupLinks("*Error: Empty page $page\n")); sleep(300); return 0; }	if($text =~ /^#redirect/i) {		Pearle::myLog("Redirect found for page $page (image $image)\n"); userwarnlog(FixupLinks("*Redirect found for page $page (image $image)\n")); return 0; }

# Remove the image my $regex3 = "(\\[\\[${image_regex}.*?(\\[\\[.*?\\]\\].*?|)+\\]\\][ \\t]*)";	# Regex to match images my $regex3ex = "\\w[ \\t]*${regex3}[ \\t]*\\w";									# Regex to try to spot inline images my $regex3c = "";											# Regex to spot images in comments my $regex3g = "(${image_regex}.*)";												# Regex to match gallery images my $regex3gc = "";											# Regex to spot gallery images in comments my ($raw_image) = $image =~ /Image:(.*)/; my $regex4a = "([Cc]over\\s*=\\s*)". MakeWikiRegex($raw_image); my $regex4b = "(image_skyline\\s*=\\s*)". MakeWikiRegex($raw_image); my $regex4i = "(image\\s*=\\s*)". MakeWikiRegex($raw_image);						# Regex to match "image = " sections in infoboxes my $regex4p = "(picture\\s*=\\s*)". MakeWikiRegex($raw_image);					# Regex to match "picture = " sections in infoboxes

my $regex4m = "\\[\\([^*)\\]\\]";	# Regex to match inline Media: links my $regex4g = "(img\\s*=\\s*)". MakeWikiRegex($raw_image);	# Regex to match "img = " sections in infoboxes Pearle::myLog("Regex 3: $regex3\n"); notelog("Regex 3: $regex3\n"); notelog("Regex 3 extended: $regex3ex\n"); notelog("Regex 3 gallery: $regex3g\n"); Pearle::myLog("Raw regex: $raw_image\n"); notelog("Regex 4 Album: $regex4a\n"); notelog("Regex 4 City: $regex4b\n"); notelog("Regex 4 Image: $regex4i\n"); notelog("Regex 4 Media: $regex4m\n"); notelog("Regex 4 Picture: $regex4p\n"); notelog("Regex 4 Img: $regex4g\n"); if($text =~ /$regex3ex/) {		Pearle::myLog("Possible inline image in $page\n"); userwarnlog(FixupLinks("*Possible inline image $image in $page\n")); return 0;	# Can't do gallery matching because that also matches regular images, and odds are, we don't have an infobox }	if($text =~ /$regex3c/ or $text =~ /$regex3gc/) {		Pearle::myLog("Image in comment in $page\n"); return 0;	# Can't do gallery matching because that also matches regular images }	$text =~ /$regex3/; $match_len = length($1); $match2 = $text =~ s/$regex3//g;
 * 1) 		userwarnlog(FixupLinks("*Image in comment in $page\n"));

$new_length = length($text); print "Num: $match2 Len: $match_len\n"; if($match2) {		# If a whole lot of text was removed, log a warning if($match_len > (500 + length($image))) {			userwarnlog(FixupLinks("*Long caption of $match_len bytes replaced in $page\n")); if($match_len > (1000 + length($image))) {				notelog("Unusually long caption found. Exiting.\n"); Pearle::myLog("Unusually long caption of $match_len found in $page ($match2 matches).\n"); exit; }		}		if($match_len < (4 + length($image))) {			notelog("*Short replacement of $match_len bytes in $page\n"); Pearle::myLog("Short replacement of $match_len bytes (min ". (length($image) + 4). ") in $page ($match2 matches). Exiting.\n"); Pearle::myLog("Text:\n$text\n"); exit; }		# If many matches, log a warning if($match2 > 2) {			Pearle::myLog("More than one match ($match2) in page $page\n"); }		if($match2 > 100) {			Pearle::myLog("Too many matches ($match2) in page $page. Skipping.\n"); userwarnlog("Too many matches ($match2) in page $page. Skipping.\n"); return 0; }		# If there might be a reference, log a warning if($text =~ /-->\]/) {			Pearle::myLog("Possible bracket mixup in page $page\n"); userwarnlog(FixupLinks("*Possible bracket mixup in page $page\n")); }		{			$match2 += 1; }	}
 * 1) 			userwarnlog(FixupLinks("*More than one match ($match2) in page $page\n"));
 * 1) 		if($text =~ /(?:see (?:image|picture|graph|diagram|right|left)|\(left\)|\(right\)|\(below\)|\(above\))/)
 * 2) 			Pearle::myLog("Possible image reference in page $page\n");
 * 3) 			userwarnlog("*Possible image reference in page $page\n");
 * }
 * }
 * 1) 		if($text =~ /\[\[(?: |)/)

if($match2 > 0) {		if($text =~ /\[\[(?: |)<!--/) {			Pearle::myLog("Possible multiline image in page $page\n"); userwarnlog(FixupLinks("*Possible multiline image in page $page\n")); }	}

# Infobox removal if($text =~ /{{Album[ _]infobox|{{Infobox[ _]Album/i) {		if($text =~ s/$regex4a/$1/) {			Pearle::myLog("*Album infobox in page $page\n"); $match2 += 1; }	}	if($text =~ /{{Infobox[ _]City/i) {		if($text =~ s/$regex4b/$1/) {			Pearle::myLog("*City infobox in page $page\n"); $match2 += 1; }	}	if($text =~ /{{Taxobox/i) {		if($text =~ s/$regex4i/$1/) {			Pearle::myLog("*Taxobox in page $page\n"); $match2 += 1; }	}	if($text =~ /{{NFL[ _]player/i) {		if($text =~ s/$regex4i/$1/i) {			Pearle::myLog("*NFL Playerbox in page $page\n"); $match2 += 1; }	}	if($text =~ /{{Infobox[ _]President/i) {		if($text =~ s/$regex4i/$1/i) {			Pearle::myLog("*Presidentbox in page $page\n"); $match2 += 1; }	}	if($text =~ /{{Infobox[ _]Cricketer/i) {		if($text =~ s/$regex4p/picture = cricket no pic.png/i) {			Pearle::myLog("*Cricketer in page $page\n"); $match2 += 1; }	}	if($text =~ /{{Infobox[ _]Celebrity/) {		if($text =~ s/$regex4i/$1/i) {			Pearle::myLog("*Celebrity in page $page\n"); $match2 += 1; }	}	if($text =~ /{{Infobox[ _]Wrestler/) {		if($text =~ s/$regex4i/$1/i) {			Pearle::myLog("*Wrestler in page $page\n"); $match2 += 1; }	}	if($text =~ /{{Infobox musical artist 2/) {		if($text =~ s/$regex4g/$1/i) {			Pearle::myLog("*InfoMusArt2 in page $page\n"); $match2 += 1; }	}	if($text =~ /{{Infobox Model/) {		if($text =~ s/$regex4i/$1/i) {			Pearle::myLog("*Model in page $page\n"); $match2 += 1; }	}
 * 1) 			userwarnlog("*Presidentbox in page $page\n");
 * 1) 			userwarnlog("*Cricketer in page $page\n");

if($match2)	# No need to null-edit articles anymore {		if($test_only) {			notelog("Test removal from page succeeded\n"); }		else {			# Submit the changes Pearle::postPage($page, $editTime, $startTime, $token, $text, $removal_comment, "no"); }	}	return ($match2) }

sub isNotified {	my $image_text = shift; my $uploader = shift; my $image_regex = shift; my $image_name = shift; my $notes_ref = shift; my $donts_ref = shift;
 * 1) Returns 1 if the user has been notified, or a reference to the userpage edit session if they haven't

# Check notification list if($notes_ref->{"$uploader,$image_name"}) {		notelog("Already notified for this image\n"); return 1; }

if($donts_ref->{$uploader}) {		notelog("On exception list\n"); Pearle::myLog("On exception list: $uploader\n"); return 1; }	# Check uploader's talkpage my ($text, $editTime, $startTime, $token) = Pearle::getPage("User talk:$uploader"); if($text =~ /$image_regex/) {		notelog("Already notified by someone else\n"); $donts_ref->{"$uploader,$image_name"} = 1; return 1; }	else {		print "Not already notified\n"; return [$text, $editTime, $startTime, $token]; } }

sub isDated {	my $image_text = shift; if($image_text =~ /\((\d\d?) (\w*) (\d\d\d\d)\)/)	# Dated template {		print "Dated tag $1 $2 $3\n"; return 1; }	# as of 6 October 2006">	elsif($image_text =~ /as of (\d\d?) (\w*) (\d\d\d\d)/) # Template borked, working off category	{		print "Template borked; category $1 $2 $3\n";		return 1;	}	elsif($image_text =~ /{{{day}}} {{{month}}} \d\d\d\d/ or $image_text =~ /\( 2006\)/)	# Generic template	{		print "Generic tag\n";		return 0;	}	else	{		print "No tag match\n";		return 0;	} }

sub getDate {	my $image_text = shift; if($image_text =~ /\((\d\d?) (\w*) (\d\d\d\d)\)/) {		print "Template date $1-$2-$3\n"; return ($1, $2, $3); }	elsif($image_text =~ /as of (\d\d?) (\w*) (\d\d\d\d)/) # Template borked, working off category {		print "Category date $1-$2-$3\n"; return ($1, $2, $3); }	elsif($image_text =~ />\d\d?:\d\d, (\d\d?) (\w*) (\d\d\d\d)</) {		print "Upload date $1-$2-$3\n"; # For now, be conservative: my ($year, $month, $day) = Today; return ($day, Month_to_Text($month), $year); }	else {		print "No date\n"; return (1, "January", 2006); } }
 * 1) Return the tag date if there is one, the upload date if not
 * 2) Returns in (day, month, year) format
 * 1) 		return ($1, $2, $3);

sub getUploadDates {	my @dates; my $image_text = shift; while($image_text =~ />\d\d?:\d\d, (\d\d?) (\w*) (\d\d\d\d)</g) {		push @dates, [$1, $2, $3]; }	return @dates; }
 * 1) Return a list of upload dates

sub getLastEditDate {	my ($day, $month, $year); my $image = shift; my @history = Pearle::parseHistory($image); (undef, $day, $month, $year) = @{$history[0]}; return ($day, $month, $year); }

sub getUploader {	my $image_text = shift; my ($uploader, $dims, $bytes, $comment); my @uploaders; my $uploader_data; my $i = 0; # title="User:Jamie100">Jamie100</a> (<a href="/wiki/User_talk:Jamie100" title="User talk:Jamie100">Talk</a>). . 424x216 (25800 bytes) (Reverted to earlier revision) </li> while($image_text =~ />([^<]+?)<\/a> \(<a href="[^"]+?" (?:class="new" |)title="[^"]+?">Talk<\/a> \| <a href="[^"]*" title="[^"]*">contribs<\/a>\) \. \. (\d+.+?\d+) \(([0-9,]+) bytes\)(?: ([^<]*)|)</g) {		($uploader, $dims, $bytes, $comment) = ($1, $2, $3, $4); $bytes =~ s/,//g;						# Remove commas to turn into a real number $comment = "" if(!defined($comment));	# Reduce warnings push @uploaders, [$uploader, $dims, $bytes, $comment]; notelog("Uploader found: $uploader, $dims, $bytes, $comment\n"); $i++; die "Too many uploaders: $i\n" if($i > 100); }	my $max = scalar(@uploaders); print $max, "\n"; for($i = 0; $i < $max; $i++) {		$uploader = $uploaders[$i][0]; if($uploaders[$i][3] =~ /Reverted/) {			$dims = $uploaders[$i][1]; $bytes = $uploaders[$i][2]; notelog("Revert found: $uploader, $dims, $bytes\n"); $i++; while(($dims ne $uploaders[$i][1] or $bytes ne $uploaders[$i][2]) and $i < $max) {				notelog("Reversion data: $uploaders[$i][1], $uploaders[$i][2], $i\n"); $uploader = $uploaders[$i][0]; $i++; }		}		elsif($uploaders[$i][3] =~ /optimi(z|s)|adjust|tweak|scale|crop|change|resize/i) {			notelog("Optimize found. Skipping.\n"); }		else {			notelog("Uploader: $uploader ($i)\n"); last; }	}	$uploader = undef if($i >= $max); print "Uploader: $uploader\n"; return $uploader; }
 * 1) Find the most recent non-vandal, non-revert uploader
 * 1) 	while($image_text =~ />([^<]+?)<\/a> \(<a href="[^"]+?" (?:class="new" |)title="[^"]+?">Talk<\/a>\) \. \. (\d+x\d+) \(([0-9,]+) bytes\)(?: ([^<]*)|)</g)

sub checkImageCategory {	my $cat; my ($text, $editTime, $startTime, $token); $cat = "Category:Images with unknown source as of $_[0] $_[1] $_[2]"; ($text, $editTime, $startTime, $token) = Pearle::getPage($cat); if($text !~ /\[\Cc]ategory:[Ii]mages with unknown source/)	{		$text .= "\n\n";		if($test_only)		{			notelog("Would create category [[:$cat\n");		}		else		{			Pearle::postPage($cat, $editTime, $startTime, $token, $text, "Created category", "no");			userwarnlog("*Created category $cat\n");		}	} }
 * 1) See if the specified category exists, and if not, create it

sub loadNotificationList {	my $file = shift; my %notelist; my $i = 0; notelog("File: $file\n"); open INFILE, "<", $file; while(<INFILE>) {		$_ =~ s/\s*#.*$//g; chomp; $notelist{$_} = 1; $i++; }	close INFILE; notelog("$i notifications loaded\n"); return %notelist; }

sub saveNotificationList {	return if($test_only); my $file = shift; my %notelist = @_; my $key; open OUTFILE, ">", $file; foreach $key (keys(%notelist)) {		print OUTFILE "$key\n"; }	close OUTFILE; }

1;