User:FairuseBot/libBot.pm


 * 1) !/usr/bin/perl


 * 1) libBot: A Perl module of useful routines for running a bot

package libBot;

use strict; use warnings;

use Pearle; use Data::Dumper; use Array::Utils;

require Exporter;

our @ISA = qw(Exporter); our @EXPORT = qw(config usernotify wikilog botwarnlog notelog LoadInfoboxPatterns FixupLinks MakeWikiRegex DoIHaveMessages GetPageCategories GetPageTemplates GetLinksOnPage GetPageText GetPageList GetFullPageList GetImageNames SaveImage UpdateLink RemoveImageFromPage ReplaceImage IsNotified IsPageNotified isDated getDate getUploadDates getLastEditDate GetImageUploader loadNotificationList saveNotificationList usesTemplate DoesPageExist); our $VERSION = 1.00;

my $test_only = 0; my $username = ""; my @infobox_patterns = ;

sub config {	my %params = @_; $test_only = $params{test_only} if(defined($params{test_only})); $username = $params{username} if(defined($params{username})); }

sub usernotify {	my ($wikipage, $text, $user, $summary); $wikipage = $_[1]; $summary = $_[2]; $summary = "Logging warning message" if(!defined($summary)); if(!$wikipage->isa("Pearle::WikiPage")) {		Pearle::myLog(0, "usernotify: Second parameter is not a WikiPage object\n"); die "usernotify: Second parameter is not a WikiPage object\n"; }	# We've been handed an editing session Pearle::myLog(4, "Warning with existing edit session\n");
 * 1) Log a warning on a user's talkpage, using an existing edit session

if($test_only) {		print STDERR $_[0]; return; }	if($wikipage->getWikiText =~ /^#redirect/i) {		botwarnlog("*User talk page User talk:$user is a redirect\n"); return; }	$text = $wikipage->getEditableText; $text .= $_[0]; $wikipage->setEditableText($text); Pearle::postPage($wikipage, $summary, 0); print STDERR $_[0]; }

sub wikilog {	my($target, $text, $token, $summary); $target = $_[0]; $text = $_[1]; $summary = $_[2] || "Logging note";
 * 1) General-purpose on-Wiki logging routine

chomp($text); $text = "\n$text" if($text !~ /^\n/);	# The edit API eats trailing newlines, so prepend a newline if the message doesn't already have one.

eval {		$token = Pearle::getToken($target); };	if($@) {		if($@ =~ /^925/) {			Pearle::myLog(1, "Failed to notify: Protected page $target\n"); return; }		else {			die; }	}

if($test_only) {		print STDERR $_[1]; return; }

Pearle::appendToPage($target, $token, $text, $summary, 0); }

sub botwarnlog {	my ($page, $text, $summary); $text = $_[0]; $summary = $_[1]; $summary = "Logging warning message" if(!defined($summary)); $page = "User talk:${username}/log"; wikilog($page, $text, $summary); }
 * 1) Log a warning on the talk page of the bot

sub notelog {	print STDERR @_; }
 * 1) Log a notification message to the console

sub FixupLinks {	my $link = shift; $link =~ s/\[\[(Category|Image|File)/[[:$1/g;	return $link; }
 * 1) Fix all wikilinks in a string so that they shows as a link, not inline, if it's for a category or image

sub MakeWikiRegex {	my $string = shift; my @chars = split //, $string; my $result = '';
 * 1) Make a string into a Wikipedia-compatible regex

return undef if(!defined($string));

foreach my $char (@chars) {		# Escape metacharacters, and add percent-encoding for certain characters if($char eq '\\') {$result .= '\\\\';} elsif($char eq '/') {$result .= '(?i:\/|%2F)';} elsif($char eq '.') {$result .= '\.';} elsif($char eq '(') {$result .= '(?i:\(|%28)';} elsif($char eq ')') {$result .= '(?i:\)|%29)';} elsif($char eq '[') {$result .= '\[';} elsif($char eq ']') {$result .= '\]';} elsif($char eq '+') {$result .= '\+';} elsif($char eq '*') {$result .= '\*';} elsif($char eq '?') {$result .= '(?i:\?|%3F)';} elsif($char eq '^') {$result .= '\^';} elsif($char eq '$') {$result .= '\$';} elsif($char eq '&') {$result .= '(?i:&|%26)';} elsif($char eq '!') {$result .= '(?i:!|%21)';} elsif($char eq '~') {$result .= '(?i:~|%7E)';} elsif($char eq "'") {$result .= "(?i:'|%27)";} elsif($char eq '"') {$result .= '(?i:"|%22)';} elsif($char eq ',') {$result .= '(?i:,|%2C)';} else {$result .= $char;} }	# Process the string to match both with spaces and with underscores $result =~ s/[ _]/[ _]+/g;

# Process the string to match both upcase and lowercase first characters if($result =~ /^[A-Za-z]/) {		$result =~ s/^(.)/"[$1".lc($1)."]"/e; }	return $result; }

sub HTMLEncode {	my $char = shift; return sprintf("&X%X;", ord($char)); }

sub MakeFancyRegex {	my $string = shift; my @chars = split //, $string; my $result; foreach my $char (@chars) {		if($char eq '\\') {			$result .= "(\\\\|%5C|%5c|&x5C;)"; }		elsif($char eq '.') {		}		elsif($char eq '(')		{		}		elsif($char eq ')') {		}		else {			$result .= "($char|" . uri_escape_utf8($char) . "|" . lc(uri_escape_utf8($char)) . "|" . HTMLEncode($char) . "|" . lc(HTMLEncode($char)) . ")"; }	}
 * 1) Make a string into something that can match most image name formats

return $result; }

sub DoIHaveMessages {	my $xml = shift; my $parsed_xml = ref($xml)?$xml:Pearle::getXMLParser->XMLin($xml); if(exists($parsed_xml->{query}->{userinfo}->{messages}) and defined($parsed_xml->{query}->{userinfo}->{messages})) {		return 1; }	else {		return 0; } }
 * 1) Check for new talk page messages

sub GetPageCategories {	my $xml = shift; my @pages = ; if(defined($xml)) {		my $parsed_xml = ref($xml)?$xml:Pearle::getXMLParser->XMLin($xml); Pearle::myLog(4, Dumper($parsed_xml)); if(exists($parsed_xml->{query}->{pages}->{page}->{categories}->{cl}) and defined($parsed_xml->{query}->{pages}->{page}->{categories}->{cl})) {			if(ref($parsed_xml->{query}->{pages}->{page}->{categories}->{cl}) eq 'ARRAY') {				my @all_pages = @{$parsed_xml->{query}->{pages}->{page}->{categories}->{cl}}; @pages = map {$_->{title}} @all_pages; }			else {				@pages = ($parsed_xml->{query}->{pages}->{page}->{categories}->{cl}->{title}); }		}	}	return @pages; }

sub GetLinksOnPage {	my $xml = shift; my @pages = ; if(defined($xml)) {		my $parsed_xml = ref($xml)?$xml:Pearle::getXMLParser->XMLin($xml); Pearle::myLog(4, Dumper($parsed_xml)); if(exists($parsed_xml->{query}->{pages}->{page}->{links}->{pl}) and defined($parsed_xml->{query}->{pages}->{page}->{links}->{pl})) {			if(ref($parsed_xml->{query}->{pages}->{page}->{links}->{pl}) eq 'ARRAY') {				my @all_pages = @{$parsed_xml->{query}->{pages}->{page}->{links}->{pl}}; @pages = map {$_->{title}} @all_pages; }			else {				@pages = ($parsed_xml->{query}->{pages}->{page}->{links}->{pl}->{title}); }		}	}	return @pages; }

sub GetPageText {	my $xml = shift; my $text = undef; if(defined($xml)) {		my $parsed_xml = ref($xml)?$xml:Pearle::getXMLParser->XMLin($xml); Pearle::myLog(4, Dumper($parsed_xml)); if(exists($parsed_xml->{query}->{pages}->{page}->{revisions}->{rev}->{content}) and defined($parsed_xml->{query}->{pages}->{page}->{revisions}->{rev}->{content})) {			if(ref($parsed_xml->{query}->{pages}->{page}->{revisions}->{rev}->{content})) {				# The API/XML parser interact to produce a HASH ref if the revision is empty $text = ""; }			else {				$text = $parsed_xml->{query}->{pages}->{page}->{revisions}->{rev}->{content}; }		}	}	return $text; }

sub GetPageTemplates {	my $xml = shift; my @templates; if(defined($xml)) {		my $parsed_xml = ref($xml)?$xml:Pearle::getXMLParser->XMLin($xml, ForceArray => ['tl']); if(exists($parsed_xml->{query}->{pages}->{page}->{templates}->{tl}) and defined($parsed_xml->{query}->{pages}->{page}->{templates}->{tl})) {			@templates = map {$_->{title}} @{$parsed_xml->{query}->{pages}->{page}->{templates}->{tl}}; }	}	return @templates; }
 * 1) Input: XML from the API, generated with prop => 'templates' and with only a single title
 * 2)        Either as text or as a parsed tree
 * 3) Returns: A list of templates used by the page
 * 4) Side effects: None
 * 1) Side effects: None
 * 1) Side effects: None

sub GetPageList {	my $xml = shift; my $image; my @pages = ;
 * 1) Input: XML, either a tree produced by parsing, or XML text
 * 2) Returns: A list of pages that this image is used on
 * 3) Side effects: For pages in certain namespaces, posts on the bot's log page
 * 1) Side effects: For pages in certain namespaces, posts on the bot's log page
 * 1) Side effects: For pages in certain namespaces, posts on the bot's log page

if(defined($xml)) {		my $parsed_xml = ref($xml)?$xml:Pearle::getXMLParser->XMLin($xml, ForceArray => ['iu']); my $image = $parsed_xml->{query}->{pages}->{page}->{title};

Pearle::myLog(4, Dumper($parsed_xml)); if(exists($parsed_xml->{query}->{imageusage}->{iu}) and defined($parsed_xml->{query}->{imageusage}->{iu})) {			my @bad_pages = grep {$_->{ns} == 10 or $_->{ns} == 12} @{$parsed_xml->{query}->{imageusage}->{iu}}; my @good_pages = grep {$_->{ns} != 10 and $_->{ns} != 12} @{$parsed_xml->{query}->{imageusage}->{iu}}; @pages = map {$_->{title}} @good_pages; if(scalar(@bad_pages) > 0 and defined($image))	# If "image" is undefined, we're probably doing a pure usage check, rather than one in preparation for removal {				my $notice; foreach my $page (@bad_pages) {					$notice .= "*Found image $image in $page->{title}\n"; }				botwarnlog($notice); }		}	}	return @pages; }

sub GetFullPageList {	my $xml = shift; my $image; my @pages = ;
 * 1) Get all pages.  Don't filter for bad namespaces.

if(defined($xml)) {		my $parsed_xml = ref($xml)?$xml:Pearle::getXMLParser->XMLin($xml, ForceArray => ['iu']); my $image = $parsed_xml->{query}->{pages}->{page}->{title};

Pearle::myLog(4, Dumper($parsed_xml)); if(exists($parsed_xml->{query}->{imageusage}->{iu}) and defined($parsed_xml->{query}->{imageusage}->{iu})) {			@pages = map {$_->{title}} @{$parsed_xml->{query}->{imageusage}->{iu}}; }	}	return @pages; }

sub GetImageNames {	my $xml = shift; my @names; if(defined($xml)) {		my $parsed_xml = ref($xml)?$xml:Pearle::getXMLParser->XMLin($xml, ForceArray => ['bl']); if(exists($parsed_xml->{query}->{backlinks}->{bl}) and defined($parsed_xml->{query}->{backlinks}->{bl})) {			@names = map {$_->{title}} grep( {defined($_->{redirect})} @{$parsed_xml->{query}->{backlinks}->{bl}}); }	}	return @names; }
 * 1) Input: XML from the API, generated with list => 'backlinks'.  blfilterredir => 'redirects' is recommended but not mandatory.
 * 2)        Either as text or as a parsed tree
 * 3) Returns: A list of redirects to the image
 * 4) Side effects: None
 * 1) Side effects: None
 * 1) Side effects: None

sub UpdateLink {	my $page = shift; my $from = shift; my $to = shift; my $summary = shift || "Updating link to bypass a redirect or disambiguation page"; die "No page to edit" if(!defined($page)); die "No link to change" if(!defined($from)); die "No new link" if(!defined($to)); Pearle::myLog(3, "Updating link from $from to $to for page $page\n"); my $wikipage = Pearle::getPage($page); $wikipage->canonicalizeLinks; my $text = $wikipage->getEditableText; my $link_regex = MakeWikiRegex($from); my $matches = $text =~ s/\x01($link_regex)\x02/\x01${to}|${1}\x02/gi; $matches += $text =~ s/\x01$link_regex([#|])/\x01${to}${1}/gi; $matches += $text =~ s/([^=]\s*=\s*)$link_regex(\s*[|\n])/${1}$to${2}/gi; $wikipage->setEditableText($text); print $text; if($matches > 0) {		Pearle::postPage( $wikipage, $summary, 0); }	else {		Pearle::myLog(3, "No update\n"); }	return $matches; }

sub RemoveImageFromPage {	my $image = shift; my $page = shift; my $image_regex = shift; my $removal_prefix = shift; my $removal_comment = shift;

my $wikipage; my $text; my ($match1, $match2); my $old_length; my $new_length; my $change_len; my $match_len;

tryagain: # Fetch an article page $wikipage = Pearle::getPage($page); $wikipage->canonicalizeLinks; $text = $wikipage->getEditableText; if(!defined($text)) {		Pearle::myLog(1, "Error: Bad edit page $page\n"); botwarnlog(FixupLinks("*Error: Bad edit page $page\n")); sleep(300); return 0; }	if($text =~ /^\s*$/) {		# Might be protected instead of empty Pearle::myLog(1, "Error: Empty or protected page $page\n"); botwarnlog(FixupLinks("*Error: Empty or protected page $page\n")); sleep(300); return 0; }	if($text =~ /^#redirect/i) {		Pearle::myLog(1, "Redirect found for page $page (image $image)\n"); botwarnlog(FixupLinks("*Redirect found for page $page (image $image)\n")); print $text; return 0; }

# Remove the image my $regex3 = "([ \\t]*\x01${image_regex}[^\x01]*?(\x01[^\x02]*?\x02[^\x01]*?|)+\x02[ \\t]*)";		# Regex to match images

#my $regex3 = "(	#              [ \\t]*                          # Any leading whitespace	#               \x01                             # Open double-bracket for the image	#               ${image_regex}                   # The image itself	#               [^\x01]*?                        # Anything up to the first link in the caption, or a closing double bracket (minimal match)	#                   (\x01                            # Open double-bracket for a link in the caption	#                   [^\x02]*?                        # Anything but a closing double-bracket	#                   \x02                             # The closing double-bracket for the link	#                   [^\x01]*?|)                      # Any non-link text, or nothing	#                   +                                # Matches one or more times	#               \x02                             # The closing double-bracket for the image #              [ \\t]*)                         # Any trailing whitespace	#             ";

my $regex3ex = "\\w[ \\t]*${regex3}[ \\t]*\\w";								# Regex to try to spot inline images my $regex3g = "(${image_regex}.*)";											# Regex to match gallery images my ($raw_image) = $image =~ /(?:Image|File):(.*)/;

Pearle::myLog(3, "Regex 3: $regex3\n"); notelog("Regex 3 extended: $regex3ex\n"); notelog("Regex 3 gallery: $regex3g\n"); Pearle::myLog(3, "Raw regex: $raw_image\n"); if($text =~ /$regex3ex/) {		Pearle::myLog(1, "Possible inline image in $page\n"); botwarnlog(FixupLinks("*Possible inline image $image in $page\n")); return 0;	# Can't do gallery matching because that also matches regular images, and odds are, we don't have an infobox }	$text =~ /$regex3/; $match_len = length($1); my @matches = $text =~ /$regex3/g; if(grep {$_ =~ /[\x{F0000}-\x{FFFFF}]/} @matches) # If any images have comments in their captions, we can't remove them {		botwarnlog(FixupLinks("*Comment in image in $page\n")); goto skipregular; }

if(defined($removal_prefix)) {		$match2 = $text =~ s/$regex3//g; }	else {		$match2 = $text =~ s/$regex3//g; }

$new_length = length($text); print "Num: $match2 Len: $match_len\n"; if($match2) {		if($match_len < (2 + length($image))) {			Pearle::myLog(0, "Short replacement of $match_len bytes (min ". (length($image) + 2). ") in $page ($match2 matches). Exiting.\n"); Pearle::myLog(0, "Text:\n$text\n"); print Dumper($1); print Dumper($image); exit; }		# If many matches, log a warning if($match2 > 2) {			Pearle::myLog(3, "More than one match ($match2) in page $page\n"); }		if($match2 > 100) {			Pearle::myLog(1, "Too many matches ($match2) in page $page. Skipping.\n"); botwarnlog("*Too many matches ($match2) in page $page. Skipping.\n"); return 0; }	}

skipregular: # Put the text back and get it again in order to fold any comments resulting from removing non-gallery images. # This is because gallery image matching will also match commented images. $wikipage->setEditableText($text); $text = $wikipage->getEditableText;

if($text =~ / 0) {		if($text =~ /\[\[(?: |)<!--/) {			Pearle::myLog(2, "Possible multiline image in page $page\n"); botwarnlog(FixupLinks("*Possible multiline image in page $page\n")); }	}

# Improved infobox removal my $infobox_regex = "([-A-Za-z0-9_]+[\\p{IsSpace}\x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}]*=)[\\p{IsSpace}\x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*". MakeWikiRegex($raw_image). "[ \x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*"; my $infobox_regex_full = "([-A-Za-z0-9_]+[\\p{IsSpace}\x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}]*=)[\\p{IsSpace}\x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*". '(?:[Ii][Mm][Aa][Gg][Ee]|[Ff][Ii][Ll][Ee])[ _]*:[ _]*'. MakeWikiRegex($raw_image). "[ \x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*"; if($text =~ /$infobox_regex/) {		Pearle::myLog(3, "Matched on infobox regex: $infobox_regex\n"); Pearle::myLog(3, "Infobox parameter: $1\n"); if($& =~ /puic/) {			botwarnlog(FixupLinks("*PUIC in page $page\n")); }		else {			my $sub = $1; my $match_regex = MakeWikiRegex($sub). "[\\p{IsSpace}\x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*". MakeWikiRegex($raw_image). "[ \x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*"; $text =~ s/$match_regex/$sub/g; $match2 += 1; }	}	if($text =~ /$infobox_regex_full/) {		Pearle::myLog(3, "Matched on infobox regex: $infobox_regex_full\n"); Pearle::myLog(3, "Infobox parameter: $1\n"); if($& =~ /puic/) {			botwarnlog(FixupLinks("*PUIC in page $page\n")); }		else {			my $sub = $1; my $match_regex = MakeWikiRegex($sub). "[\\p{IsSpace}\x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*". '(?:[Ii][Mm][Aa][Gg][Ee]|[Ff][Ii][Ll][Ee])[ _]*:[ _]*'. MakeWikiRegex($raw_image). "[ \x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*"; $text =~ s/$match_regex/$sub/g; $match2 += 1; }	}

if($match2)	# No need to null-edit articles anymore {		if($test_only) {			notelog("Test removal from page succeeded\n"); }		else {			# Submit the changes $wikipage->setEditableText($text); eval {				Pearle::postPage($wikipage, $removal_comment, 0); };			if($@) {				if($@ =~ /^924 Spam filter: (.*)$/) {					botwarnlog("*Spam filter on page $page, url $1 \n"); $match2 = 0;	# We weren't able to remove it				} elsif($@ =~ /^922/) {					# Edit conflict. Try editing the page again. botwarnlog("*Edit conflict on page $page\n"); goto tryagain; }				else {					die; }			}		}	}	return ($match2) }

sub ReplaceImage {	my $image = shift; my $page = shift; my $image_regex = shift; my $new_image = shift; my $reason = shift;

my $wikipage; my $text; my ($match1, $match2); my $old_length; my $new_length; my $change_len; my $match_len;

tryagain: # Fetch an article page $wikipage = Pearle::getPage($page); $wikipage->canonicalizeLinks; $text = $wikipage->getEditableText; if(!defined($text)) {		Pearle::myLog(1, "Error: Bad edit page $page\n"); botwarnlog(FixupLinks("*Error: Bad edit page $page\n")); sleep(300); return 0; }	if($text =~ /^\s*$/) {		# Might be protected instead of empty Pearle::myLog(1, "Error: Empty or protected page $page\n"); botwarnlog(FixupLinks("*Error: Empty or protected page $page\n")); sleep(300); return 0; }	if($text =~ /^#redirect/i) {		Pearle::myLog(1, "Redirect found for page $page (image $image)\n"); botwarnlog(FixupLinks("*Redirect found for page $page (image $image)\n")); print $text; return 0; }

# Remove the image my $regex3 = "([ \\t]*\x01)${image_regex}([^\x01]*?(\x01[^\x02]*?\x02[^\x01]*?|)+\x02[ \\t]*)";		# Regex to match images

#my $regex3 = "(	#              [ \\t]*                          # Any leading whitespace	#               \x01                             # Open double-bracket for the image	#               ${image_regex}                   # The image itself	#               [^\x01]*?                        # Anything up to the first link in the caption, or a closing double bracket (minimal match)	#                   (\x01                            # Open double-bracket for a link in the caption	#                   [^\x02]*?                        # Anything but a closing double-bracket	#                   \x02                             # The closing double-bracket for the link	#                   [^\x01]*?|)                      # Any non-link text, or nothing	#                   +                                # Matches one or more times	#               \x02                             # The closing double-bracket for the image #              [ \\t]*)                         # Any trailing whitespace	#             ";

my $regex3g = "${image_regex}(.*)";											# Regex to match gallery images my ($raw_image) = $image =~ /(?:Image|File):(.*)/;

my $regex4m = "(\x01[ _]*[Mm][Ee][Dd][Ii][Aa][ _]*:[ _]*)". MakeWikiRegex($raw_image). "([ _]*\\|([^]]*)\x02)";	# Regex to match inline Media: links

Pearle::myLog(3, "Regex 3: $regex3\n"); notelog("Regex 3 gallery: $regex3g\n"); Pearle::myLog(3, "Raw regex: $raw_image\n"); notelog("Regex 4 Media: $regex4m\n"); $old_length = length($text); $match2 = $text =~ s/$regex3/$1$new_image$2/g;

$new_length = length($text); print "Num: $match2 Len: $match_len\n"; if($match2) {		# If the length change isn't right, log a warning and return without saving if($new_length != $old_length - ($match2 * (length($image) - length($new_image)))) {			botwarnlog(FixupLinks("*Length mismatch on $page replacing $image with $new_image\n")); Pearle::myLog(1, "Length mismatch on $page replacing $image with [{$new_image]]\n"); return 0; }		# If many matches, log a warning if($match2 > 2) {			Pearle::myLog(3, "More than one match ($match2) in page $page\n"); }		if($match2 > 100) {			Pearle::myLog(1, "Too many matches ($match2) in page $page. Skipping.\n"); botwarnlog("*Too many matches ($match2) in page $page. Skipping.\n"); return 0; }	}

if($text =~ /<gallery/i) {		Pearle::myLog(3, "*Possible image gallery in page $page\n"); if($new_image =~ /$regex3g/) {			Pearle::myLog(2, "New image name $new_image is a substring of old image name $image\n"); botwarnlog("*New image $new_image is a substring of $image in page $page\n"); }		else {			my $gallery_matches = $text =~ s/$regex3g/$new_image$1/g; $match2 += $gallery_matches; }	}

# Improved infobox replacement my $infobox_regex = "([-A-Za-z0-9_]+[\\p{IsSpace}\x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}]*=)[\\p{IsSpace}\x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*". MakeWikiRegex($raw_image). "[ \x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*"; my $infobox_regex_full = "([-A-Za-z0-9_]+[\\p{IsSpace}\x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}]*=)[\\p{IsSpace}\x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*". '(?:[Ii][Mm][Aa][Gg][Ee]|[Ff][Ii][Ll][Ee]) _]*:[ _]*' . MakeWikiRegex($raw_image) . "[ \x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*";	if($text =~ /$infobox_regex/)	{		Pearle::myLog(3, "Matched on infobox regex: $infobox_regex\n");		Pearle::myLog(3, "Infobox parameter: $1\n");		if([[ _]*:[ _]*' . MakeWikiRegex($raw_image) . "[ \x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*";	if($text =~ /$infobox_regex/)	{		Pearle::myLog(3, "Matched on infobox regex: $infobox_regex\n");		Pearle::myLog(3, "Infobox parameter: $1\n");		if($& =~ /puic/)		{			botwarnlog(FixupLinks("*PUIC in page [[$page]] =~ /puic/)		{			botwarnlog(FixupLinks("*PUIC in page [[$page\n"));		}		else		{

my $sub = $1; my $match_regex = MakeWikiRegex($sub). "[\\p{IsSpace}\x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*". MakeWikiRegex($raw_image). "[ \x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*"; my $non_prefixed_new_image = $new_image; $non_prefixed_new_image =~ s/^(Image|File)://; $text =~ s/$match_regex/${sub}${non_prefixed_new_image}/g; $match2 += 1; }	}	if($text =~ /$infobox_regex_full/) {		Pearle::myLog(3, "Matched on infobox regex: $infobox_regex_full\n"); Pearle::myLog(3, "Infobox parameter: $1\n"); if($& =~ /puic/) {			botwarnlog(FixupLinks("*PUIC in page $page\n")); }		else {			my $sub = $1; my $match_regex = MakeWikiRegex($sub). "[\\p{IsSpace}\x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*". '(?:[Ii][Mm][Aa][Gg][Ee]|[Ff][Ii][Ll][Ee])[ _]*:[ _]*'. MakeWikiRegex($raw_image). "[ \x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*"; $text =~ s/$match_regex/${sub}$new_image/g; $match2 += 1; }	}

if($match2)	# No need to null-edit articles anymore {		if($test_only) {			notelog("Test removal from page succeeded\n"); }		else {			# Submit the changes $wikipage->setEditableText($text); eval {				Pearle::postPage($wikipage, $reason, 0); };			if($@) {				if($@ =~ /^924 Spam filter: (.*)$/) {					botwarnlog("*Spam filter on page $page, url $1 \n"); $match2 = 0;	# We weren't able to remove it				} elsif($@ =~ /^922/) {					# Edit conflict. Try editing the page again. botwarnlog("*Edit conflict on page $page\n"); goto tryagain; }				else {					die; }			}		}	}	return ($match2) }

sub IsNotified {	my $uploader = shift; my $image_regex = shift; my $image_name = shift; my $notes_ref = shift; my $donts_ref = shift;
 * 1) Returns 1 if the user has been notified, or 0 if they haven't

# Check notification list if(defined($notes_ref) and $notes_ref->{"$uploader,$image_name"}) {		Pearle::myLog(2, "Already notified for this image\n"); return 1; }

if(defined($donts_ref) and $donts_ref->{$uploader}) {		Pearle::myLog(2, "On exception list: $uploader\n"); return 1; }	# Check uploader's talkpage my $page_data = Pearle::APIQuery(titles => "User talk:$uploader", prop => ['links', 'templates'], plnamespace => 6, pllimit => 500, tlnamespace => 10, tllimit => 500); $image_regex = MakeWikiRegex($image_name) if(!defined($image_regex)); if($page_data =~ /$image_regex/) {		Pearle::myLog(2, "Has a link from userpage\n"); return 1; }	if(usesTemplate($page_data, "Template:Nobots")) {		Pearle::myLog(2, "Uses \n"); return 1; }	return 0; }

sub IsPageNotified {	my $page = shift; my $image_regex = shift; my $image_name = shift; my $notes_ref = shift; my $donts_ref = shift; # Check notification list if($notes_ref->{"$page,$image_name"}) {		Pearle::myLog(2, "Already notified for this image\n"); return 1; }
 * 1) Returns 1 if the page has been notified, or 0 if it hasn't

if($donts_ref->{$page}) {		Pearle::myLog(2, "On exception list: $page\n"); return 1; }

# Check page my $page_data = Pearle::APIQuery(titles => $page, prop => 'links', plnamespace => 6); if($page_data =~ /$image_regex/) {		Pearle::myLog(2, "Has a link from page\n"); return 1; }	return 0; }

sub isDated {	my $image_text = shift; if($image_text =~ /\((\d\d?) (\w*) (\d\d\d\d)\)/)	# Dated template {		Pearle::myLog(4, "Dated tag $1 $2 $3\n"); return 1; }	# as of 6 October 2006">	elsif($image_text =~ /as of (\d\d?) (\w*) (\d\d\d\d)/) # Template borked, working off category	{		Pearle::myLog(4, "Template borked; category $1 $2 $3\n");		return 1;	}	elsif($image_text =~ / \d\d\d\d/)	# Generic template	{		Pearle::myLog(4, "Generic tag\n");		return 0;	}	else	{		Pearle::myLog(4, "No tag match\n");		return 0;	} }

sub getDate {	my $image_text = shift; if($image_text =~ /\((\d\d?) (\w*) (\d\d\d\d)\)/) {		Pearle::myLog(4, "Template date $1-$2-$3\n"); return ($1, $2, $3); }	elsif($image_text =~ /as of (\d\d?) (\w*) (\d\d\d\d)/) # Template borked, working off category {		Pearle::myLog(4, "Category date 'as of' $1-$2-$3\n"); return ($1, $2, $3); }	elsif($image_text =~ /from (\d\d?) (\w*) (\d\d\d\d)/) # Alternate category naming {		Pearle::myLog(4, "Category date 'from' $1-$2-$3\n"); return ($1, $2, $3); }	else {		Pearle::myLog(4, "No date\n"); return (1, "January", 2007); } }
 * 1) Return the tag date if there is one, the upload date if not
 * 2) Returns in (day, month, year) format

sub getUploadDates {	my @dates; my $image_text = shift; while($image_text =~ />\d\d?:\d\d, (\d\d?) (\w*) (\d\d\d\d)XMLin($image_data);
 * 1) Find the most recent non-vandal, non-revert uploader

Pearle::myLog(4, Dumper($parsed_xml)); if(exists($parsed_xml->{query}->{pages}->{page}->{imageinfo}->{ii}) and defined($parsed_xml->{query}->{pages}->{page}->{imageinfo}->{ii})) {		if(ref($parsed_xml->{query}->{pages}->{page}->{imageinfo}->{ii}) eq 'ARRAY') {			@uploaders = @{$parsed_xml->{query}->{pages}->{page}->{imageinfo}->{ii}}; }		else {			return $parsed_xml->{query}->{pages}->{page}->{imageinfo}->{ii}->{user}; }	}	else {		return undef; }	$uploader = $uploaders[0]->{user}; $sha1 = $uploaders[0]->{sha1}; $comment = $uploaders[0]->{comment} || ""; my $done = 0; while(!$done) {		if($comment =~ /^Reverted/) {			Pearle::myLog(4, "Revert found\n"); $i += 1; while($uploaders[$i]->{sha1} ne $sha1) {				$i = $i + 1; }		}		elsif($comment =~ /optimi(z|s)ed|adjust|tweak|scale|crop|change|resize|remove|reduc(e|ing)/i) {			Pearle::myLog(4, "Tweak found\n"); $i = $i + 1; }		elsif(!defined($uploader)) {			Pearle::myLog(4, "Something went wrong with finding the uploader\n"); $done = 1; }		elsif($count > 500) {			Pearle::myLog(4, "Took too long finding the uploader\n"); $uploader = undef; $done = 1; }		else {			$done = 1; }		$uploader = $uploaders[$i]->{user}; $sha1 = $uploaders[$i]->{sha1}; $comment = $uploaders[$i]->{comment} || ""; $count = $count + 1; }	if(defined($uploader)) {		Pearle::myLog(4, "Uploader: $uploader\n"); return $uploader; }	else {		return undef; } }

sub loadNotificationList {	my $file = shift; my %notelist; my $i = 0; notelog("File: $file\n"); open INFILE, "<:utf8", $file; while() {		$_ =~ s/\s*#.*$//g; chomp; $notelist{$_} = 1; $i++; }	close INFILE; notelog("$i notifications loaded\n"); return %notelist; }

sub saveNotificationList {	return if($test_only); my $file = shift; my %notelist = @_; my $key; open OUTFILE, ">:utf8", $file; foreach $key (keys(%notelist)) {		print OUTFILE "$key\n"; }	close OUTFILE; }

sub usesTemplate {	my $xml = shift; my @templates = @_; my $parsed_xml = ref($xml)?$xml:Pearle::getXMLParser->XMLin($xml, ForceArray => ['tl']); Pearle::myLog(4, "Templates: " . join(", ", @templates) . "\n"); if(!exists($parsed_xml->{query}->{pages}->{page}->{templates}->{tl}) or !defined($parsed_xml->{query}->{pages}->{page}->{templates}->{tl})) {		Pearle::myLog(4, "No templates or no page\n"); return 0; }	my $result = eval {		my @used_templates = @{$parsed_xml->{query}->{pages}->{page}->{templates}->{tl}}; @used_templates = map {$_->{title}} @used_templates; Pearle::myLog(4, "Used: " . join(", ", @used_templates) . "\n"); Pearle::myLog(4, "Intersect: " . join(", ", Array::Utils::intersect( @templates, @used_templates )) . "\n"); if(Array::Utils::intersect( @templates, @used_templates )) {			return 1; }		else {			return 0; }	};	if($@) {		# Probably more than one page in the xml print "usesTemplate error: $@"; return 0; }	return $result; }
 * 1) Does a page transclude any of a set of templates?  Template names must be in the canonical form, with prefix.
 * 1) 	Pearle::myLog(4, Dumper($parsed_xml));

sub DoesPageExist {	my $page = shift; my $xml = Pearle::APIQuery(titles => [$page], prop => ['info']); # TODO: Handle API query errors ('undef' return values) if($xml =~ /missing=""/) {		return 0; }	return 1; }

1;