User:ImageRemovalBot/removebot-followup.pl

Source code for ImageRemovalBot's second-pass removal, to deal with protected pages and delays in updating the "image usage" table in the database. Requires User:FairuseBot/Pearle.pm and User:FairuseBot/libBot.pm.


#!/usr/bin/perl


# RemoveBot Followup
# A bot to remove deleted images from pages.  Checks up on images RemoveBot was unable to remove at a 24-hour delay.

# A bot to remove deleted images from pages.  Checks up on images RemoveBot was unable to remove at a 24-hour delay.

use strict; use warnings;

use lib '/home/mark/perllib'; use lib '/home/mark/Desktop/Projects/Wikibots/dev/common'; use Fcntl qw(:flock);
#use Date::Calc qw();
#use URI::Escape;

use libBot;

# Directory holding this bot's log, cookie, PID and followup files.
my $homedir = '/home/mark/removebot';

# Allow talkpage messages to stop the bot?
my $permit_interruptions = 0;

# Set up the Pearle layer with the bot name, this run's log file and the
# cookie jar, then tell libBot which account is in use.
Pearle::init(
	"ImageRemovalBot",
	"",
	"$homedir/removebot-followup.log",
	"$homedir/followup-cookies.txt",
);
config(username => "ImageRemovalBot");

# Without a successful login there is nothing to do; stop quietly.
exit unless Pearle::login();

# Check for a running copy.
#
# "$homedir/pid-followup" holds the process ID written by the most recent run.
# If that process still exists and its command line names this script, the
# previous run has not finished: report it and exit instead of starting a
# second copy.  Otherwise (stale, unreadable or malformed PID file) carry on
# and overwrite the file with our own PID.
if(-e "$homedir/pid-followup")
{
	# Possible other copy.  Compare PIDs
	my $pid;
	if(open my $pidfile, "<", "$homedir/pid-followup")
	{
		$pid = <$pidfile>;
		close $pidfile;
	}

	# The file contents end up on a shell command line, so accept nothing
	# but digits.  An empty or garbled file is treated as "no other copy".
	if(defined($pid) and $pid =~ /^\s*(\d+)\s*$/)
	{
		my $checked_pid = $1;

		# Ask ps for the full argument list: its default CMD column is not
		# guaranteed to contain the script name (it may show only the
		# executable name, e.g. "perl").
		my $psresult = `ps -o args= -p $checked_pid`;
		if(defined($psresult) and $psresult =~ /removebot-followup\.pl/)
		{
			botwarnlog("*Previous run is taking longer than normal\n");
			exit;
		}
	}
}

# Record our own PID for the next run's check.  Failing to write it only
# weakens that check, so warn and continue rather than abort.
if(open my $pidfile, ">", "$homedir/pid-followup")
{
	print {$pidfile} $$;
	close $pidfile
		or warn "Unable to finish writing $homedir/pid-followup: $!\n";
}
else
{
	warn "Unable to write $homedir/pid-followup: $!\n";
}

# Images whose followup delay has expired and that this run will process.
my @images;

# Process the followup log.
#
# Each useful line of "$homedir/followup.log" has the form
# "<digits> <image title>", where the number is compared against time().
# Entries older than 24 hours (86400 seconds) are moved into @images; younger
# entries are written back to the log for a later run.  Lines that do not
# match that form are dropped.  If the log does not exist there is nothing to
# do and the script exits.
if(-e "$homedir/followup.log")
{
	my @new_images;
	my $cutoff = time - 86400;

	# Read and rewrite through ONE handle under ONE exclusive lock.  Reading
	# under a shared lock and then reopening with ">" would truncate the file
	# before the second lock is held, losing anything appended in between.
	# NOTE(review): this only protects against writers that also use flock on
	# this file -- confirm against the script that appends to it.
	if(open my $log, "+<", "$homedir/followup.log")
	{
		flock $log, LOCK_EX;

		# Read the log in
		while(my $line = <$log>)
		{
			# "." never matches the newline, so $image needs no chomp.
			my ($date, $image) = $line =~ /(\d+) (.*)/;
			next unless defined($date);

			if($date < $cutoff)
			{
				push @images, $image;
			}
			else
			{
				push @new_images, "$date $image\n";
			}
		}

		# Write out a log containing the entries we aren't going to process now
		seek $log, 0, 0;
		truncate $log, 0;
		print {$log} @new_images;

		# close flushes the buffered output and releases the lock.
		close $log
			or warn "Unable to finish writing $homedir/followup.log: $!\n";
	}
	else
	{
		# Leave @images empty; the run then reports that nothing needs
		# processing, as it did when the open silently failed.
		warn "Unable to open $homedir/followup.log: $!\n";
	}
}
else
{
	Pearle::myLog(2, "No images in followup log\n");
	exit;
}

# Main pass: for every image in @images, confirm it is still deleted and not
# served from Commons, then remove it from each page that still uses it and
# re-query the usage list to verify the removal.
{
	# Edit-summary pieces handed to RemoveImageFromPage; the same for every image.
	my $removal_prefix = "Deleted image removed:";
	my $removal_comment = "Removing deleted image";

	Pearle::myLog(2, "Beginning set at " . time . "\n");

	print join "\n", @images;
	print "\n", scalar(@images), " images found\n";
	if(scalar(@images) == 0)
	{
		Pearle::myLog(1, "*No images in log need processing\n");
	}

	foreach my $image (@images)
	{
		# Fetch image info
		my $image_data = Pearle::APIQuery(titles => [$image], prop => 'imageinfo', meta => 'userinfo', uiprop => ['hasmsg'], 			# Basic data
		                                 list => 'imageusage', iutitle => $image, iunamespace => [0, 10, 12, 14, 100], iulimit => 500);	# Image usage

		if($permit_interruptions and DoIHaveMessages($image_data))
		{
			Pearle::myLog(0, "Talkpage message found; exiting on image $image.\n");
			exit;
		}

		# Verify the image is still deleted
		if($image_data !~ /missing=""/)
		{
			Pearle::myLog(2, "*Image $image has been re-uploaded.\n");
			next;
		}

		# Images from Commons.  May have been masked by the deleted version.
		if($image_data =~ /imagerepository="shared"/)
		{
			Pearle::myLog(2, "*Commons image $image found\n");
			next;
		}

		my @pages = GetPageList($image_data);
		if(scalar(@pages) == 0)
		{
			notelog("Image $image is already orphaned\n");
			next;
		}

		# Pull the bare file name out of the title.  Do this check BEFORE the
		# name is passed on or interpolated, so an unparseable title is
		# reported without ever using an undefined value.
		my ($raw_image) = $image =~ /Image:(.*)/;
		if(!defined($raw_image))
		{
			botwarnlog("*Parse error on image $image ()\n");
			next;
		}
		$raw_image = MakeWikiRegex($raw_image);

		# The extension decides whether this is treated as an image or as
		# some other media file; evaluate the test once.
		my $is_image = ($image =~ /(\.jpg|\.jpeg|\.png|\.gif|\.svg)$/i) ? 1 : 0;

		my $image_regex;
		if(!$is_image)
		{
			# NOTE(review): "(:?" is a capturing group with an optional
			# leading colon; it looks like a typo for "(?:".  Left unchanged
			# because this pattern is only logged, never used for removal.
			$image_regex = "[ _]*(:?[Ii][Mm][Aa][Gg][Ee]|[Mm][Ee][Dd][Ii][Aa])[ _]*:[ _]*${raw_image}[ _]*";
			Pearle::myLog(2, "*Non-image media file $image found.\n");
		}
		else
		{
			$image_regex = "[ _]*[Ii][Mm][Aa][Gg][Ee][ _]*:[ _]*${raw_image}[ _]*";
		}

		# Sanity check: the generated pattern must still match the title.
		if(!defined($raw_image) or $image !~ /$raw_image/)
		{
			my $shown = defined($raw_image) ? $raw_image : "";
			botwarnlog("*Parse error on image $image ($shown)\n");
			next;
		}
		Pearle::myLog(3, "Image regex: $image_regex\n");

		if(!$is_image)
		{
			# Non-image files are too hard to work with: list the pages
			# on-wiki instead of editing them.
			my $page_list = "( ";
			foreach my $page_entry (@pages)
			{
				$page_list .= "$page_entry ";
			}
			$page_list .= ")";
			wikilog("User talk:ImageRemovalBot/media", "*$image $page_list\n");
			next;
		}

		foreach my $page (@pages)
		{
			notelog("Page for removal: $page\n");
			my $hits = RemoveImageFromPage($image, $page, $image_regex, $removal_prefix, $removal_comment);
			if($hits)
			{
				# Throttle only when a removal was actually reported.
				Pearle::myLog(2, "Removed image $image from article $page ($hits times)\n");
				Pearle::limit();
			}
		}

		# Verify removal
		# Portal removal is too hard to get correct, and we don't really care about it.
		# Template removal isn't possible, and the template usage has already been logged.
		$image_data = Pearle::APIQuery(list => 'imageusage', iutitle => $image, iunamespace => [0, 12, 14], iulimit => 500);
		@pages = GetPageList($image_data);

		if(scalar(@pages) != 0)
		{
			botwarnlog("*Unable to remove all instances of $image\n");
			Pearle::myLog(2, "*Unable to remove all instances of $image\n");
		}
	}

	Pearle::myLog(2, "Finished with followup set.\n");
}


 * 1) print "Finished. Total $total_images removed, $total_processed processed.\n";

unlink "$homedir/pid-followup"