User:OrphanBot/orphanbot.pl

The source code for OrphanBot's image-removal task. Requires libBot.pm and Pearle.pm.


#!/usr/bin/perl


# OrphanBot
# A bot to remove images from pages in preparation for deletion

use strict; use warnings; use utf8;

use Date::Calc qw(Delta_Days Decode_Month Month_to_Text Today); use Getopt::Long;

use libBot;

# Directory holding the bot's state files; the notification list, the
# never-notify list, the log and the cookie file are all addressed
# relative to it.
my $homedir = '/path/to/bot/working/directory';

# When true, the tasks use a fixed test image list instead of the live
# category, and image description pages are not edited.
my $test = 0;

my $permit_interruptions = 1;   # Allow talkpage messages to stop the bot?
my $last_image = undef;
my @last_images;
my $task = "";                  # One of "source", "copyright", "unsure", "special", "fairuse", "disputed"
my %users_notified;             # List of users notified. 0, undef = no; 1 = notified once; 2 = notified and second notice
my %notifications;              # List of user,image pairs, used to ensure that no user is ever notified about an image twice.
my %dont_notify = ();           # List of users to never notify

# Params for changing tasks
my (
    $remove_type,
    $removal_comment,
    $removal_prefix,
    @template_match,
    $uploader_warning,
    $uploader_warning_summary,
    $write_remove_log,
    $limit_by_date,
);

# The task is selected on the command line: --task=<name>
GetOptions('task=s' => \$task);

# Generate a signature
#
# Returns the text appended after a talk-page warning: " " when the current
# $task is 'source' or 'copyright', and " -- " for every other task.
# NOTE(review): neither literal contains any signature markup; presumably
# the wiki signature tildes were lost when this page was saved -- confirm
# against the running copy of the bot before relying on these strings.
sub sig {
    return " " if $task eq 'source' or $task eq 'copyright';
    return " -- ";
}

# Load the saved notification pairs and the never-notify list from the
# bot's working directory.
%notifications = loadNotificationList("$homedir/orphanbot.note");
%dont_notify   = loadNotificationList("$homedir/orphanbot.whitelist");

# Set up the Pearle client (log file and cookie file live in $homedir).
Pearle::init("", "", "$homedir/orphanbot.log", "$homedir/cookies.pearle.txt");
Pearle::config(nullOK => 1, printlevel => 4);
config(username => "");

# Without a successful login there is nothing to do; stop here.
Pearle::login() or exit;

# Main run for the selected task.
#
# 1. Picks the image list and the per-task parameters from $task.
# 2. For every image: queries its data, skips Commons/deleted/missing or
#    wrongly-tagged images, warns the uploader once, and -- when the image is
#    at least four days old or no date limit applies -- removes it from the
#    pages that use it.
# 3. Optionally appends the list of edited pages to the image description
#    page.
# 4. Saves the notification list when the loop ends.
#
# Exits early (without saving) on an unknown task or an empty category.
# Leaves the loop early (and then saves) on a talk-page message or a parse
# error.
{
    my @images;
    my $edited = 0;             # True once a page was edited for the current image
    my $images_removed = 0;     # Only referenced by the disabled exit check below

    botwarnlog("=== Beginning set at " . time() . " for task '$task' ===\n");

    # while(1)
    {
        if ($task eq "source") {
            my $cat = "Category:All images with unknown source";
            if ($test) {
                @images = ("Image:Nosuchimage.jpg");
            }
            else {
                @images = Pearle::getCategoryImages($cat);
            }
            $remove_type = 'normal';
            $removal_comment = "Removing image with no source information. Such images that are older than seven days may be deleted at any time.";
            $removal_prefix = "Unsourced image removed:";
            @template_match = ("Template:Di-no source", "Template:No copyright holder", "Template:Di-no source no license");
            $uploader_warning = "{{subst:User:OrphanBot/nosource|";
            $uploader_warning_summary = "You've uploaded an unsourced image";
            $write_remove_log = 1;
            $limit_by_date = 1;
        }
        elsif ($task eq "copyright") {
            my $cat = "Category:All images with unknown copyright status";
            if ($test) {
                @images = ("");
            }
            else {
                @images = Pearle::getCategoryImages($cat);
            }
            $remove_type = 'normal';
            $removal_comment = "Removing image with no copyright information. Such images that are older than seven days may be deleted at any time.";
            $removal_prefix = "Image with unknown copyright status removed:";
            @template_match = ("Template:Di-no license", "Template:No copyright information", "Template:Di-no source no license", "Template:Don't know", "Template:No license needing editor assistance", "Template:Di-no permission");
            $uploader_warning = "{{subst:User:OrphanBot/nocopyright|";
            $uploader_warning_summary = "You've uploaded an image with unknown copyright";
            $write_remove_log = 1;
            $limit_by_date = 1;
        }
        else {
            Pearle::myLog(0, "Unknown task: $task\n");
            exit;
        }
    }

    if (scalar(@images) == 0) {
        Pearle::myLog(2, "Category is empty.\n");
        exit;
    }

    IMAGE:
    foreach my $image (@images) {
        my $image_regex = $image;
        my @pages = ();
        my $page_remove_log = '';   # One "#page" line per page the image was removed from
        my $full_comment = "";      # Edit summary for the image description page
        my ($day, $month, $year);

        Pearle::myLog(2, "Processing image $image\n");

        # Fetch an image page
        my $image_data = Pearle::APIQuery(
            titles      => [$image],
            prop        => ['imageinfo', 'categories', 'templates'],
            iiprop      => ['user', 'sha1', 'comment'],
            cllimit     => 500,
            tllimit     => 500,
            list        => 'imageusage',
            iutitle     => $image,
            iunamespace => [0, 10, 12, 14, 100],
            meta        => 'userinfo',      # Do I have talkpage messages?
        );
        next IMAGE if (!defined($image_data));

        $last_image = $image;

        if ($permit_interruptions and DoIHaveMessages($image_data)) {
            Pearle::myLog(1, "Talkpage message found; exiting on image $image.\n");
            last IMAGE;
        }

        # Images from Commons
        if ($image_data =~ /imagerepository="shared"/) {
            Pearle::myLog(2, "*Commons image $image found\n");
            botwarnlog("*Commons image $image found\n");
            next IMAGE;
        }

        # Check for image existance
        if ($image_data =~ /missing=""/) {
            Pearle::myLog(2, "Image $image has been deleted.\n");
            next IMAGE;
        }

        # The odd case of an image description page without an image
        if ($image_data =~ /imagerepository=""/) {
            Pearle::myLog(2, "*Image $image does not appear to exist.\n");
            botwarnlog("*Image $image does not appear to exist.\n");
            next IMAGE;
        }

        # Check for image copyright tag
        if ((scalar(@template_match) > 0) and (not usesTemplate($image_data, @template_match))) {
            Pearle::myLog(2, "*Image $image in category does not have an appropriate template\n");
            botwarnlog("*Image $image in category does not have an appropriate template\n");
            next IMAGE;
        }

        my ($raw_image) = $image =~ /Image:(.*)/;

        # Stop before an undefined file name can reach MakeWikiRegex and be
        # turned into a removal pattern.
        if (!defined($raw_image)) {
            Pearle::myLog(1, "Parse error on image $image ()\n");
            botwarnlog("*Parse error on image $image ()\n");
            last IMAGE;
        }

        $raw_image = MakeWikiRegex($raw_image);

        # Names without a common image extension are also matched under the
        # "Media:" prefix.
        # NOTE(review): "(:?" may have been meant as "(?:"; left unchanged
        # because RemoveImageFromPage may depend on the current grouping.
        if ($image !~ /(\.jpg|\.jpeg|\.png|\.gif|\.svg)$/i) {
            $image_regex = "[ _]*(:?[Ii]mage|[Mm]edia)[ _]*:[ _]*${raw_image}[ _]*";
        }
        else {
            $image_regex = "[ _]*[Ii]mage[ _]*:[ _]*${raw_image}[ _]*";
        }

        # Sanity check
        if (!defined($raw_image) or $image !~ /$raw_image/) {
            Pearle::myLog(1, "Parse error on image $image ($raw_image)\n");
            botwarnlog("*Parse error on image $image ($raw_image)\n");
            last IMAGE;
        }
        Pearle::myLog(2, "Image regex: $image_regex\n");

        ($day, $month, $year) = getDate($image_data);

        # Notify the user
        my $uploader = GetImageUploader($image_data);
        my $is_notified = 0;
        if (defined($uploader_warning) and defined($uploader)) {
            $is_notified = IsNotified($uploader, $image_regex, $image, \%notifications, \%dont_notify);
        }

        if (defined($uploader_warning) and !$is_notified) {
            if (defined($uploader)) {
                if (!($users_notified{$uploader})) {
                    Pearle::myLog(3, "Warning user $uploader\n");
                    wikilog("User talk:$uploader", "${uploader_warning}${image}}}" . sig() . "\n", $uploader_warning_summary);
                    Pearle::limit();
                    $notifications{"$uploader,$image"} = 1;
                    $users_notified{$uploader} = 1;
                }
                else {
                    Pearle::myLog(3, "User $uploader has already been warned repeatedly\n");
                    $users_notified{$uploader} += 1;
                }
            }
            else {
                Pearle::myLog(1, "Could not determine uploader for $image\n");
            }
        }

        if (!Date::Calc::check_date($year, Decode_Month($month), $day)) {
            Pearle::myLog(1, "Date error for image $image\n");
            botwarnlog("*Date error for image $image\n");
            next IMAGE;
        }

        # Only remove the image when its date is at least four days back,
        # unless the task does not limit by date.
        if ((Delta_Days($year, Decode_Month($month), $day, Today()) >= 4) or !($limit_by_date)) {
            @pages = GetPageList($image_data);
            if (scalar(@pages) == 0) {
                Pearle::myLog(2, "Image $image may already be orphaned\n");
            }

            if (scalar(@pages) > 5) {
                botwarnlog("*Found image $image on " . scalar(@pages) . " content pages\n");
            }

            foreach my $page (@pages) {
                print "Page for removal: $page\n";
                my $parsed_removal_comment = $removal_comment;
                # NOTE(review): this substitution is a no-op as written;
                # presumably its replacement text was lost -- confirm.
                $parsed_removal_comment =~ s/image/image/;
                # Don't limit if we just touched the article
                if (my $hits = RemoveImageFromPage($image, $page, $image_regex, $removal_prefix, $parsed_removal_comment)) {
                    $page_remove_log .= "#$page\n";
                    Pearle::myLog(2, "Removed image $image from article $page $hits times\n");
                    Pearle::limit();
                    $edited = 1;
                }
            }
        }
        else {
            Pearle::myLog(2, "Recent image: notification only\n");
        }

        # Update image description page
        if ($write_remove_log) {
            my $edited_idp = 0;
            my $text = "";

            # Log all removals on the image description page
            if ($page_remove_log ne "") {
                $text .= "\n\nRemoved from the following pages:\n";
                $text .= FixupLinks($page_remove_log);
                $text .= "--~\n";
                $full_comment .= "Listing pages that the image has been removed from";
                $edited_idp = 1;
                print "Remove log\n";
            }

            if ($edited_idp) {
                if ($test) {
                    notelog("Edited image description page\n");
                }
                else {
                    my $wikipage = Pearle::getPage($image);
                    my $pagetext = $wikipage->getEditableText;
                    $pagetext .= $text;
                    $wikipage->setEditableText($pagetext);
                    Pearle::postPage($wikipage, $full_comment, 0);
                }
            }
        }

        # exit if($images_removed >= 100);

        # Pause longer after an image that caused page edits.
        if ($edited) {
            print "Sleeping for 30 seconds\n";
            sleep(30);
        }
        else {
            print "Sleeping for two seconds\n";
            sleep(2);
        }
        $edited = 0;
    }

    notelog("Saving notification list\n");
    # Save to the same file the list was loaded from.
    saveNotificationList("$homedir/orphanbot.note", %notifications);
    Pearle::myLog(2, "Finished with category.\n");
}