User:FairuseBot/10cbot.pl


 * 1) !/usr/bin/perl
 * 2) A bot to assist with NFCC #10c enforcement
 * 1) A bot to assist with NFCC #10c enforcement

use warnings; use strict;

use Date::Calc qw(Month_to_Text Today); use Data::Dumper; use Array::Utils;

binmode STDOUT, ":utf8";

use libBot;

my $test = 0; my $images_marked = 0;

my %common_pages;		# A list of pages that images have been found on, to see what's linked from templates and therefore shouldn't be included in disambig/redirect searches.

my $homedir = ''; my $permit_interruptions = 1;	# Allow talkpage messages to stop the bot?

my @common_links = ("Copyright", "Copyright infringement", "Fair use", "Logo", "Trademark", "United States copyright law", "Wikimedia",                   "Computer game", "Counterfeit", "Currency", "Free software", "Portable Network Graphics", "Poster", "Public domain",                    "Screenshot", "Station identification", "United States Code", "U.S. state", "Video game", "Wikimedia Foundation",                    "Work of the United States Government","Blu-ray Disc", "Comic book", "Comic strip", "DVD", "Free content",                    "Videotape", "Webcomic", "Scouting", "Personality rights", "Screenshot", "2007", "2008", "Crown copyright",		    "Scalable Vector Graphics", "BSD", "GPL", "JPEG", "Uploading and downloading", "Compression artifact",		    "Vector graphics", "Uniform", "Magazine", "Coat of arms", "Crest (heraldry)", "Emblem", "Seal (device)", "Flag",		    "Graphics Interchange Format");
 * 1) Common links that we don't need to chase for redirect or disambiguation testing
 * 1) my $common_links = join "|", @common_links;

my @free_categories;
 * 1) A selection of common free-image categories

Pearle::init("FairuseBot", "", "$homedir/fairusebot.log","$homedir/10cbot-cookies.txt"); Pearle::config(nullOK => 1, printlevel => 4); config(username => "FairuseBot");

my %notifications = loadNotificationList("$homedir/nfcc10c.note"); my %dont_notify = loadNotificationList("$homedir/nfcc10c.whitelist");

if(!Pearle::login) {	exit; }

@free_categories = Pearle::getSubcategories("Category:Free images"); print "Free-license categories: ", join(",", @free_categories), "\n";
 * 1) Fetch a list of common free licenses

my @images; my $imagenum; if($test) {	@images = ('Image:Dummy315.png'); } else {	open IMAGECOUNTFILE, "<", "$homedir/imagecount.txt"; $imagenum = ; chomp $imagenum; close IMAGECOUNTFILE; open IMAGEFILE, "<:utf8", "$homedir/imagelist.txt"; foreach(1 .. $imagenum) {		my $dummy = ; } }
 * 1) Fetch the list of all non-free images

sub get_next_image {	if($test) {		return shift @images; }	else {		my $image = ; chomp $image; return $image; } }

IMAGE: while(my $image = get_next_image) {	$imagenum += 1; Pearle::myLog(2, "Processing image $image ($imagenum)\n"); # Fetch the image data my $image_data; if($test) {		$image_data = Pearle::APIQuery(titles => [$image], prop => ['links', 'revisions', 'imageinfo', 'categories'], 		                                 plnamespace => [0, 2],  							# Links		                                  rvprop => ['content'],							# Article body		                                  iiprop => ['user', 'comment', 'sha1'], iilimit => 500,			# Upload history		                                  meta => 'userinfo', uiprop => ['hasmsg'], 					# Check for talkpage messages		                                  list => 'imageusage', iutitle => $image, iunamespace => [0, 2], iulimit => 500);	# Image usage }	else {		$image_data = Pearle::APIQuery(titles => [$image], prop => ['links', 'revisions', 'imageinfo', 'categories'], 		                                 plnamespace => [0],	  							# Links		                                  rvprop => ['content'],							# Article body		                                  iiprop => ['user', 'comment', 'sha1'], iilimit => 500,			# Upload history		                                  meta => 'userinfo', uiprop => ['hasmsg'], 					# Check for talkpage messages		                                  list => 'imageusage', iutitle => $image, iunamespace => [0], iulimit => 500);	# Image usage }	if(!defined($image_data)) {		Pearle::myLog(1, "Server did not return an appropriate response on initial query. Deferring image for later.\n"); open DEFERLOG, ">>:utf8", "$homedir/deferred_images.txt"; print DEFERLOG "$image\n"; close DEFERLOG; next; }	# Extract the list of pages where it's used. my @pages = GetPageList($image_data); my $num_pages = scalar(@pages); my @failed_pages; # Extract a list of pages this image links to. my @links = GetPageLinks($image_data); # Filter out common links my @new_links; foreach my $link (@links) {		if(grep {$_ eq $link} @common_links) {			# Do nothing }		else {			Pearle::myLog(4, "Found valid link $link\n"); push @new_links, $link; }	}	@links = @new_links; # Collect link statistics foreach my $page (@links) {		$common_pages{$page}++; if($common_pages{$page} == 20) {			Pearle::myLog(2, "*Adding common page $page to list\n"); push @common_links, $page; }	}	# Extract the body text. my $text = GetPageText($image_data); # Extract the categories my @categories = GetPageCategories($image_data); # Check for interruptions if($permit_interruptions and DoIHaveMessages($image_data)) {		Pearle::myLog(0, "Talkpage message found; exiting on image $image.\n"); last; }	# Sanity check: Does the image still exist? - tested if($image_data =~ /missing=""/) {		Pearle::myLog(2, "*Image $image has been deleted.\n"); next; }	# Sanity check: Is this still tagged as non-free? - tested if(!grep {$_ eq 'Category:All non-free media'} @categories) {		Pearle::myLog(2, "*Image $image is no longer marked as non-free.\n"); next; }	# Sanity check: Is this image already disputed? if((grep {$_ eq 'Category:All disputed non-free images'} @categories) or	  (grep {$_ eq 'Category:All images with no fair use rationale'} @categories)) {		Pearle::myLog(2, "*Image $image is already marked for deletion.\n"); next; }	# Sanity check: Is the image used? if(!@pages) {		# Orphaned fairuse image Pearle::myLog(2, "*Image $image is not used anywhere\n"); if(!grep {$_ eq 'Category:All orphaned fairuse images'} @categories) {			my $text = "\n\n"; wikilog($image, $text, "Non-free image is not used in any article\n"); my $uploader = GetImageUploader($image_data); if(defined($uploader)) {				if(!IsNotified($uploader, MakeWikiRegex($image), $image, \%notifications, \%dont_notify)) {					$text = "\n$image\n"; wikilog("User talk:$uploader", $text, "Image $image is not used in any article"); }			}		}		next; }	# Check: Is the image double-tagged as both free and non-free? if(Array::Utils::intersect( @categories, @free_categories)) {		Pearle::myLog(2, "Image $image is double-tagged as free and non-free\n"); botwarnlog("*Image $image is double-tagged as free and non-free\n"); next; }	# Check for best-case compliance: each use has a matching direct link in the body of the text - tested Pearle::myLog(4, "Image is used in " . scalar(@pages) . " pages.\n"); Pearle::myLog(4, "Image is used on " . join("|", @pages) . "\n"); Pearle::myLog(4, "Image links to " . join("|", @links) . "\n"); foreach my $page (@links)	# Filter out pages that match a link {		@pages = grep {$_ ne $page} @pages; }	Pearle::myLog(2, "Image failed best-case test for " . scalar(@pages) . " pages.\n"); next if !@pages; # Check for liberal compliance: # For each use, remove it from the list if there's a case-insensitive match in the body text - tested foreach my $page (@pages) {		my $page_match_regex = MakeWikiRegex($page); push @failed_pages, $page unless($text =~ /$page_match_regex/i); }	@pages = @failed_pages; @failed_pages = ; Pearle::myLog(2, "Image failed text test for " . scalar(@pages) . " pages.\n"); next if !@pages; # Chase redirects and disambig pages foreach my $link (@links)					# Foreach link {		my @expanded_links = ChaseLinks($link);			# Find out where we can get through it		if(!@expanded_links) {			# @expanded_links will contain at least one link. # If it doesn't, an error occurred. open DEFERLOG, ">>:utf8", "$homedir/deferred_images.txt"; print DEFERLOG "$image\n"; close DEFERLOG; next IMAGE; }		foreach my $page (@pages)				# Foreach page {			if(grep {$page eq $_} @expanded_links)		# If we can get there by chasing this link {				UpdateLink($image, $link, $page);	# Update the link to point to the page Pearle::limit; }			else {				push @failed_pages, $page;		# Otherwise, keep looking for a way to get there }		}							# Note that because we don't break out of the loop, it will take two @pages = @failed_pages;					# passes for the bot to do the right thing in the unlikely case that @failed_pages = ;					# we can get to two pages by expanding one link. last if !@pages;				# If we've found all the pages, exit the loop. }	Pearle::myLog(2, "Image failed link-chasing test for " . scalar(@pages) . " pages.\n"); next if !@pages; # Test for compliance # Over-use (some compliant, some non-compliant): Remove from any non-compliant articles, OrphanBot-style. Leave a note on the article talk page. if(@pages && $num_pages > scalar(@pages)) {		Pearle::myLog(2, "Image $image failed on " . scalar(@pages) . " pages.\n"); my ($cur_y, $cur_m, $cur_d) = Today(1);		# Today in GMT. Generated each time because this bot does *long* runs. open IMAGEFAILLOG, ">>:utf8", "$homedir/partial_failures_${cur_y}-${cur_m}-${cur_d}.txt"; print IMAGEFAILLOG "$image\n"; close IMAGEFAILLOG; $cur_m = Month_to_Text($cur_m); my $text = "\n\n"; wikilog($image, $text, "Image is not compliant with the non-free content rules"); foreach my $page (@pages) {			if(!IsPageNotified("Talk:$page", MakeWikiRegex($image), $image, \%notifications, \%dont_notify)) {				NotifyPage("Talk:$page", $image); $notifications{"Talk:$page,$image"} = 1; }		}		$images_marked++; Pearle::limit(10); }	# Non-compliance (all uses are non-compliant): Mark for deletion, notify the uploader. elsif(@pages && $num_pages == scalar(@pages)) {		Pearle::myLog(2, "Image $image failed on all pages.\n"); my ($cur_y, $cur_m, $cur_d) = Today(1);		# Today in GMT. Generated each time because this bot does *long* runs. $cur_m = Month_to_Text($cur_m); $text = "\n\n"; wikilog($image, $text, "Image is not compliant with the non-free content rules"); my $uploader = GetImageUploader($image_data); if(defined($uploader)) {			if(!IsNotified($uploader, MakeWikiRegex($image), $image, \%notifications, \%dont_notify)) {				NotifyUser($uploader, $image); $notifications{"$uploader,$image"} = 1; }		}		$images_marked++; Pearle::limit(10); }	# No problems else {		Pearle::myLog(2, "Image $image is okay.\n"); }	if($images_marked >= 1000) {		Pearle::myLog(0, "Edit limit reached; exiting\n"); last; }	if(!$test) {		open IMAGENUMFILE, ">", "$homedir/imagecount.txt"; print IMAGENUMFILE $imagenum; close IMAGENUMFILE; Pearle::myLog(4, "Saving notification list\n"); saveNotificationList("$homedir/nfcc10c.note", %notifications); } }
 * 1) Process the list

$imagenum++; if(!$test) {	open IMAGENUMFILE, ">", "$homedir/imagecount.txt"; print IMAGENUMFILE $imagenum; close IMAGENUMFILE;

Pearle::myLog(4, "Saving notification list\n"); saveNotificationList("$homedir/nfcc10c.note", %notifications); }

sub NotifyUser {	my $uploader = shift; my $image = shift; my $uploader_page = "User talk:$uploader"; my $wikipage = Pearle::getPage( $uploader_page ); my $text = $wikipage->getEditableText; $text = $wikipage->unfoldComments($text); if($text =~ /^#redirect/i) {		botwarnlog("*User talk page User talk:$uploader is a redirect\n"); return; }	my $summary = "Image $image is not compliant with the non-free content rules"; if($text =~ //) {		Pearle::myLog(4, "Adding notification to list\n"); # Add an image to the list $text =~ s//${1}\n*$image/; }	elsif($text =~ //) {		Pearle::myLog(4, "Adding notification list\n"); # Add an "additional images" list $text =~ s//\n*$image/; }	else {		Pearle::myLog(4, "Adding notification\n"); $text .= "\n$image --\n"; }	$wikipage->setEditableText($text); Pearle::postPage($wikipage, $summary, 0); }
 * 1) Support functions #############################################

sub NotifyPage {	my $uploader = shift; my $image = shift; my $uploader_page = "$uploader"; my $wikipage = Pearle::getPage( $uploader_page ); my $text = $wikipage->getEditableText; $text = $wikipage->unfoldComments($text); if($text =~ /^#redirect/i) {		botwarnlog("*Talk page $uploader_page is a redirect\n"); return; }	my $summary = "Image $image in this article is not compliant with the non-free content rules"; if($text =~ //) {		Pearle::myLog(4, "Adding notification to list\n"); # Add an image to the list $text =~ s//${1}\n*$image/; }	elsif($text =~ //) {		Pearle::myLog(4, "Adding notification list\n"); # Add an "additional images" list $text =~ s//\n*     $image/; }	else {		Pearle::myLog(4, "Adding notification\n"); $text .= "\n$image --\n"; }	$wikipage->setEditableText($text); Pearle::postPage($wikipage, $summary, 0); }

sub ChaseLinks {	my $link = shift; my @candidates = ($link); my @destinations; my $iterations = 0; my $done = 0; while(!$done) {		my @new_candidates = ; my $page_data = Pearle::APIQuery(titles => \@candidates, prop => ['links', 'templates'], 		                                 redirects => 1,		                                  tlnamespace => [10],				# Templates		                                  plnamespace => [0, 2]);			# Links if(!defined($page_data)) {			Pearle::myLog(1, "Server did not return an appropriate response on chase query. Deferring image for later.\n"); return ; }		my $parsed_xml = Pearle::getXMLParser->XMLin($page_data, ForceArray => ['r', 'page', 'pl', 'tl'] ); Pearle::myLog(4, Dumper($parsed_xml)); # Push all redirect targets to @destinations, because we've already got disambig data, and we don't want to chase redirect loops. my $redirects = $parsed_xml->{query}->{redirects}->{r}; push @destinations, map {$_->{to}} @{$redirects}; # Push all disambig targets to @destinations and to @new_candidates, because we want to do redirect testing on them. my $pages = $parsed_xml->{query}->{pages}->{page}; foreach my $page (@{$pages}) {			if(grep {$_->{title} eq 'Template:Disambig'} @{$page->{templates}->{tl}}) {				# Disambiguation page. For each target, if we don't have it already, push it to @destinations and @new_candidates. foreach my $target (@{$page->{links}->{pl}}) {					if(grep {$_ eq $target->{title}} @destinations) {						# Do nothing }					else {						push @destinations, $target->{title}; push @new_candidates, $target->{title}; }				}			}			else {				# Ordinary page. push @destinations, $page->{title} unless(grep {$_ eq $page->{title}} @destinations); }		}		if(!@new_candidates) {			$done = 1; }		if($iterations > 10) {			Pearle::myLog(1, "Iteration limit exceeded\n"); $done = 1; }		$iterations++; @candidates = @new_candidates; }	Pearle::myLog(4, join(",", @destinations) . "\n"); return @destinations; }