# User:AnomieBOT/source/tasks/ReplaceExternalLinks4.pm

does not contain a  or   parameter containing the target URL. Please fix manually.");       }        return ($ret)     };    for my $domain (@domains) {        for my $suffix (@suffixes) {            $replacements{$domain.$suffix}=$repl;        }    } }



# Character class matching anything legal inside a URL: everything except
# square/angle brackets, double quote, ASCII controls and space, DEL, and
# Unicode space separators (\p{Zs}). Interpolated into the link regexes below.
my $chars='[^][<>"\x00-\x20\x7F\p{Zs}]';

sub new {
    my $class=shift;
    my $self=$class->SUPER::new;
    $self->{'proto'}=undef;  # protocols left to scan ('http'/'https')
    $self->{'iter'}=undef;   # current exturlusage iterator, if any

    # Build one regex per replacement key so a matched URL can be mapped back
    # to its handler in %replacements, plus one combined regex matching any key.
    # NOTE(review): the pasted source read "my %remap=;" / "my @re=;", which is
    # a syntax error — the "()" initializers were evidently stripped; restored.
    my %remap=();
    my @re=();
    while(my ($k,$v)=each %replacements){
        my $re=quotemeta($k);
        $re=~s!\\/!/!g;                  # un-escape slashes
        $re=~s/\\\*/$chars*/g;           # '*' wildcard => any run of URL chars
        $re=~s!^(.*?)($|/)!(?i:$1)$2!;   # host part matches case-insensitively
        push @re, $re;
        $remap{$k}=qr!//$re!;
    }
    $self->{'remap'}=\%remap;
    my $re='//(?:'.join('|', @re).')'.$chars.'*';
    $self->{'re'}=qr/$re/;

    bless $self, $class;
    return $self;
}

=pod

=for info Approved 2011-11-02. Bots/Requests for approval/AnomieBOT 58

=cut

sub approved {
    # Approval status flag; -1 presumably marks the task as indefinitely
    # approved under the framework's convention (approval record is in the
    # POD above) — confirm against the base class if in doubt.
    return -1;
}

# Main task entry point: scan pages using the targeted external links and
# rewrite them via the handlers in %replacements. Returns the number of
# seconds to sleep before the task should be called again.
#
# NOTE(review): the pasted source had newlines collapsed, so several
# executable statements had been fused into the trailing comments that
# preceded them (e.g. "my $endtime=time+300;") and were effectively commented
# out. They are restored below; no logic has been changed otherwise.
sub run {
    my ($self, $api)=@_;
    my $res;  # NOTE(review): unused in the visible code; possibly used by stripped content

    $api->task('ReplaceExternalLinks4', 0, 10, qw/d::Templates d::Talk/);

    my $screwup='Errors? User:'.$api->user.'/shutoff/ReplaceExternalLinks4';

    # Spend a max of 5 minutes on this task before restarting
    my $endtime=time+300;

    my $re=$self->{'re'};
    my %remap=%{$self->{'remap'}};
    my $fix=0;
    my $page;

    # Rewrite one external link. $fmt is 1 for bracketed links, 2 for bare
    # links; $url is the link target and $txt any trailing text. Returns the
    # (possibly rewritten) link text.
    my $checkExtLink=sub {
        my ($fmt,$url,$txt)=@_;
        my $prefix;

        if($fmt==2){
            # Duplicate Mediawiki post-processing of bare external links
            $txt=$1.$txt if $url=~s/((?:[<>]|&[lg]t;).*$)//;
            my $sep=',;\.:!?';
            $sep.=')' unless $url=~/\(/;
            $txt=$1.$txt if $url=~s/([$sep]+$)//;

            # There shouldn't be a template inside the url
            $txt=$1.$txt if $url=~s/(\{\{.*$)//;

            $prefix=qr/https?:/;
        } else {
            $prefix=qr/(?:https?:)?/;
        }
        return $url.$txt unless $url=~/^$prefix$re$/;

        # "keys" in void context resets the "each" iterator, in case an
        # earlier call returned out of the loop below mid-iteration.
        keys %remap;
        while(my ($k,$r)=each %remap){
            next unless $url=~/^$prefix$r/;
            my ($ret,$log,$errs,$errb)=$replacements{$k}($url);
            if(defined($ret)){
                $fix++;
                # Percent-encode any characters not valid in a bare URL.
                $ret=~s/([][<>"\x00-\x20\x7F\p{Zs}])/ uri_escape_utf8($1,'\x00-\xff') /ge;
                return $ret.$txt;
            }
            $api->warn("$log in $page") if defined($log);
            $api->whine("$errs in $page", $errb, Pagename=>'User:AnomieBOT/ReplaceExternalLinks4 problems', NoSmallPrint=>1) if(defined($errs) && defined($errb));
        }
        return $url.$txt;
    };

    # Run $checkExtLink over every external link in a chunk of wikitext,
    # leaving nowiki sections, templates, and XLinkBot notices untouched.
    my $fixLinks=sub {
        my $txt=shift;
        my $nowiki;

        # Hide bits we shouldn't process
        ($txt,$nowiki)=$api->strip_nowiki($txt);
        ($txt,$nowiki)=$api->strip_templates($txt, sub { return 1; }, {}, $nowiki);

        # Hide XLinkBot notices
        if($page=~/^User talk:/){
            ($txt,$nowiki)=$api->strip_regex(qr/[^\n]*\[\[User:XLinkBot(?:\||\]\])[^\n]*/, $txt, $nowiki);
        }

        # First, fix any bracketed external link
        $txt=~s{\[((?:https?:)?$re)( *[^\]\x00-\x08\x0a-\x1F]*?)\]}{ '['.($checkExtLink->(1,$1,$2)).']' }ge;

        # Now hide the bracketed external links
        ($txt,$nowiki)=$api->strip_regex(qr{\[(?:https?:)?//[^][<>\x22\x00-\x20\x7F]+ *[^\]\x00-\x08\x0a-\x1F]*?\]}, $txt, $nowiki);

        # Fix any bare external links
        $txt=~s{\b(https?:$re)}{ $checkExtLink->(2,$1,'') }ge;

        # Unstrip
        $txt=$api->replace_stripped($txt,$nowiki);

        return $txt;
    };

    $self->{'proto'}=['http','https'] unless @{$self->{'proto'}//[]};
    while(@{$self->{'proto'}}){
        if(!defined($self->{'iter'})){
            $self->{'iter'}=$api->iterator(
                generator   => 'exturlusage',
                geuprotocol => shift @{$self->{'proto'}},
                geuquery    => [ keys %replacements ],
                geulimit    => '1000', # exturlusage has issues with big lists
            );
        }
        while(my $pg=$self->{'iter'}->next){
            if(!$pg->{'_ok_'}){
                $api->warn("Failed to retrieve page list for ".$self->{'iter'}->iterval.": ".$pg->{'error'}."\n");
                return 60;
            }

            return 0 if $api->halting;

            $page=$pg->{'title'};
            my $tok=$api->edittoken($page, EditRedir => 1);
            if($tok->{'code'} eq 'shutoff'){
                $api->warn("Task disabled: ".$tok->{'content'}."\n");
                return 300;
            }
            if($tok->{'code'} eq 'pageprotected'){
                $api->whine("$page is protected", "Please fix manually.", Pagename=>'User:AnomieBOT/ReplaceExternalLinks4 problems', NoSmallPrint=>1);
                next;
            }
            if($tok->{'code'} eq 'botexcluded'){
                $api->whine("Bot excluded from $page", " ".$tok->{'error'}." . Please fix manually or adjust the exclusion.", Pagename=>'User:AnomieBOT/ReplaceExternalLinks4 problems', NoSmallPrint=>1);
                next;
            }
            if($tok->{'code'} ne 'success'){
                $api->warn("Failed to get edit token for $page: ".$tok->{'error'}."\n");
                next;
            }
            if(exists($tok->{'missing'})){
                $api->warn("WTF? $page does not exist?\n");
                next;
            }

            my $intxt=$tok->{'revisions'}[0]{'slots'}{'main'}{'*'};
            $fix=0;

            # First, process links in templates
            # NOTE(review): the body of this callback appears to have been
            # lost when the source was extracted — as written it replaces
            # every template with the empty string. Recover the original
            # logic before running this task.
            my $outtxt=$api->process_templates($intxt, sub {
                shift; #$name
                my $params=shift;
                shift; #$wikitext
                shift; #$data
                my $oname=shift;

                my $ret="";
                return $ret;
            });

            # Now clean up the rest of the page.
            $outtxt=$fixLinks->($outtxt);

            if($outtxt ne $intxt){
                # NOTE(review): pasted source read "my @summary=;" (syntax
                # error, stripped "()" initializer); restored.
                my @summary=();
                push @summary, "bypassing $fix redirection URL".($fix==1?'':'s') if $fix;
                unless(@summary){
                    $api->warn("Changes made with no summary for $page, not editing");
                    next;
                }
                $summary[$#summary]='and '.$summary[$#summary] if @summary>1;
                my $summary=ucfirst(join((@summary>2)?', ':' ', @summary));
                $api->log("$summary in $page");
                my $r=$api->edit($tok, $outtxt, "$summary. $screwup", 1, 1);
                if(lc($r->{'code'}) eq 'failure' && exists($r->{'edit'}{'spamblacklist'})){
                    my $bl=$r->{'edit'}{'spamblacklist'};
                    $api->log("Write failed on $page: Blacklisted link $bl");
                    $api->warn("Write failed on $page: Blacklisted link $bl\n");
                    $api->whine("Redirect to blacklisted URL in $page", "MediaWiki's spam blacklist complained about $bl . Note there may be more than one blacklisted link in the page. Please fix manually.", Pagename=>'User:AnomieBOT/ReplaceExternalLinks4 problems', NoSmallPrint=>1);
                    next;
                }
                if($r->{'code'} ne 'success'){
                    $api->warn("Write failed on $page: ".$r->{'error'}."\n");
                    next;
                }
            }

            # If we've been at it long enough, let another task have a go.
            return 0 if time>=$endtime;
        }
        $self->{'iter'}=undef;
    }

    $api->log("May be DONE!");
    return 3600;
}

1;