User:AnomieBOT/source/tasks/OrphanReferenceFixer.pm

for(my $i=0; $i<@matches; $i+=4){ $text=~s/\Q$matches[$i]\E//g;
 * Strip parameters other than  and   from and  |$)))!oigs);

# Last ref in the page broken? if(defined($matches[$i+3]) && $matches[$i+3] eq ''){ $$b0rken='Last ':'/>');            push @$replacements, {                'orig' => $api->replace_nowiki($old,$nowiki),                'repl' => $api->replace_nowiki($matches[$i],$nowiki)            };        }

$matches[$i+1]=~s/\s+$//g;

# Group? my ($gg,$g); if($matches[$i+1]=~/(\s+group\s*=\s*"([^\x22<]*)")/oi ||          $matches[$i+1]=~/(\s+group\s*=\s*'([^\x27<]*)')/oi ||           $matches[$i+1]=~/(\s+group\s*=\s*([^\x09\x0a\x0c\x0d\x20]+))/oi){ $gg=$1; $g=$2; } else { $gg=; $g=; }

# Ok, parse the list-defined refs if(defined($matches[$i+2])){ %refs=_get_refs2($self,$api,$api->replace_stripped($matches[$i+2],$nowiki),$replacements,$b0rken,$g,'references',%refs); }   }    $text=$api->replace_nowiki($text,$nowiki);

# Next, do reflist and #tag:references $api->process_templates($text, sub {       my $name=shift;        my @params=@{shift};        my $orig=shift;

my ($c, $g, $type);

my $prop = $reflist{$name} // $reflist{"Template:$name"} // undef; if($prop){ ($type=$name)=~s/^Template://; my $groupre=$prop->{'groupre'}; my @refparams=@{$prop->{'refs'}}; $g=$prop->{'group'}; $c=''; foreach my $p ($api->process_paramlist(@params)){ if(grep { $p->{'name'} eq $_ } @refparams) { $c=$p->{'value'}; } elsif($p->{'name'} eq 'group' && $p->{'value'}=~/^\s*([\x22\x27]?)($groupre)\1\s*$/oi){ $g=$2; }           }        } elsif($name=~/^#tag:\s*references$/is){ $type=$name; $c=shift(@params) // ''; my $bad=0; foreach (@params){ if(/^\s*group\s*=\s*([\x22\x27]?)([^\x22\x27]*?)\1\s*$/oi){ $g=$2; } else { $bad=1; }           }

# If it had unrecognized parameters to the tag, strip them if($bad){ my $old=$orig; $orig="\x7b\x7b#tag:references|$c"; $orig.="|group=$g" if $g ne ''; $orig.="\x7d\x7d"; push @$replacements, { 'orig' => $old, 'repl' => $orig };           }        } else { return undef; }

# Ok, parse the list-defined refs %refs=_get_refs2($self,$api,$c,$replacements,$b0rken,$g,$type,%refs);

return ''; });

# And finally, parse the page text. return _get_refs2($self,$api,$text,$replacements,$b0rken,,,%refs); }

# _get_refs2($self, $api, $text, $replacements, $b0rken, $defaultgroup, $listdefined, %refs)
#
# Scans one piece of wikitext for named references and merges what it finds
# into %refs, which is returned as a flat list for the caller to assign back
# to a hash.
#
# Arguments, in the order they are shifted off @_:
#   $self         - task object (not used again in the visible body)
#   $api          - object providing strip_nowiki, replace_nowiki,
#                   process_templates and process_paramlist
#   $text         - wikitext to scan; it is run through $api->strip_nowiki
#                   first, and the second value that call returns ($nowiki)
#                   is passed to every replace_nowiki call
#   $replacements - array ref; hashes { orig => ..., repl => ... } are pushed
#                   onto it for each textual fix-up
#   $b0rken       - scalar ref; assigned a message when a problem is detected
#   $defaultgroup - group used when a ref does not name one; runs of
#                   tab/CR/LF/space are collapsed and the ends trimmed
#   $listdefined  - stored as the 'listdefined' field of every new entry
#   %refs         - refs collected so far, indexed as $refs{group}{name}
#
# Entries are created with the keys orig (array ref), type, content and
# listdefined; 'repl' is filled in together with 'type' and 'content' once
# content is found, and 'broken' is set on the problem path.  'type' is one
# of 'ref', 'tag' or 'tpl'.
#
# NOTE(review): this text looks damaged by extraction.  Statements are fused
# onto lines that begin with '#', the tag-matching regex appears truncated,
# and expressions such as "my ($nn, $n) = (, );", "@contentparams=;" and
# "eq  && $c ne " are missing tokens.  Confirm against the canonical source
# before relying on any detail below.
sub _get_refs2 { my $self=shift; my $api=shift; my ($text,$nowiki)=$api->strip_nowiki(shift); my $replacements=shift; my $b0rken=shift; my $defaultgroup=shift; my $listdefined=shift; my %refs=@_;

# Fix whitespace in default group $defaultgroup =~ s/[\t\r\n ]+/ /g; $defaultgroup =~ s/^\s+|\s+$//g;

# Find all ref tags my @matches=($text=~m!(]*[^/>])?)(?:/>|>(.*?)(|$)))!oigs); for(my $i=0; $i<@matches; $i+=4){ # Last ref in the page broken? if(defined($matches[$i+3]) && $matches[$i+3] eq ''){ $$b0rken='Last ':'/>');           push @$replacements, {                'orig' => $api->replace_nowiki($old,$nowiki),                'repl' => $api->replace_nowiki($matches[$i],$nowiki)            };        }

$matches[$i+1]=~s/\s+$//g;

# Fix obviously incorrect ref bodies. if(defined($matches[$i+2]) &&          ($matches[$i+2]=~/^\s*$/ || $matches[$i+2] eq 'Insert footnote text here')){ my $old=$matches[$i]; $matches[$i+2]=undef; $matches[$i]=''; push @$replacements, { 'orig' => $api->replace_nowiki($old,$nowiki), 'repl' => $api->replace_nowiki($matches[$i],$nowiki) };       }

# Extract params my ($gg, $g) = ('', $defaultgroup); my ($nn, $n) = (, ); my $params = ''; my @m = $matches[$i+1] =~ /$attrRe/g; for(my $j=0; $j<@m; $j+=5){ my $a = lc( $m[$j+1] ); if ( $a eq 'group' || $a eq 'name' || $a eq 'dir' || $a eq 'follow' ) { $m[$j] .= $m[$j+2] if( ($m[$j+2]//) ne ($m[$j+4]//) ); $params .= $m[$j]; ($gg, $g) = ($m[$j], $m[$j+3]//'') if $a eq 'group'; ($nn, $n) = ($m[$j], $m[$j+3]//'') if $a eq 'name'; }       }

# If it's unnamed and empty, remove it completely. if($nn eq '' && !defined($matches[$i+2])){ push @$replacements, { 'orig' => $api->replace_nowiki($matches[$i],$nowiki), 'repl' => '' };           next; }

# Unknown parameters cause errors, so replace them if found. if($matches[$i+1] ne $params) { my $old=$matches[$i]; $matches[$i+1]=$params; $matches[$i]=''.$matches[$i+2].' ':'/>'); push @$replacements, { 'orig' => $api->replace_nowiki($old,$nowiki), 'repl' => $api->replace_nowiki($matches[$i],$nowiki) };       }

# We're not interested if it's unnamed. if($nn eq ''){ $$b0rken='Ref contains ':'/>');           push @$replacements, {                'orig' => $api->replace_nowiki($old,$nowiki),                'repl' => $api->replace_nowiki($matches[$i],$nowiki)            };        }        my $n2 = $n; $n=~s/[\t\r\n ]+/ /g; $n=~s/^\s+|\s+$//g;        if ( $n ne $n2 ) {            my $old=$matches[$i];            $matches[$i+1]=~s/name\s*=\s*([\x22\x27]?)\Q$n2\E\1/name=$1$n$1/i;            $matches[$i]=''.$matches[$i+2].' ':'/>');            push @$replacements, {                'orig' => $api->replace_nowiki($old,$nowiki),                'repl' => $api->replace_nowiki($matches[$i],$nowiki)            };        }

# Integer names cause errors, so replace them if found. if($n=~/^\d+$/){ my $x="renamed_from_".$n."_on_".strftime('%Y%m%d%H%M%S', gmtime); next if index($text, $x)>=0; my $old=$matches[$i]; $matches[$i+1]=~s/name\s*=\s*([\x22\x27]?)$n\1/name=$1$x$1/i; $matches[$i]=''.$matches[$i+2].' ':'/>'); push @$replacements, { 'orig' => $api->replace_nowiki($old,$nowiki), 'repl' => $api->replace_nowiki($matches[$i],$nowiki) };           $n=$x; }

# Decode HTML entities, as MediaWiki does for # somewhere (and then that's probably how it got "orphaned"). To be           # safe, don't use it. $matches[$i+2]=undef; $refs{$g}{$n}{'broken'}=1; $$b0rken='Ref contains # somewhere (and then that's probably how it got "orphaned"). To be           # safe, don't use it. $matches[$i+2]=undef; $refs{$g}{$n}{'broken'}=1; $$b0rken='Ref contains =='; }       if($refs{$g}{$n}{'type'} eq '' && defined($matches[$i+2])){ $refs{$g}{$n}{'type'}='ref'; $refs{$g}{$n}{'repl'}=$api->replace_nowiki($matches[$i+0],$nowiki); $refs{$g}{$n}{'content'}=$api->replace_nowiki($matches[$i+2],$nowiki); }   }

# Darn. Now we have to parse through the page and find all the #tag:refs # and / too. $api->process_templates($text, sub {       my $name=shift;        my @params=@{shift};        my $orig=$api->replace_nowiki(shift,$nowiki);        shift;        my $oname=shift;

my ($type, $groupre, @contentparams); my $g=$defaultgroup; my $c; if($name=~/^#tag:\s*ref$/is){ $oname='#tag:ref'; $type='tag'; $groupre=qr/[^\x22\x27]*/; @contentparams=; $c=$api->replace_nowiki(shift(@params),$nowiki); } elsif(exists($reftpl{$name}) || exists($reftpl{"Template:$name"})){ $type='tpl'; my $props=$reftpl{$name} // $reftpl{"Template:$name"}; $g=$props->{'group'} if $props->{'group'} ne ''; $groupre=$props->{'groupre'}; @contentparams=@{$props->{'content'}}; } else { return undef; }

my $n=undef; my @bad=; foreach my $p ($api->process_paramlist(@params)){ # Whitespace and quotes will be stripped from name and group by #tag, and all the templates use #tag at some level too. my $v = $p->{'value'}; $v=~s/^\s*([\x22\x27]?)(.*?)\1\s*$/$2/; if($p->{'name'} eq 'group'){ $g=$api->replace_nowiki($v,$nowiki) if $v =~ /$groupre/; } elsif($p->{'name'} eq 'name') { $n=$api->replace_nowiki($v,$nowiki); } elsif(grep { $p->{'name'} eq $_ } @contentparams) { $c=$api->replace_nowiki($p->{'value'},$nowiki); } else { push @bad, $p->{'text'}; }       }

# If it's a template, no content, and one "bad" param that contains an `=`, let's guess it's       # a case where they should have used an explicit param name and didn't.        if(!defined($c) && $type ne 'tag' && @bad == 1 && $bad[0]=~/=/ && @contentparams){ $c = pop @bad; my ($cp) = @contentparams; my $old=$orig; $orig="\x7b\x7b$oname"; for my $p (@params) { $p = "$cp=". $api->replace_nowiki($p,$nowiki) if $p eq $c; $orig.='|'. $api->replace_nowiki($p,$nowiki); }           $orig.="\x7d\x7d"; push @$replacements, { 'orig' => $old, 'repl' => $orig };           $c=$api->replace_nowiki($c,$nowiki); }       $c='' if !defined($c);

# We're not interested if it's unnamed. But strip it out if       # it's unnamed and empty, because that's an error. if(!defined($n)){ if($c eq ''){ push @$replacements, { 'orig' => $orig, 'repl' => '' };           }            return undef; }

# If it had unrecognized parameters to the tag, strip them if(@bad && $type eq 'tag'){ my $old=$orig; $orig="\x7b\x7b$oname"; $orig.="|$c"; $orig.="|name=$n" if defined($n); $orig.="|group=$g" if $g ne $defaultgroup; $orig.="\x7d\x7d"; push @$replacements, { 'orig' => $old, 'repl' => $orig };       }

# NOTE(review): the "next if index($text, $x)>=0" in the following statement
# sits inside the process_templates callback sub rather than directly in a
# loop of this function; confirm that leaving the callback this way is intended.
# Integer names cause errors, so replace them if found. if($n=~/^\d+$/){ my $x="renamed_from_".$n."_on_".strftime('%Y%m%d%H%M%S', gmtime); next if index($text, $x)>=0; my $old=$orig; $orig="\x7b\x7b$oname"; if($type eq 'tag'){ $orig.="|$c"; $orig.="|name=$x"; $orig.="|group=$g" if $g ne $defaultgroup; } else { foreach my $p (@params){ $p =~ s/^(\s*name\s*=\s*).*?(\s*)$/$1$x$2/; $orig.='|'. $api->replace_nowiki($p,$nowiki); }           }            $orig.="\x7d\x7d"; $n=$x; push @$replacements, { 'orig' => $old, 'repl' => $orig };       }

# Save detected reference $refs{$g}={} unless exists($refs{$g}); if(!exists($refs{$g}{$n})){ $refs{$g}{$n}={ orig => [], type => '', content => undef, listdefined => $listdefined };       }        if($c=~/^\s*$/){ # Apparently, some people really do this. Don't use empty refs. $c=''; }       push @{$refs{$g}{$n}{'orig'}}, $orig; if($refs{$g}{$n}{'type'} eq  && $c ne ){ $refs{$g}{$n}{'type'}=$type; $refs{$g}{$n}{'repl'}=$orig; $refs{$g}{$n}{'content'}=$c; }

return undef; });

# Hand the merged collection back to the caller as a flat key/value list.
return %refs; }

# process_templates callback to strip templates and store them in the fourth
# parameter hash.
#
# Arguments (unpacked from @_):
#   $name     - name of the template or parser function being visited
#   $params   - second callback argument (not examined here)
#   $wikitext - third callback argument (not examined here)
#   $data     - fourth callback argument, the parameter hash (not examined here)
#
# Returns undef for "#tag:ref" / "#tag:references", for any name present in
# %alltpl, and for any name present in %no_move_refs_out either directly or
# with a "Template:" prefix.  Returns 1 for everything else.
# NOTE(review): presumably 1 asks process_templates to strip the template and
# undef asks it to leave the template alone - confirm against process_templates.
sub _strip_templates {
    my ($name, $params, $wikitext, $data) = @_;

    # Never strip the constructs that produce references themselves.
    return undef if $name=~/^#tag:\s*(ref|references)$/is;
    return undef if exists($alltpl{$name});

    # Template in skip list?
    return undef if exists($no_move_refs_out{$name});
    return undef if exists($no_move_refs_out{"Template:$name"});

    return 1;
}

# Date-matching building blocks, all case-insensitive:
#   $months - any full English month name.
#   $sp     - one or more separators: a whitespace character or the literal
#             character written as the second alternative.
#   $dt     - a complete date: day+month or month+day (either part optionally
#             wrapped in [[...]]), an optional comma, then a 1-4 digit year with
#             optional "BC" (also optionally linked); or a yyyy-mm-dd date,
#             optionally negative and optionally linked; or [[yyyy]]-[[mm-dd]].
# NOTE(review): in $sp the second alternative looks like a plain space, which
# \s would already cover; possibly an entity such as a non-breaking space was
# lost from this text - confirm against the canonical source.
my $months=qr/(?:January|February|March|April|May|June|July|August|September|October|November|December)/i; my $sp=qr/(?:(?:\s| )+)/; my $dt=qr/(?:(?:\d{1,2}$sp$months|\[\[\d{1,2}[ _]$months\]\]|$months$sp\d{1,2}|\[\[$months[ _]\d{1,2}\]\])$sp?,?$sp?(?:\d{1,4}(?:${sp}BC)?|\[\[\d{1,4}(?:[ _]BC)?\]\])|-?\d{4}-\d{2}-\d{2}|\[\[-?\d{4}-\d{2}-\d{2}\]\]|\[\[-?\d{4}\]\]-\[\[\d{2}-\d{2}\]\])/i;
# Regexes used below

# _check_linked_pages($api, $self, $pages, $type, $pageid, $title, $refs,
#                     $needed, $found, $replacements, $log)
#
# Check all the pages in the specified query for needed refs.
#
# Arguments (note that $api comes before $self):
#   $api          - API object used for all queries, warnings and the store
#   $self         - task object; used for _get_refs and _log
#   $pages        - array ref of page titles, passed to resolve_redirects
#   $type         - label interpolated into warning messages ("$type links")
#   $pageid       - id of the article being fixed; "p$pageid" is the key in
#                   $api->store remembering which refs were already posted about
#   $title        - title of the article being fixed; it is removed from the
#                   resolved page list and used for the Talk: page post
#   $refs         - hash ref of refs already in the article; $refs->{$g} is
#                   handed to _check_dups
#   $needed       - hash ref keyed "group>name" (split on '>'); each value is
#                   an array ref of original texts to replace; an entry is
#                   deleted once a replacement has been queued for it
#   $found        - hash ref; receives a description string per resolved key
#   $replacements - array ref; { orig => ..., repl => ... } hashes are pushed
#   $log          - array ref; "** Renamed ..." / "** Rescued ..." lines are pushed
#
# Returns:
#    0   normal completion
#   -1   redirect resolution or a revision query failed, or $api->halting is true
#   -2   getting the edit token for, or writing to, the talk page failed
#   300  the edit token request reported 'shutoff'
#
# When more than one distinct version of a needed ref is found and no duplicate
# exists in the target article, nothing is replaced; the versions are listed
# in a new section on "Talk:$title" instead.
#
# NOTE(review): this text looks damaged by extraction.  Statements are fused
# onto '#' lines, initialisers such as "my %found_in_links=;" have lost their
# right-hand side, and one line below is cut off mid-expression.  Confirm
# against the canonical source.
sub _check_linked_pages { my ($api,$self,$pages,$type,$pageid,$title,$refs,$needed,$found,$replacements,$log)=@_; my %found_in_links=; my %dup_in_links=;
 * 1) Check all the pages in the specified query for needed refs

# Resolve any redirects in the list my %r=$api->resolve_redirects(@$pages); if(exists($r{''})){ $api->warn("Failed to resolve redirects in $type links for $title: ".$r{''}{'error'}."\n"); return -1; }   delete $r{$title};

# Get revids for the top revision in all pages, and also get a list of   # recently-edited pages in the list my %revisions=; my @pages=; my $iter=$api->iterator(       titles => bunchlist(500, keys %r),        prop   => 'revisions',        rvprop => 'ids|timestamp',    ); while(my $r=$iter->next){ if(!$r->{'_ok_'}){ $api->warn("Failed to retrieve revids for $type links for $title: ".$r->{'error'}."\n"); return -1; }       next unless exists($r->{'revisions'}[0]{'revid'}); $revisions{$r->{'revisions'}[0]{'revid'}}=1; push @pages, $r->{'title'} if ISO2timestamp($r->{'revisions'}[0]{'timestamp'})>time-86400; }

# Stop early if the bot has been asked to halt.
return -1 if $api->halting;

# NOTE(review): the next line is cut off at
# "ISO2timestamp($r->{'timestamp'})iterator(" - the end of the per-page
# revision loop and the start of the content query appear to be missing.
# Now get the revids for the past 24 hours for all the recently-edited pages for my $p (@pages){ my %rq=(           titles  => $p,            prop    => 'revisions',            rvprop  => 'ids|timestamp',            rvlimit => '100',        ); do { my $res=$api->query(%rq); if($res->{'code'} ne 'success'){ $api->warn("Failed to retrieve older revids for $type links for $p (for $title): ".$res->{'error'}."\n"); return -1; }           if(exists($res->{'query-continue'})){ $rq{'rvcontinue'}=$res->{'query-continue'}{'revisions'}{'rvcontinue'}; } else { delete($rq{'rvcontinue'}); }           foreach my $r (@{(values %{$res->{'query'}{'pages'}})[0]{'revisions'}}){ $revisions{$r->{'revid'}}=1; if(ISO2timestamp($r->{'timestamp'})iterator(       revids => bunchlist(50, keys %revisions),        prop   => 'revisions',        rvprop => 'content|timestamp',        rvslots => 'main',    ); while(my $r=$iter->next){ return -1 if $api->halting; if(!$r->{'_ok_'}){ $api->warn("Failed to retrieve $type revisions for $title: ".$r->{'error'}."\n"); return -1; }       foreach my $rev (@{$r->{'revisions'}//[]}){ next unless exists($rev->{'slots'}{'main'}{'*'});

my $ts=ISO2timestamp($rev->{'timestamp'});

# Get refs from this linked page, and see if any of           # them are the ones we need. my %rrefs=$self->_get_refs($api, $rev->{'slots'}{'main'}{'*'});

foreach (keys %$needed){ my ($g,$n)=split />/, $_, 2; next if !exists($rrefs{$g}{$n}); next if $rrefs{$g}{$n}{'type'} eq '';

next if _is_generic_ref_name($n,$g);

$found_in_links{$_}={} if !exists($found_in_links{$_});

# Did we find a duplicate of a ref already in our target article? my ($dup,$dupref)=_check_dups($g,$rrefs{$g}{$n},$refs->{$g}); $dup_in_links{$_}=[0,$dup,$dupref,$r->{'title'}] if defined($dup);

# Keep only the most recent version from each article my $have_newer=0; while(my ($k,$v)=each %{$found_in_links{$_}}){ if($v->[4]==$r->{'pageid'}){ if($ts>$v->[5]){ delete $found_in_links{$_}{$k} if $ts>$v->[5]; } else { $have_newer=1; }                   }                }                next if $have_newer;

my $content=$rrefs{$g}{$n}{'content'};

# To help minimize false dups, strip whitespace, manipulate # dashes, and remove accessdate parameters from the key, and # sort named template params. my $k=$content; $k=~s/Retrieved (?:on )?$dt/Retrieved xxx/ig; $k=$api->process_templates($k, sub {                   my $name=shift;                    my @params=@{shift};                    return undef unless @params;                    my %p=;                    my $i=1;                    foreach (@params){                        s/\s+//g;                        next if(/^access(date|monthday|daymonth|year)=/);                        if(/^([^=]+)=/){                            $p{$1}=$_;                        } else {                            $p{$i}="$i=$_";                            $i++;                        }                    }                    return "";                }); $k=~s/[\x{2013}\x{2014}]|&([mn]dash|#0*821[12]|#x0*201[34]);/-/g; $k=~s/\x{2212}|&(minus|#0*8722|#x0*2212);/-/g; $k=~s/\s+//g;

# Each stored candidate is [score, replacement text, source title, content,
# source pageid, revision timestamp], keyed by the normalised content $k.
$found_in_links{$_}{$k}=[0,$rrefs{$g}{$n}{'repl'},$r->{'title'},$content,$r->{'pageid'},$ts]; }       }    }    my @talkpost=; $api->store->{"p$pageid"}={} unless exists($api->store->{"p$pageid"}); my $posted=$api->store->{"p$pageid"}; foreach (keys %found_in_links){ my ($g,$n)=split />/, $_, 2; my @repl=keys %{$found_in_links{$_}}; my ($score,$repl,$from,$content); my $dup=undef; if(exists($dup_in_links{$_})){ ($score,$dup,$repl,$from)=@{$dup_in_links{$_}}; } elsif(@repl>1){ # Crap, we have multiple versions of the named ref. next if exists($posted->{$_}); $posted->{$_}=1; my $x="Reference named \"$n\""; $x.=" in group \"$g\"" if $g ne ''; $x.=":\n"; foreach my $k (@repl){ ($score,$repl,$from,$content)=@{$found_in_links{$_}{$k}}; $x.="From $from: $content\n"; }           $x.="\n"; push @talkpost, $x; next; } else { my $k=$repl[0]; ($score,$repl,$from)=@{$found_in_links{$_}{$k}}; }       if(defined($dup)){ foreach my $need (@{$needed->{$_}}) { push @$replacements, { 'orig' => $need, 'repl' => $repl, };           }            $found->{$_}="\"$n\" → \"$dup\" from $from"; push @$log, "** Renamed \"$n\" → \"$dup\" from $from"; } else { push @$replacements, { 'orig' => $needed->{$_}[0], 'repl' => $repl, };           $found->{$_}="\"$n\" from $from"; push @$log, "** Rescued \"$n\" from $from"; }       delete $needed->{$_}; }

# Ambiguous candidates are reported on the article's talk page for a human to resolve.
if(@talkpost){ my $ttok=$api->edittoken('Talk:'.$title); if($ttok->{'code'} eq 'shutoff'){ $api->warn("Task disabled: ".$ttok->{'content'}."\n"); return 300; }       if($ttok->{'code'} ne 'success'){ $api->warn("Failed to get edit token for Talk:$title: ".$ttok->{'error'}."\n"); return -2; }       my $txt="I check pages listed in "; $txt.="Category:Pages with incorrect ref formatting to "; $txt.="try to fix reference errors. One of the things I "; $txt.="do is look for content for "; $txt.="orphaned references "; $txt.="in wikilinked articles. I have found content for "; $txt.="some of $title's orphans, the problem is that "; $txt.="I found more than one version. I can't determine "; $txt.="which (if any) is correct for this article, so "; $txt.="I am asking for a sentient editor to look it over "; $txt.="and copy the correct ref content into this article.\n\n"; $txt.=join("\n", @talkpost); $txt.="\nI apologize if any of the above are effectively "; $txt.="identical; I am just a simple computer program, so "; $txt.="I can't determine whether minor differences are "; $txt.="significant or not. Feel free to remove this comment after fixing the refs. \x7e\x7e\x7e\x7e"; my $r=$api->edit($ttok, $txt, "Orphaned references in $title", 0, 0, section => 'new'); if($r->{'code'} ne 'success'){ $api->warn("Write failed on Talk:$title: ".$r->{'error'}."\n"); return -2; }       $self->_log($api, "* Posted on Talk:$title to request assistance"); $api->store->{"p$pageid"}=$posted; }   return 0; }

# _is_generic_ref_name($n, $g)
#
# Decide whether a reference name is too generic to be worth matching
# against refs found on other pages.
#
#   $n - reference name
#   $g - reference group ('' for the default group)
#
# Returns a true value when the name is generic, a false value otherwise.
# The first two patterns only count in the default group; the ":<digits>"
# pattern counts in any group.
sub _is_generic_ref_name {
    my ($n, $g) = @_;

    return (
        # Skip autogenerated named refs, they're unlikely to be useful
        # matches.
        $g eq '' && $n=~/^autogenerated\d+$/ ||

        # Skip these very generic names, too
        $g eq '' && $n=~/^e\d+$/i ||

        # Thanks, VE
        $n=~/^:\d+$/
    );
}

# _log($self, $api, $entry)
#
# Append $entry plus a trailing newline to the 'log' slot of the hash
# returned by $api->store.  $self is accepted but not used.
# The value of the append expression (the updated log text) is returned.
sub _log {
    my ($self, $api, $entry) = @_;
    my $store = $api->store;
    return $store->{'log'} .= $entry . "\n";
}

# _notify_reverter($self, $api, $user, $page, $revid)
#
# Leave a one-time message on "User talk:$user" about revision $revid of
# $page, explaining how to fix reference errors instead of reverting.
#
#   $self  - task object (used for _log)
#   $api   - API object (store, query, whine, log, warn)
#   $user  - user name; the message goes to "User talk:$user"
#   $page  - page title, used in log messages
#   $revid - revision id; "revert$revid" in $api->store marks it as handled
#
# Returns:
#    0   notice issued, bot excluded from the talk page, or already handled
#   -2   the template check query or the talk page post failed
#   300  the help template was last edited by a non-whitelisted user, or the
#        post reported 'shutoff'
sub _notify_reverter {
    my $self=shift;
    my $api=shift;
    my $user=shift;
    my $page=shift;
    my $revid=shift;

    # Nothing to do if this revert was already handled.  This used to be
    # "next", which is only valid inside a loop: in a sub it either dies or
    # silently jumps to the next iteration of whatever loop the caller is in.
    return 0 if exists($api->store->{"revert$revid"});

    # Refuse to use the help template unless its latest revision was made by
    # a trusted user, so vandalism is never substituted onto talk pages.
    my $template='User:AnomieBOT/OrphanReferenceFixer revert help';
    my $chk=$api->query(
        titles  => $template,
        prop    => 'revisions',
        rvprop  => 'user',
        rvlimit => 1,
    );
    if($chk->{'code'} ne 'success'){
        $api->warn("Could not check $template: ".$chk->{'error'}."\n");
        return -2;
    }
    my $edituser=(values %{$chk->{'query'}{'pages'}})[0]{'revisions'}[0]{'user'};
    unless(grep $_ eq $edituser, ('Anomie')){
        $api->log("An unauthorized user has edited $template!");
        $api->warn("An unauthorized user has edited $template!\n");
        $api->whine("An unauthorized user has edited $template", "An unauthorized user has edited $template, so I am refusing to use it until an authorized user confirms it has not been vandalized by making any edit to it. No offense to $edituser, but I don't want to go substing vandalism on innocent people's talk pages.");
        return 300;
    }

    my $title="Help on reversion";
    my $summary="Provide information on correctly fixing reference errors (instead of reverting)";
    # NOTE(review): the message body is empty here and $template is never
    # interpolated into it; presumably the original text was lost from this
    # copy of the file - confirm against the canonical source.
    my $msg="";
    my $res=$api->whine($title, $msg, Summary => $summary, Pagename => "User talk:$user", OptOut => 'AnomieBOT-OrphanReferenceFixer', NoSmallPrint => 1, NoSig => 1);
    if($res->{'code'} eq 'shutoff'){
        $api->warn("Task disabled: ".$res->{'content'}."\n");
        return 300;
    }
    if($res->{'code'} eq 'botexcluded'){
        # Being excluded counts as handled, so the user is not retried.
        $self->_log($api, "* Tried to give a revert notice to $user about $revid on $page, but I was excluded: ".$res->{'error'}." ");
        $api->log("Bot excluded from User talk:$user: ".$res->{'error'});
        $api->store->{"revert$revid"}=1;
        return 0;
    }
    if($res->{'code'} ne 'success'){
        $api->warn("Failed to get edit token for User talk:$user: ".$res->{'error'}."\n");
        return -2;
    }
    $self->_log($api, "* Issued a revert notice to $user about $revid on $page");
    $api->log("Issued a revert notice to User talk:$user about $revid on $page");
    $api->store->{"revert$revid"}=1;
    return 0;
}

# unit_test($revid [, $filename])
#
# This function can be used to run the bot over arbitrary page content.
# Something like:
#   perl -we 'use tasks::OrphanReferenceFixer; tasks::OrphanReferenceFixer::unit_test($revid[,$filename]);'
#
#   $revid    - revision id to load; its page supplies the page id and title
#   $filename - optional; when true, the page text is read from this file
#               (opened with the :utf8 layer) and revision content is not
#               requested from the API
#
# A task object and an API object (configured from 'conf.ini', with 'noedit'
# set to '/tmp/') are created, process_page is run, the accumulated log and a
# Data::Dumper dump of the result go to STDERR, and the result's 'outtxt' goes
# to STDOUT.  Dies if init returns a defined value, the revision cannot be
# loaded, the file cannot be opened, or process_page returns undef or a
# non-reference.  Returns undef early if load_IWNS_maps fails.
#
# NOTE(review): "$intxt=;" and "prev_info=>[0,,,-1]" below look like they
# lost tokens during extraction (likely a filehandle read and two empty
# strings) - confirm against the canonical source.
sub unit_test { my $revid=shift; my $filename=shift;
 * 1) This function can be used to run the bot over arbitrary page content.
 * 2) Something like:
 * 3)  perl -we 'use tasks::OrphanReferenceFixer; tasks::OrphanReferenceFixer::unit_test($revid[,$filename]);'

# Unbuffered output, and UTF-8 on both output streams.
$|=1;   binmode STDOUT, ':utf8'; binmode STDERR, ':utf8';

my $self=tasks::OrphanReferenceFixer->new; my $api=AnomieBOT::API->new('conf.ini', 1); $api->{'noedit'}='/tmp/'; $api->login; $api->DEBUG(-1); $api->task('OrphanReferenceFixer', 0, 0.1, qw/d::Talk d::Timestamp d::Templates d::Redirects d::IWNS/);

return undef unless $api->load_IWNS_maps;

# init is expected to return undef on success.
my $r=$self->init($api); die "init failed\n" if defined($r);

# Load the revision; content is only requested when no file was supplied.
my $res=$api->query(revids=>$revid,prop=>'info|revisions',rvprop=>$filename?'':'content',rvslots=>'main'); if($res->{'code'} ne 'success'){ die "Could not load revision $revid: ".$res->{'error'}."\n"; }   die "Invalid revid\n" unless(exists($res->{'query'}{'pages'}) && %{$res->{'query'}{'pages'}}); $res=(values(%{$res->{'query'}{'pages'}}))[0]; my $pageid=$res->{'pageid'}; my $lastrevid=$revid; my $title=$res->{'title'}; my $intxt; if($filename){ open X, '<:utf8', $filename or die "Could not open $filename: $!\n"; { local $/=undef; $intxt=; } close X;   } else { $intxt=$res->{'revisions'}[0]{'slots'}{'main'}{'*'}; }

# Seed the per-page state, run the task, then report the outcome.
my $checked={revid=>$lastrevid,continue=>' ',did_summary_links=>0,did_page_links=>0,prev_ts=>-1,prev_info=>[0,,,-1],unfound=>[],ignored=>[]}; $api->store->{$pageid}=$checked; $api->store->{'log'}=''; my $ret=$self->process_page($api,$pageid,$lastrevid,$title,$intxt,$checked,time+86400); print STDERR $api->store->{'log'}."\n"; die "Returned undef\n" unless defined($ret); die "Returned $ret\n" unless ref($ret); my $outtxt=$ret->{'outtxt'}; delete $ret->{'outtxt'}; print STDERR scalar Data::Dumper->Dump([$ret],['ret'])."\n"; print $outtxt; }

1;