User:XLinkBot/Code/LinkParser.pl


#!/usr/bin/perl

# Detach from the invoking process: when fork succeeds the parent (which sees
# a non-zero child pid) exits and only the child carries on.
fork and exit;

use POE qw (Component::Client::TCP); use HTML::Entities; use LWP::UserAgent; use perlwikipedia; use strict;

# Wiki client; server_input() calls get_text() on it when a diff URL is not an
# index.php URL.
# NOTE(review): the meaning of the two constructor arguments is not visible
# here - confirm against the Perlwikipedia documentation.
my $editor=Perlwikipedia->new("LinkParser","LinkParser");

# HTTP client used by server_input() to download rendered diffs.
my $diffFetcher=LWP::UserAgent->new; $diffFetcher->agent("LinkParser/2.0");

# Run-time settings; 'debug' gates every diagnostic print in this file.
my %settings;

# Debug output is off by default; a debug=... line in the config file would
# overwrite this value, since the loader stores every key it reads.
$settings{'debug'} = 0;

# Load key=value pairs into %settings, skipping lines that start with '#'.
# NOTE(review): this statement is damaged - the file name passed to open() and
# the loop header that presumably followed it (something like
# `"<FILE"); while (<CONFIG>`) are missing, leaving an unterminated string.
# The original file name cannot be determined from this source and must be
# restored before the script will compile.
print "Reading config file...\n" if $settings{'debug'}; open (CONFIG,") { unless (/^#/) { if(/(.+?)=(.+)/) { $settings{$1}=$2; }   } }   close (CONFIG);

# The 'prefixes' setting is a '|'-separated list; in the code visible here it
# is only echoed in debug mode.
my @prefixes = split(/\|/,$settings{'prefixes'});

print ("Prefixes: " . join(" - ", @prefixes) . "\n") if $settings{'debug'};

print "done\n" if $settings{'debug'};

# First command-line argument: TCP port of the local server to connect to.
my $server_port=shift;

# NOTE(review): presumably gives the server time to start listening before we
# connect - confirm.
sleep 4;

# Connect to the server on localhost; `connected` runs once the connection is
# established and `server_input` runs for each input received from the server.
POE::Component::Client::TCP->new(   RemoteAddress       =>'127.0.0.1',    RemotePort          => $server_port,    ServerInput     => \&server_input,    Connected       => \&connected, );

# File-level $heap/$kernel are assigned by connected() and request_edit();
# $number_of_edits counts EDIT lines handled by server_input().
my ($heap,$kernel); my $number_of_edits=0;

# Enter the POE event loop; exit once it returns.
POE::Kernel->run; exit 0;

# server_input - ServerInput callback for the POE TCP client.
#
# Recognised input lines (see the regexes below):
#   EDIT [[<page>]] [[<lang>:User:<name>]] <http diff url> <size>
#   NOEDIT
#
# For an EDIT line the http:// links introduced by the edit are collected.
# If the diff URL contains "index.php", the rendered diff is downloaded and
# the links in the added cells are compared with those in the removed cells;
# otherwise the whole page text is fetched through $editor and every link in
# it counts as added. When at least one link remains, a line of the form
#   PARSED <page> <diffurl> <size> <lang>:User:<name> |<link> <link> ...|
# is sent back to the server. After an EDIT or NOEDIT line a new "REQUEST"
# is sent (NOEDIT waits one second first). Once more than 50 EDIT lines have
# been handled the process exits.
#
# Relies on the file-level $diffFetcher, $editor, %settings and
# $number_of_edits.
#
# NOTE(review): the HTML literals of the original diff regexes were lost (the
# stripped patterns were empty or a single space, and the added/removed
# patterns had become identical). They are reconstructed here from the CSS
# classes MediaWiki uses in its diff table (diff-addedline, diff-deletedline,
# and span/ins/del change markers) - confirm against the live diff HTML.
sub server_input {
    my ( $heap, $kernel, $input ) = @_[ HEAP, KERNEL, ARG0 ];

    if ( $input =~ m{EDIT \[\[(.+)\]\] \[\[(.+):User:(.+?)\]\] (http:\/\/.+) (.+)} ) {
        my ( $pagename, $lang, $username, $diffurl, $size ) = ( $1, $2, $3, $4, $5 );
        $number_of_edits++;

        if ($diffurl) {
            my $addedTotal   = "";
            my $removedTotal = "";

            if ( $diffurl =~ m/index\.php/ ) {
                my $diffUrl  = "$diffurl&diffonly=1&action=render";
                my $response = $diffFetcher->get($diffUrl);

                # A failed fetch yields no diff text rather than having the
                # error page scanned for links.
                my $diffContent = $response->is_success ? $response->content : "";
                print("$diffContent\n") if $settings{'debug'};

                $addedTotal   = _diff_cell_text( $diffContent, 'diff-addedline' );
                $removedTotal = _diff_cell_text( $diffContent, 'diff-deletedline' );
                print("Added data: $addedTotal\n") if $settings{'debug'};
            }
            else {
                # No diff available: treat the complete page text as added.
                $addedTotal = $editor->get_text($pagename);
                $addedTotal = "" unless defined $addedTotal;
                $addedTotal = lc($addedTotal);
            }

            decode_entities($addedTotal);
            decode_entities($removedTotal);

            my @addedlinks   = $addedTotal   =~ m{(http://[^\s\]\[\{\}\\\|^~`<>]+)}sgi;
            my @removedlinks = $removedTotal =~ m{(http://[^\s\]\[\{\}\\\|^~`<>]+)}sgi;

            my @really_added_links;
            if (@addedlinks) {
                if (@removedlinks) {
                    print(  "\nDIFF $diffurl "
                          . join( " ", @addedlinks ) . " - "
                          . join( " ", @removedlinks )
                          . "\n\n" )
                      if $settings{'debug'};
                }

                # Links that also occur in the removed text were merely
                # moved or reformatted, not newly introduced.
                @really_added_links = _links_not_in( \@addedlinks, \@removedlinks );
                print( "DIFF $diffurl " . join( " ", @really_added_links ) . "\n\n" )
                  if $settings{'debug'};
            }

            if (@really_added_links) {
                my $message = "PARSED $pagename $diffurl $size $lang:User:$username |"
                  . join( " ", @really_added_links ) . "|";
                $heap->{server}->put($message);
            }
        }

        $heap->{server}->put("REQUEST");
    }
    elsif ( $input =~ m{NOEDIT} ) {
        sleep 1;
        $heap->{server}->put("REQUEST");
    }

    if ( $number_of_edits > 50 ) {
        # yield() addresses the current (TCP client) session; the original
        # post("shutdown") lacked a destination argument. The process exits
        # straight afterwards regardless.
        $kernel->yield("shutdown");
        exit 0;
    }
}

# Returns the lower-cased text of every diff-table cell in $html whose class
# attribute contains $css_class, joined with single spaces, with inline
# span/ins/del change markers removed.
sub _diff_cell_text {
    my ( $html, $css_class ) = @_;

    my @cells = $html
      =~ m{<td\b[^>]*\bclass="[^"]*\b\Q$css_class\E\b[^"]*"[^>]*>\s*<div[^>]*>(.*?)</div>\s*</td>}sgi;
    my $text = join( ' ', @cells );

    # Strip only the marker tags; the text (and spaces) between them stays.
    $text =~ s{</?(?:span|ins|del)\b[^>]*>}{}gi;

    return lc($text);
}

# Returns the elements of @$candidates (order and duplicates preserved) that
# do not occur in @$excluded.
sub _links_not_in {
    my ( $candidates, $excluded ) = @_;

    my %is_excluded = map { $_ => 1 } @{$excluded};
    return grep { !$is_excluded{$_} } @{$candidates};
}

# connected - Connected callback of the POE TCP client.
# Records the kernel and heap in the file-level variables and asks the server
# for the first item by sending "REQUEST".
sub connected {
    $kernel = $_[KERNEL];
    $heap   = $_[HEAP];

    my $server = $heap->{server};
    $server->put("REQUEST");
}

# request_edit - event handler that stores the kernel and heap in the
# file-level variables and sends a "REQUEST" line to the server.
sub request_edit {
    $kernel = $_[KERNEL];
    $heap   = $_[HEAP];

    my $server = $heap->{server};
    $server->put("REQUEST");
}