User:HighInBC/MCP/NameWatcher

This is a plugin, not a stand alone program. See here for GFDL history prior to 02:54, June 4, 2007.

package HBCPlugins::NameWatcher; use    strict; use    Data::Dumper; use    MediaWiki; use    URI::Escape; our    $self;

sub new { shift; $self = shift; bless($self);

$self->{params}{version_number} = '1.0.2'; $self->{params}{VERSION} = 'HBC NameWatcherBot v'.$self->{params}{version_number}; warn $self->{params}{VERSION}." starting up.\n"; $self->{params}{parameter_expected} = {   'Status'                    => 1, 'Default target'           => 1, 'Long name'                => 1, 'Repeating character'      => 1, 'Write rate'               => 1, 'Recheck duration'         => 1, 'Recheck interval'         => 1, }; $self->{params}{parameter_required} = [   'Status', 'Default target', ]; $self->{params}{default_params} = {   'Write rate'                => 10, 'Long name'        => 0, 'Repeating character'      => 0, };

$self->{params}{whitelist} = []; $self->{params}{blacklist} = []; $self->{params}{delayed_names} = {}; &{$self->{shared}{add_job}}(\&contact_irc_plugin, 0); &{$self->{shared}{add_job}}(\&login,0); &{$self->{shared}{add_job}}(\&check_homoglyphs,2); &{$self->{shared}{add_job}}(\&check_parameters,2); &{$self->{shared}{add_job}}(\&check_whitelist,2); &{$self->{shared}{add_job}}(\&check_blacklist,2); &{$self->{shared}{add_job}}(\&debug_report,5);

return $self; }

sub check_version { my ($active_version,$my_version) = @_; my @active_parts = split(/\./, $active_version); my @my_parts = split(/\./, $my_version);

return 0 if scalar(@active_parts) > scalar(@my_parts); # should never happen

foreach (@active_parts) { my $check_part = shift(@my_parts); last if $check_part > $_; next if $_ <= $check_part; return 0; } return 1; }

sub send_info { my $type = shift; if ($self->{params}{send_info_to}) { my %payloads = (    'client'    => $self->{WP_obj},     'panel'     => $self->{params}{param},     'blacklist' => $self->{params}{blacklist},     'whitelist' => $self->{params}{whitelist}    ); foreach my $target (@{$self->{params}{send_info_to}}) { warn "Sending data to $target: $type\n"; $self->{shared}{$target}->set_info($type, $payloads{$type}); } } }

sub login { warn "Connecting to Wikipedia...\n"; my $c                 =   MediaWiki->new; $c->setup ({                           'bot' => {'user' => $self->{params}{username},'pass' => $self->{params}{password}},                            'wiki' => {'host' => 'en.wikipedia.org','path' => 'w'}                          }) || warn "Failed to log in\n"; my $whoami             =  $c->user; warn "$whoami connected\n"; $self->{WP_obj} = $c; send_info('client'); &{$self->{shared}{add_job}}(\&login,3600); }

sub debug_report { warn Dumper ({   'Params'    => $self->{params}{param},    'Delayed'   => $self->{params}{delayed_names},   }); &{$self->{shared}{add_job}}([\&debug_report],300); }
 * 1)    'Whitelist'        => $self->{params}{whitelist},
 * 2)    'Blacklist'        => $self->{params}{blacklist},

sub contact_irc_plugin { $self->{IRCFeed} = $self->{shared}{$self->{params}{irc_label}} || die; $self->{IRCFeed}->add_hook ({    'check'    => sub        {        my $text = shift;        my $pattern = chr(0x03).'03(.*?)'.chr(0x03);        my $createpattern = chr(0x03).'4\s*create'.chr(0x03);        if (($text =~ m|$createpattern|) && ($text =~ m"$pattern"))          {          return $1;          }        else          {          return 0;          }        },     'callback' => \&check_name,    }   ); $self->{IRCFeed}->add_hook ({    'check'    => sub {$_[0] =~ m|User:HBC NameWatcherBot/Control panel|},     'callback' => \&check_parameters,    }   ); $self->{IRCFeed}->add_hook ({    'check'    => sub {$_[0] =~ m|User:HBC NameWatcherBot/Whitelist|},     'callback' => \&check_whitelist,    }   ); $self->{IRCFeed}->add_hook ({    'check'    => sub {$_[0] =~ m|User:HBC NameWatcherBot/Blacklist|},     'callback' => \&check_blacklist,    }   ); return; }

sub check_parameters { warn "Checking parameters...\n"; my $page = $self->{WP_obj}->get('User:HBC NameWatcherBot/Control panel','r')->content; %{$self->{params}{param}} = ; foreach my $line (split("\n",$page)) {   if ($line =~ m|^;(.*?):(.*)$|) {     $self->{params}{param}{$1} = $2 if ($self->{params}{parameter_expected}{$1}); }   }  die "Bot is not turned on in the control panel\n" unless (lc($self->{params}{param}{'Status'}) eq 'on'); foreach my $key (@{$self->{params}{parameter_required}}) {   die "Missing required parameter: $key\n" unless ($self->{params}{param}{$key}); } %{$self->{params}{param}} = (%{$self->{params}{default_params}},%{$self->{params}{param}}); send_info('panel'); }

sub check_homoglyphs { warn "Checking homoglyphs...\n"; my $page = $self->{WP_obj}->get('User:HBC_NameWatcherBot/Homoglyphs','r')->content; utf8::upgrade($page); @{$self->{params}{homoglyphs}} = ; foreach my $line (split("\n",$page)) { next unless ($line =~ m|^;(.+?):\s*(.+)$|); my $symbol = $1; my @matches = split(/\s+/,$2); for (@matches) {$_ = quotemeta($_)}; push (@{$self->{params}{homoglyphs}}, $symbol, \@matches) } }

sub check_whitelist { warn "Checking whitelist...\n"; my $page = $self->{WP_obj}->get('User:HBC NameWatcherBot/Whitelist','r')->content; @{$self->{params}{whitelist}} = ; foreach my $line (split("\n",$page)) {   push(@{$self->{params}{whitelist}},$1) if ($line =~ m|^;(.*)$|); } send_info('whitelist'); }

sub check_blacklist { warn "Checking blacklist...\n"; my $page = $self->{WP_obj}->get('User:HBC NameWatcherBot/Blacklist','r')->content; @{$self->{params}{blacklist}} = ; foreach my $line (split("\n",$page)) {   if ($line =~ m|^;(.*?)(:(.*))?$|) {     my $rh_entry = {}; ${$rh_entry}{word} = $1; if ($2) {       my @flags = split(',',$3); ${$rh_entry}{flags} = parse_flags(\@flags); }     if (${$rh_entry}{'flags'}{'HOMOGLYPH'}) { my $hg_regex = compile_homoglyph_pattern(${$rh_entry}{'word'}); ${$rh_entry}{'flags'}{'LABEL'}[0] = ('Homoglyph of: "'.${$rh_entry}{'word'}.'"'); ${$rh_entry}{'flags'}{'REGEX'} = 1; ${$rh_entry}{'word'} = $hg_regex; }     push(@{$self->{params}{blacklist}},$rh_entry); }   }  send_info('blacklist'); }

sub parse_flags { my $ra_flags = shift; my %has_flag; foreach my $flag (@{$ra_flags}) {   if ($flag =~ m|(.*)\((.*)\)|) {     my (@params) = split('\|',$2); $has_flag{$1} = \@params; }   else {     $has_flag{$flag} = 1 }   }  return \%has_flag; }

sub check_name { shift; my($name) = shift; my $display_name = "'$name'"; my $master_name = $name; warn "Checking name: $display_name\n"; foreach my $white_word (@{$self->{params}{whitelist}}) {$name =~ s|$white_word||gi;} # Remove occurances of whitelisted words my $ra_offenses = []; foreach my $rh_black_word (@{$self->{params}{blacklist}}) {   my $word = ${$rh_black_word}{word}; if (${$rh_black_word}{flags}{'REGEX'}) {     if (${$rh_black_word}{flags}{'CASE_SENSITIVE'}) {       push(@{$ra_offenses},$rh_black_word) if ($name =~ m|$word|); }     else {       push(@{$ra_offenses},$rh_black_word) if ($name =~ m|$word|i); }     }    else {     if (${$rh_black_word}{flags}{'CASE_SENSITIVE'}) {       push(@{$ra_offenses},$rh_black_word) if (index($name,$word) > -1); }     else {       push(@{$ra_offenses},$rh_black_word) if (index(lc($name),lc($word)) > -1); }     }    }  &{$self->{shared}{add_job}}([\&bad_name,$master_name,$ra_offenses],0) if        (         scalar(@{$ra_offenses}) ||         (($self->{params}{param}{'Long name'}) && (length($name) > $self->{params}{param}{'Long name'})) ||         (($self->{params}{param}{'Repeating characters'}) && (detect_repeatition($name,$self->{params}{param}{'Repeating characters'}))) ||         (($self->{params}{param}{'Repeating numbers'}) && (detect_repeatition_numbers($name,$self->{params}{param}{'Repeating numbers'})))        ); return 1; }

sub bad_name { my ($name, $ra_offenses) = @_; my(@offense_reports) = ('* '); my $need_rechecking = 0; # default no reason to report my $need_header = 0; my $has_edited = user_has_edited($name); my $is_blocked = is_blocked($name); my $target_page = $self->{params}{param}{'Default target'}; push(@offense_reports,':* This user has edited at least one time.') if ($has_edited); if (($self->{params}{param}{'Long name'}) && (length($name) > $self->{params}{param}{'Long name'})) {   push(@offense_reports, ":* At ".length($name)." characters, this name exceeds ".$self->{params}{param}{'Long name'}." characters"); } if (($self->{params}{param}{'Repeating characters'}) && (my $char = detect_repeatition($name,$self->{params}{param}{'Repeating characters'}))) {   push(@offense_reports, ":* This name has the character \"".$char."\" repeating ".$self->{params}{param}{'Repeating characters'}." or more than times in a row."); } if (($self->{params}{param}{'Repeating numbers'}) && (detect_repeatition_numbers($name,$self->{params}{param}{'Repeating numbers'}))) {   push(@offense_reports, ":* This name has ".$self->{params}{param}{'Repeating numbers'}." or more numbers in a row."); } foreach my $rh_blackword (@{$ra_offenses}) {   my $rh_has_flag = ${$rh_blackword}{flags}; my $label = ((${$rh_has_flag}{'LABEL'}) ? ("called ".${${$rh_has_flag}{'LABEL'}}[0]."") : ("${$rh_blackword}{word}")); my $name_pattern = '('.${$rh_blackword}{word}.')'; my $match; if (${$rh_has_flag}{'CASE_SENSITIVE'}) {     $name =~ m|$name_pattern|; $match = " The portion that matched was $1." if ($1); }   else {     $name =~ m|$name_pattern|i; $match = " The portion that matched was $1." if ($1); }   my(@report) = (((${$rh_has_flag}{'REGEX'}) ? (":* Matches the regular expression $label.$match") : (":* Matches the literal pattern $label."))); if (${$rh_has_flag}{'CASE_SENSITIVE'}) { push (@report, "::* This filter is case sensitive."); }   if (${$rh_has_flag}{'WAIT_TILL_EDIT'}) {     if ($has_edited) {       push (@report, "::* This report was delayed until the user edited."); delete(${$self->{params}{param}{delayed_names}}{$name}); }     else {       warn "\t$name has not edited, not counting match to the string $label at this point, rechecking in ".$self->{params}{param}{'Recheck interval'}." seconds.\n"; $need_rechecking = 1 unless (scalar(@offense_reports) > 1); next; }     }    if (${$rh_has_flag}{'HOMOGLYPH'}) { push (@report, "::* This filter attempts to see past the use of similar looking characters and creative substitutions, see Homoglyphs."); }   $need_rechecking = 0; warn Dumper ({     'Name'    => $name,      'Flags'   => $rh_has_flag,      'Word'    => ${$rh_blackword}{'word'},     }); if (${$rh_has_flag}{'HTTP_CHECK'}) {     warn "Gotta check '$name' for http server.\n"; my $code = $self->{WP_obj}->{ua}->head('http://'.$name)->code; if (($code == 500) && (ref(${$rh_has_flag}{'HTTP_CHECK'})) && (lc(${${$rh_has_flag}{'HTTP_CHECK'}}[0]) eq 'ignore_fail')) {       warn "Ignoring failed connection.\n"; return; }     my $info = ''; if       ($code =~ m|^2..$|) {       $info = "A response code starting in 2 means HTTP server found, succesful request."; }     elsif     ($code =~ m|^3..$|) {       $info = "A response code starting in 3 means HTTP server found, client redirected."; }     elsif     ($code =~ m|^4..$|) {       $info = "A response code starting in 4 means HTTP server found, HTTP server gave error."; }     elsif     ($code =~ m|^5..$|) {       $info = "A response code starting in 5 means HTTP server not found."; }     push (@report, "::* When using the name $name(link) as a http URL I got a response code of $code.".$info); }   if  (${$rh_has_flag}{'ALTERNATE_TARGET'}) {     $target_page = ${${$rh_has_flag}{'ALTERNATE_TARGET'}}[0]; warn "Alternate target: $target_page\n"; }   if  (${$rh_has_flag}{'USE_HEADER'}) {     $need_header = 1; }   if  (${$rh_has_flag}{'LOW_CONFIDENCE'}) {     push (@report, "::* The string $label is prone to false positives, please take extra care ensuring this name is actually a violation before blocking."); }   if  (${$rh_has_flag}{'NOTE'}) {     push (@report, "::* The string $label has a comment associated with it: ".${${$rh_has_flag}{'NOTE'}}[0]); }   if  (${$rh_has_flag}{'SOCK_PUPPET'}) {     push (@report, "::* The string $label is often associate with sock puppets of User:".${${$rh_has_flag}{'SOCK_PUPPET'}}[0]." "); }   push (@offense_reports,@report); } if ($need_rechecking) {   ${$self->{params}{param}{delayed_names}}{$name} ||= time; my $report_age = (time - ${$self->{params}{param}{delayed_names}}{$name}); if (($report_age > $self->{params}{param}{'Recheck duration'}) || ($is_blocked)) {     warn "Forgetting about $name, it has been $report_age seconds since creation and no edits.\n"; warn "\tThe user is blocked you see.\n" if ($is_blocked); delete(${$self->{params}{param}{delayed_names}}{$name}); }   else {     warn "Scheduling recheck of $name\n"; &{$self->{shared}{add_job}}([\&check_name,$name,$name],$self->{params}{param}{'Recheck interval'}); }   return 1; } unshift(@offense_reports, "== Account User:$name created ==") if $need_header; my $report = (join("\n",@offense_reports)." ~\n"); warn "Writing to: $target_page\n"; my $page = $self->{WP_obj}->get($target_page, 'rw'); if (($page->content =~ m|<\!-- HBC NameWatcherBot v(\d+\.\d+\.\d+) allowed -->|) && !($is_blocked)) {   my $version_number_needed = $1; unless (check_version($version_number_needed,$self->{params}{version_number})) { warn "I need $version_number_needed and my version is ".$self->{params}{version_number}.".\n"; warn "I am too old to edit this page.\n"; return; }   while ($page->{'content'} !~ m|\n\n$|) {$page->{'content'} = $page->{'content'}."\n";} $page->{'content'} = ($page->{'content'}.$report); $page->{'summary'} = "Reporting $name (block)."; $page->save; warn ($page->{'summary'}."\n"); sleep($self->{params}{param}{'Write rate'}); warn "\n"; } elsif ($is_blocked) {   warn "Skipping, user is already blocked.\n"; } else {   warn "I don't have permission to write to '$target_page'\n"; return; } }
 * 1)    warn ($page->{'content'}."\n\n\n");

sub user_has_edited {   my ($name) = @_; my $url = 'http://en.wikipedia.org/w/api.php?action=query&list=usercontribs&format=xml&ucuser='.uri_escape($name); my $data = $self->{WP_obj}->{ua}->get($url)->content; if (!$data || $data =~ m||) { # blank XML container, no contribs, return zero return 0; } else { return 1; } }

sub is_blocked { my ($name) = @_; my $url = $self->{WP_obj}->{index}.'?title=Special:Ipblocklist&ip='.uri_escape($name); my $data = $self->{WP_obj}->{ua}->get($url)->content; # Get blocklist info for user return ($data =~ m|\) blocked = $limit); } return 0; }

sub detect_repeatition_numbers { my $string = shift; my $limit = shift; my $last_char; my $current_rep = 1; while (my $char = substr($string,0,1,'')) {   if ($char =~ m|^[0-9]$|) {     $current_rep++; }   else {     $current_rep = 0; }   return 1 if ($current_rep >= $limit); } return 0; }

sub compile_homoglyph_pattern { my $target_pattern = shift; my @homoglyphs = @{$self->{params}{homoglyphs}}; while (scalar(@homoglyphs)) { my ($str, $ra_values) = (shift(@homoglyphs),shift(@homoglyphs)); next unless (scalar(@{$ra_values})); my $pattern = ('('.join('|', $str,@{$ra_values}).')'); $target_pattern =~ s"$str"$pattern"ig; #" } return $target_pattern; }

1;