User:Wizzy/badwords


#! /usr/bin/perl

use strict; use File::Find ; use English;

# Command-line handling and badword list loading.
#
# Exactly two arguments are required: the directory tree to scan and the
# path of the badwords file. The badwords file holds one entry per line;
# wanted() treats an entry written as /.../ as a regular expression and
# any other entry as a literal word.
if (@ARGV != 2) {
    print "Usage: $0 <directory> <badwordsfile>\n";
    exit 1;
}

my @dirs         = ($ARGV[0]);
my $badwordsfile = $ARGV[1];

# Open the badwords file itself ($ARGV[1]); $ARGV[0] is the directory to
# scan. Three-argument open keeps odd characters in the path from being
# interpreted as an open mode.
open(my $badwords_fh, '<', $badwordsfile)
    || die("can't open badwords file $badwordsfile: $!");
my @badwords = <$badwords_fh>;
close($badwords_fh);
chomp(@badwords);

# Drop blank lines: an empty entry would match at every word boundary and
# flag every line of every page.
@badwords = grep { /\S/ } @badwords;

# Number of hits per badwords entry, filled in by wanted().
my %count;

# Build the compiled matcher for one badwords entry.
#
# An entry written as /.../ is used as a regular expression; any other
# entry is matched literally. Both forms are wrapped in \b word boundaries.
sub _badword_regex {
    my ($badword) = @_;

    if ($badword =~ m:^/(.*)/$:) {
        my $pattern = $1;
        return qr/\b$pattern\b/;
    }

    # \E is required: without it \Q also quotes the trailing \b, turning it
    # into a literal backslash-b, so literal entries could never match.
    return qr/\b\Q$badword\E\b/;
}

# File::Find callback: scan one HTML page for badwords.
#
# Reads the entry name from $_ and ignores names that do not end in
# "html". For every line after the end of the document head, each entry of
# @badwords is tried; entries that also match the page title are skipped.
# Each hit prints "<before:match:after>\ttitle" (up to 15 characters of
# context on either side) and increments $count{entry}.
#
# Dies if a matching file cannot be opened.
sub wanted {
    return unless /^.*html\z/s;

    my $file    = $_;
    my $title   = '';    # stays empty until the title line has been seen
    my $endhead = 0;

    # Compile each entry once per file instead of once per line.
    my @matchers = map { [ $_, _badword_regex($_) ] } @badwords;

    # Three-argument open: the name comes from the filesystem and must not
    # be able to smuggle in an open mode.
    open(my $fh, '<', $file) || die("Can't open file $file: $!");

    while (my $line = <$fh>) {
        # NOTE(review): the <title>...</title> and </head> tags were
        # stripped from this listing by HTML extraction and are restored
        # here on the evidence of the variable names - confirm against the
        # pages actually being scanned.
        if ($line =~ m%<title>(.*)- Wikipedia, the free encyclopedia</title>%) {
            $title = $1;
        }
        if ($line =~ m%</head>%) {
            $endhead = 1;
        }
        next if !$endhead;

        for my $matcher (@matchers) {
            my ($badword, $regex) = @{$matcher};

            # skip this entry if it matches the title
            next if $title =~ $regex;
            next unless $line =~ $regex;

            # $PREMATCH/$MATCH/$POSTMATCH describe the successful match
            # against $line just above.
            my $prematch  = substr($PREMATCH, -15);
            my $postmatch = substr($POSTMATCH, 0, 15);
            print "<$prematch:$MATCH:$postmatch>\t$title\n";
            $count{$badword}++;
        }
    }

    close($fh);
    return;
}

use vars qw/*name *dir *prune/;

# for the convenience of &wanted calls, including -eval statements:
*name  = *File::Find::name;
*dir   = *File::Find::dir;
*prune = *File::Find::prune;

# Traverse desired filesystems
File::Find::find({wanted => \&wanted}, @dirs);

print "===================================\n";

# Summary: one "count<TAB>entry" line per badwords entry that was hit,
# least frequent first. Ties are broken by entry text so the report is
# stable from run to run.
foreach my $key (sort { $count{$a} <=> $count{$b} || $a cmp $b } keys %count) {
    print "$count{$key}\t$key\n";
}

exit;