# User:Nanobear~enwiki/Importance script

use strict;
use warnings;

# Importance script
#
# Reads a locally stored file of Wikipedia markup and, for every list line of
# the form
#     *(optional apostrophes)[[Article name]] (any optional text)
# looks up the article's talk page for a {{WikiProject Russia}} banner and the
# article's maintenance categories, then prints the line with a colour-coded
# importance and a "( tags: ... )" suffix.  All other lines are printed
# unchanged.
#
# Usage: perl script_filename page_filename > output_filename
#
# The file can also be loaded with require/do: the command-line driver at the
# bottom only runs when the file is executed directly.

# Base URL of the MediaWiki query API.  format=xml is requested explicitly so
# the existence check in get_article_talk does not depend on the API default.
my $API_BASE = 'https://en.wikipedia.org/w/api.php?action=query&format=xml';

# Tag label => category names that trigger it, in output order.
my @TAG_CATEGORIES = (
    [ 'NPOV'         => 'Category:All NPOV disputes' ],
    [ 'unreferenced' => 'Category:All articles lacking sources' ],
    [ 'style'        => 'Category:All articles needing style editing' ],
    [ 'split'        => 'Category:All articles to be split' ],
    [ 'refs'         => 'Category:All articles with unsourced statements' ],
    [ 'cleanup'      => 'Category:All articles needing cleanup' ],
    [ 'accuracy'     => 'Category:All accuracy disputes' ],
    [ 'BLP'          => 'Category:BLP articles' ],
    # Two different category names both mean "original research".
    [ 'OR'           => 'Category:All articles that may contain original research',
                        'Category:Articles that may contain original research' ],
);

# Display colour for each (capitalised) importance rating.
my %COLOR_FOR_IMPORTANCE = (
    'Top'                => 'red',
    'High'               => 'blue',
    'Mid'                => 'orange',
    'Low'                => 'grey',
    'Unknown importance' => 'purple',
);

# Colour used for any rating not listed above, so a colour is never carried
# over from a previously processed article.
my $DEFAULT_COLOR = 'black';

# Returned by get_importance when no rating can be found.
my $UNKNOWN_IMPORTANCE = 'unknown importance';

# Shared HTTP client, created on first use.
my $user_agent;

# Fetches $url and returns the response body; dies with the HTTP status line
# on failure.  LWP is loaded lazily so the pure helpers below remain usable
# (and testable) on a system without LWP or network access.
sub _http_get {
    my ($url) = @_;
    require LWP::UserAgent;
    $user_agent ||= do {
        my $ua = LWP::UserAgent->new;
        # Identify the script instead of sending only the library default.
        $ua->agent( 'WikiProjectRussiaImportanceScript/1.0 ' . $ua->_agent );
        $ua;
    };
    my $response = $user_agent->get($url);
    die "GET $url failed: " . $response->status_line . "\n"
        unless $response->is_success;
    return $response->content;
}

# Turns an article title into a form that is safe inside a query string:
# spaces become underscores and every byte outside the URI "unreserved" set is
# percent-encoded, so titles containing &, +, ? or # no longer corrupt the
# request.
sub _encode_title {
    my ($title) = @_;
    utf8::encode($title) if utf8::is_utf8($title);
    $title =~ s/ /_/g;
    $title =~ s/([^A-Za-z0-9_.~-])/sprintf('%%%02X', ord($1))/ge;
    return $title;
}

# Maps the raw API category listing $content to the list of tag labels whose
# category names occur in it, in the order of @TAG_CATEGORIES.
sub _tags_from_category_listing {
    my ($content) = @_;
    my @tags;
    for my $entry (@TAG_CATEGORIES) {
        my ( $label, @names ) = @{$entry};
        push @tags, $label
            if grep { index( $content, $_ ) >= 0 } @names;
    }
    return @tags;
}

# Returns the importance rating from talk page wikimarkup $article_talk.
#
# Only the first {{WikiProject Russia ...}} banner is inspected.  The value of
# its "importance" parameter is returned with surrounding whitespace removed;
# it may be followed by another parameter or be the last one in the banner.
# Returns "unknown importance" when there is no banner, no importance
# parameter, or the parameter is empty.  The banner's "class" parameter is not
# used by the script and is therefore not extracted.
sub get_importance {
    my ($article_talk) = @_;
    return $UNKNOWN_IMPORTANCE unless defined $article_talk;
    return $UNKNOWN_IMPORTANCE
        unless $article_talk =~ /\{\{WikiProject Russia\|?(.*?)\}\}/s;
    my $params = $1;

    # Anchor on a parameter boundary so that names merely ending in
    # "importance" are not mistaken for the main rating.
    return $UNKNOWN_IMPORTANCE
        unless $params =~ /(?:\A|\|)\s*importance\s*=\s*([^|}]*)/i;
    ( my $value = $1 ) =~ s/\s+\z//;
    return $value eq '' ? $UNKNOWN_IMPORTANCE : $value;
}

# Returns the API response holding the wikimarkup of the talk page of $title,
# or "" if the response contains no revision (the page most likely does not
# exist).  Dies if the HTTP request fails.
sub get_article_talk {
    my ($title) = @_;
    my $url = $API_BASE
        . '&prop=revisions&rvprop=content&titles=Talk:'
        . _encode_title($title);
    my $txt = _http_get($url);
    return $txt =~ /<rev\b/ ? $txt : '';
}

# Returns the list of tag labels for the maintenance categories the article
# named $title is in.  Dies if the HTTP request fails.
sub get_categories {
    my ($title) = @_;
    # cllimit=max: without it only the first page of categories is returned
    # and tags of heavily categorised articles are missed.
    my $url = $API_BASE
        . '&prop=categories&cllimit=max&titles='
        . _encode_title($title);
    return _tags_from_category_listing( _http_get($url) );
}

# Builds " ( tags: a, b, c )" from the given tag labels, or "" if none are
# given.
sub make_tags_string {
    my @cats = @_;
    return '' unless @cats;
    return ' ( tags: ' . join( ', ', @cats ) . ' )';
}

# Returns $importance wrapped in a coloured <span>, followed by a space.
# $importance is expected in its capitalised form ("Top", "High", ...).
#
# NOTE(review): the span markup was lost from the copy of the script this was
# restored from (only the colour selection survived); the inline style used
# here is a reconstruction - confirm against the desired list format.
sub get_colored_importance_string {
    my ($importance) = @_;
    my $color = exists $COLOR_FOR_IMPORTANCE{$importance}
        ? $COLOR_FOR_IMPORTANCE{$importance}
        : $DEFAULT_COLOR;
    return '<span style="color:' . $color . '">'
        . '(' . $importance . ')'
        . '</span>' . ' ';
}

# Returns the output text for one input line.  Lines that are not article list
# entries, whose link contains ":" (probably not an article title), or whose
# talk page does not exist are returned unchanged.
sub annotate_line {
    my ($line) = @_;
    return $line unless $line =~ /\*'*\[\[(.*?)\]\]/;
    my $link = $1;
    return $line if $link =~ /:/;

    # For [[Title|label]] or [[Title#Section]] only the title is looked up.
    ( my $title = $link ) =~ s/[|#].*//s;
    $title =~ s/\A\s+|\s+\z//g;
    return $line if $title eq '';

    my $article_talk = get_article_talk($title);
    return $line if $article_talk eq '';

    # Normalise case so "high", "High" and "HIGH" all map to the same colour.
    my $importance = ucfirst lc get_importance($article_talk);
    my @cats       = get_categories($title);

    # If info is already listed from an earlier run, remove it first: drop
    # everything from the annotation's <span (optionally preceded by "(")
    # to the end of the line.
    ( my $entry = $line ) =~ s/\s*\(?<span\b.*//s;
    chomp $entry;

    return $entry . ' '
        . get_colored_importance_string($importance)
        . make_tags_string(@cats) . "\n";
}

# Annotates every line of the file at $path and prints the result to STDOUT.
sub process_file {
    my ($path) = @_;
    open my $in, '<', $path or die "Cannot open '$path': $!\n";
    while ( defined( my $line = <$in> ) ) {
        print annotate_line($line);
    }
    close $in or die "Cannot close '$path': $!\n";
    return;
}

# Command-line driver; skipped when the file is loaded by another program.
unless (caller) {
    if (@ARGV) {
        process_file( $ARGV[0] );
    }
    else {
        warn "usage: perl $0 page_filename > output_filename\n";
    }
}

1;
#!/usr/bin/perl
#
# Purpose:
# 1. Reads a file containing Wikipedia markup which has a list of articles (among other text)
# 2. For each article in the list, fetches WikiProject Russia template attributes from the article's talk page
#    as well as some tag categories of the article (such as "Category:All NPOV disputes", etc.)
# 3. Inserts these next to the article name in the list in color-coded format.
#
# Notes:
# - The list must have the following format:
#   *(optional apostrophes)[[Article name]] (any optional text)
# - The list of categories the script recognises can be found at http://en.wikipedia.org/wiki/User_talk:Nanobear/Importance_script
# - The Wikipedia markup must be located in a locally-stored file
#
# Command-line usage:
# perl script_filename page_filename > output_filename
#
# Subroutine summaries:
# get_importance: Returns the importance from talk page wikimarkup $article_talk
# get_article_talk: Returns wikimarkup content of the talk page of $title, or "" if the page doesn't exist on WP
# get_categories: Returns the categories the article with name $title is in
# make_tags_string: Builds a comma-separated list from the cats in @cats, or "" if @cats is empty
# get_colored_importance_string: Returns the importance with a suitable color