User:Svgalbertian/Scripts/TSX

This is a Perl script to retrieve all active TSX stocks.

Code
use LWP::Simple; use LWP::UserAgent; use WWW::Mechanize; use MediaWiki::API;

$dirURL = "http://www.tmxmoney.com/HttpController?GetPage=ListedCompanyDirectory&SearchCriteria=Name&SearchKeyword=\%char\%&SearchType=StartWith&Page=1&SearchIsMarket=Yes&Market=T&Language=en"; $companyURL = "http://tmx.quotemedia.com/company.php?qm_symbol=\%char\%&locale=EN"; $replaceme = '%char%'; @alpha = ("A" .. "Z", 0 .. 9);
 * 1) Directory search variables

my $mech = WWW::Mechanize->new;
 * 1) Initialize Mechanize

my $mw = MediaWiki::API->new; $mw->{config}->{api_url} = 'http://en.wikipedia.org/w/api.php';
 * 1) Initialize Wikipedia

my $browser = LWP::UserAgent->new;
 * 1) Initialize LWP

use POSIX qw(strftime); $querydate = strftime "%B %e, %Y", localtime;
 * 1) What is the date?

$footer = "|}\n\n\n==See also==\n*Toronto Stock Exchange\n*List of Canadian companies\n*List of mutual funds listed on the TSX\n*S&P/TSX Composite Index\n*List of companies listed on the TSX Venture Exchange\n\n==External links==\n* Toronto Stock Exchange\n\n\n";
 * 1) Page elements

open ( WIKINUM, ">wiki_numbers.txt" ) || die ("you die now!"); print WIKINUM "\n\n==0-9==\n{| style=\"background:transparent;\"\n!Stock Name\n!Symbol\n|\n";
 * 1) Create number page

foreach $alpha (@alpha) { (my $currenturl = $dirURL) =~s/$replaceme/$alpha/g;
 * 1) Cycle through all the directory pages
 * 1) Create the URL of the directory page

open ( WIKILRT, ">wiki$alpha.txt" ) || die ("you die now!"); print WIKILRT "\n\n==$alpha==\n{| style=\"background:transparent;\"\n!Stock Name\n!Symbol\n|\n";
 * 1) Create the letter pages

open ( WIKILRT_NEW, ">wikialt$alpha.txt" ) || die ("you die now!"); print WIKILRT_NEW "\n\n==$alpha==\n{| class=\"wikitable\"\n|-\n! COMPANY NAME !! SYMBOL !! INDUSTRY\n";

$mech->get($currenturl);
 * 1) Grab the directory page

do {

# Extract all links my @links = $mech->links;

# Add matching company links to hash table foreach (@links) { if ($_->url =~ m/company.php/) {

# Grab company name from the link text $company = $_->text;

# Get the Wikipedia page my $page = $mw->get_page( { title => $company } );

if($page->{'*'}=~ m{#(REDIRECT |REDIRECT)\[\[(.*)\]\]}i) { print WIKILRT "|" . $company . "\n"; print WIKINUM "|" . $company . "\n"; print WIKILRT_NEW "|-\n|" . $company . " || "; }	elsif ($page->{'*'}) { print WIKILRT "|" . $company . "\n"; print WIKINUM "|" . $company . "\n"; print WIKILRT_NEW "|-\n|" . $company . " || "; }	else { print WIKILRT "|". $company. "\n"; print WIKINUM "|". $company. "\n"; print WIKILRT_NEW "|-\n|". $company. " || ";	}

# Parse the ticker from the URL $_->url =~ m{=(.*?)&}gism; $ticker = $1;

print WIKILRT "|\n|\n"; print WIKINUM "|\n|\n"; print WIKILRT_NEW " || ";

# New code to get the industry and website (my $currenturl = $companyURL) =~s/$replaceme/$ticker/g; my $request = HTTP::Request->new(GET => $currenturl); my $response = $browser->request($request); $contents = $response->content;

$contents =~ m{Industry: .*?>(.*?) }gism; print "$company ($ticker), industry: $1 \n"; print WIKILRT_NEW "$1 \n";

#$contents =~ m{Website: .*?href=\"(.*?)\"}gism; #print "Website: $1 \n";

} }

} while ( $mech->find_link(text => 'Next >') && $mech->follow_link(text => 'Next >') );

print WIKILRT $footer; close (WIKILRT);

print WIKILRT2 $footer; close (WIKILRT2); }

print WIKINUM $footer; close (WIKINUM);