# User:Drinibot/ExtractWikilinks.py

import re
import subprocess

# Input HTML dump and output list of extracted link targets.
rawfilename = "raw.html"
linksfilename = "links.txt"

# Captures whatever follows ".org/wiki/" up to the next double quote.
# The leading ".*" is greedy, so when a line holds several such links only
# the LAST one is captured.  The group name is cosmetic: findall() returns
# the group text regardless of its name.
regex = re.compile(r".*\.org/wiki/(?P<title>.*?)\".*$")


def extract_links(lines):
    """Yield the wiki page target found on each line of *lines*.

    A line contributes at most one target (the last ``.org/wiki/...``
    link on it, see the note on ``regex``).  Targets containing
    ``"Special:"`` are skipped.

    lines -- any iterable of strings (an open text file works).
    """
    for line in lines:
        found = regex.findall(line)
        if found and "Special:" not in found[0]:
            # NOTE(review): the original built this as '"" + m[0] + ""'.
            # The empty strings look like "[[" / "]]" wiki markup that was
            # lost when the script was copied -- confirm before relying on
            # the bare-title output format.
            yield found[0]


def main(raw_path=rawfilename, links_path=linksfilename):
    """Read *raw_path*, print each extracted target and write it to *links_path*.

    One target is written per line.  Both files are closed on exit, even
    if an error occurs part-way through.
    """
    with open(raw_path, 'r') as fi, open(links_path, 'w') as li:
        for wl in extract_links(fi):
            # Single-argument print() behaves the same on Python 2 and 3.
            print(wl)
            li.write(wl + "\n")


if __name__ == "__main__":
    main()