User:Okip/scrape/coding

This is the AutoHotkey script used to scrape the number of search results from Google News, Google Books, and Google Scholar for each article listed in a Wikipedia category.

Example: Category talk:Unreferenced BLPs from April 2007

; Scrape loop (AutoHotkey v1 syntax).
; Downloads a Wikipedia category page, extracts each listed name, and for
; every name records the hit counts reported by Google News archive search,
; Google Books and Google Scholar.
;
; Results are stored in pseudo-arrays indexed 1..L0:
;   L<n>         the name as searched (parenthesised part removed)
;   L<n>Results  Google News count
;   n<n>Results  Google Books count
;   m<n>Results  Google Scholar count

S  := 1    ; position in Con where the next RegExMatch search starts
L0 := 0    ; number of names collected so far

; NOTE(review): this pattern is empty in the published copy of the script,
; most likely because its HTML-like text was stripped when the code was pasted
; onto the wiki page. It must capture the article name in subpattern 1;
; restore the real pattern before running. With an empty pattern every match
; yields a blank name, which the loop below skips.
NamePattern := ""

URLDownloadToFile, http://en.wikipedia.org/w/index.php?title=Category:Unreferenced_BLPs_from_April_2007, wiki.txt
; For a shorter list, use Category:Chinese_centenarians instead.
if ErrorLevel
{
    ; Do not fall through and parse a stale wiki.txt from an earlier run.
    MsgBox, Could not download the category page.
    ExitApp
}
FileRead, Con, wiki.txt

While (Pos := RegExMatch(Con, NamePattern, Sub, S))
{
    Q := Sub1
    S := Pos + StrLen(Sub1) + 13    ; resume the search beyond the current match
    if (Q = "")
        continue                    ; nothing captured: do not query Google for a blank name
    L0++

    ; Removes the parenthesis in a name.
    NewStr := RegExReplace(Q, "\(.*?\)", " ", count)
    ; original script: Hiroshi Takeyasu
    ; MsgBox %NewStr% ; for testing

    ; All three searches use the quoted, cleaned-up name. Slots are indexed by
    ; L0 (not A_Index) so skipped matches leave no gaps.
    L%L0%Results := GoogleResultCount("http://news.google.com/archivesearch?q=""" . NewStr . """+&btnG=Search+Archives", "google.txt")

    n%L0%Results := GoogleResultCount("http://books.google.com/books?q=+""" . NewStr . """+&btnG=Search+Books", "google.htm")
    ; MsgBox, %Q% "http://books.google.com/books?q=+"%NewStr%"+&btnG=Search+Books" ; for testing

    ; The closing quote after the name was missing in the original URL.
    m%L0%Results := GoogleResultCount("http://scholar.google.com/scholar?&q=""" . NewStr . """", "google.htm")
    ; MsgBox, %Q% "http://scholar.google.com/scholar?&q="%NewStr%"" ; for testing

    L%L0% := NewStr
}

; Downloads Url into File and returns the digits of the first "of N" /
; "of about N" figure found in it. Returns 0 when the download fails or no
; such figure is present, so a failed request never reports the count left
; in File by a previous search.
GoogleResultCount(Url, File)
{
    URLDownloadToFile, %Url%, %File%
    if ErrorLevel
        return 0
    FileRead, Html, %File%
    RegExMatch(Html, "of (?:about )?(\S+)", Hit)
    ; Strip thousands separators and other non-digits; an empty result becomes "0".
    return RegExReplace(RegExReplace(Hit1, "\D"), "^$", "0")
}

; Build a sortable wikitable from the collected results and open it.
; Reads L0 (row count) and the pseudo-arrays L<n>, L<n>Results, n<n>Results
; and m<n>Results filled in by the scrape loop.
L := " `r`n{|class=""wikitable sortable""`r`n!Name!!Google news!!Google books!!Google scholar"

Loop %L0%
{
    ; One table row per name: name, news count, books count, scholar count.
    L .= "`r`n|-valign=top `r`n|align=left|" . L%A_Index%
       . "`n|align=center|" . L%A_Index%Results
       . "`n|align=center|" . n%A_Index%Results
       . "`n|align=center| " . m%A_Index%Results
}

; Close the table; the original output left it open.
L .= "`r`n|}"

; FileAppend adds to any existing L.txt rather than replacing it.
FileAppend, %L%, %a_scriptdir%\L.txt
Run, %a_scriptdir%\L.txt
Return

; L := "Found " . L0 . " results.`r`n"
; MsgBox %L% ; for testing