User:Topbanana/Reports/Scripts/extract links.sql

This page describes a subscript used in the creation of a link analysis database.

You'll need to generate the whole thing yourself, it's too large to sensibly store here. 10000 repeats definately catches all links as of June 2004.

INSERT INTO raw_links (pos, from_id, lstart) SELECT pos+1, art_id, locate( '[[', art_text, rl2.lstart ) + 2 FROM raw_links rl2, art WHERE rl2.from_id = art_id AND rl2.pos = 1 AND locate( '[[', art_text, rl2.lstart ) > 0; INSERT INTO raw_links (pos, from_id, lstart) SELECT pos+1, art_id, locate( '[[', art_text, rl2.lstart ) + 2 FROM raw_links rl2, art WHERE rl2.from_id = art_id AND rl2.pos = 2 AND locate( '[[', art_text, rl2.lstart ) > 0; INSERT INTO raw_links (pos, from_id, lstart) SELECT pos+1, art_id, locate( '[[', art_text, rl2.lstart ) + 2 FROM raw_links rl2, art WHERE rl2.from_id = art_id AND rl2.pos = 3 AND locate( '[[', art_text, rl2.lstart ) > 0; INSERT INTO raw_links (pos, from_id, lstart) SELECT pos+1, art_id, locate( '[[', art_text, rl2.lstart ) + 2 FROM raw_links rl2, art WHERE rl2.from_id = art_id AND rl2.pos = 4 AND locate( '[[', art_text, rl2.lstart ) > 0; INSERT INTO raw_links (pos, from_id, lstart) SELECT pos+1, art_id, locate( '[[', art_text, rl2.lstart ) + 2 FROM raw_links rl2, art WHERE rl2.from_id = art_id AND rl2.pos = 5 AND locate( '[[', art_text, rl2.lstart ) > 0;

... and repeat as necessary, increasing 'pos' each time.