Wikipedia:Odd links/SQL

This page contains information on how to regenerate the Odd links report.

DROP TABLE IF EXISTS oddlinks;

CREATE TABLE oddlinks ( title varchar(255) BINARY NOT NULL,  link varchar(255) BINARY NOT NULL,  from_ns int(8) unsigned NOT NULL,  to_ns int(8) unsigned NOT NULL );

DROP TABLE IF EXISTS namespaces;

CREATE TABLE namespaces ( ns int(8) unsigned NOT NULL,  prefix varchar(255) BINARY NOT NULL );

INSERT INTO namespaces VALUES ( 0, '' ); INSERT INTO namespaces VALUES ( 1, 'Talk' ); INSERT INTO namespaces VALUES ( 2, 'User' ); INSERT INTO namespaces VALUES ( 3, 'User Talk' ); INSERT INTO namespaces VALUES ( 4, 'Wikipedia' ); INSERT INTO namespaces VALUES ( 5, 'Wikipedia Talk' ); INSERT INTO namespaces VALUES ( 6, 'File' ); INSERT INTO namespaces VALUES ( 7, 'File Talk' ); INSERT INTO namespaces VALUES ( 8, 'MediaWiki' ); INSERT INTO namespaces VALUES ( 9, 'MediaWiki Talk' ); INSERT INTO namespaces VALUES ( 10, 'Template' ); INSERT INTO namespaces VALUES ( 11, 'Template Talk' ); INSERT INTO namespaces VALUES ( 12, 'Help' ); INSERT INTO namespaces VALUES ( 13, 'Help Talk' ); INSERT INTO namespaces VALUES ( 14, 'Category' ); INSERT INTO namespaces VALUES ( 15, 'Category Talk' ); INSERT INTO namespaces VALUES ( 100, 'Portal' ); INSERT INTO namespaces VALUES ( 101, 'Portal Talk' );

// Capture links from namespace 0 to various other namespaces // This is done in sections as each million rows take around 3 minutes to process

INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 000000 AND 999999; INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 1000000 AND 1999999; INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 2000000 AND 2999999; INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 3000000 AND 3999999; INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 4000000 AND 4999999; INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 5000000 AND 5999999; INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 6000000 AND 6999999; INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 7000000 AND 7999999; INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 8000000 AND 8999999; INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 9000000 AND 9999999;

INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 10000000 AND 10999999; INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 11000000 AND 11999999; INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 12000000 AND 12999999; INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 13000000 AND 13999999; INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 14000000 AND 14999999; INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 15000000 AND 15999999; INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 16000000 AND 16999999; INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 17000000 AND 17999999; INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 18000000 AND 18999999; INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 19000000 AND 19999999;

INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 20000000 AND 20999999; INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 21000000 AND 21999999; INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 22000000 AND 22999999; INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 23000000 AND 23999999; INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 24000000 AND 24999999; INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 25000000 AND 25999999; INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 26000000 AND 26999999; INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 27000000 AND 27999999; INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 28000000 AND 28999999; INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 29000000 AND 29999999;

// Do not consider pages with 'under construction' notices on them

DROP TABLE IF EXISTS pages_under_contruction;

CREATE TABLE pages_under_contruction AS SELECT p.page_title FROM enwiki_p.page p  INNER JOIN enwiki_p.templatelinks t ON p.page_id = t.tl_from WHERE t.tl_title IN ( 'Underconstruction','UnderConstruction','Under_construction','Under_Construction','Construction','Undercon','UnderCon' ) AND  t.tl_namespace = 10;

SELECT concat( '* ', o.title, ' ? ', n.prefix, ':', o.link, '' ) FROM oddlinks o INNER JOIN namespaces n ON o.to_ns = n.ns WHERE title IN (   SELECT *   FROM pages_under_contruction ); ORDER BY n.ns ASC, o.title ASC;

DELETE FROM oddlinks WHERE title IN (   SELECT *   FROM pages_under_contruction );

// Do not consider pages which link to User:CorenSearchBot SELECT * FROM oddlinks WHERE from_ns = 0 AND to_ns = 2 AND link = 'CorenSearchBot';

DELETE FROM oddlinks WHERE from_ns = 0 AND to_ns = 2 AND link = 'CorenSearchBot';

// Pick out signaturs in articles - paired links to User: and matching User_Talk: namespaces DROP TABLE IF EXISTS sigs;

CREATE TABLE sigs AS SELECT o1.title, o1.link FROM  oddlinks o1, oddlinks o2 WHERE o1.title = o2.title AND  o1.link = o2.link AND  o1.to_ns = 2 AND  o2.to_ns = 3;

SELECT concat( '* ', title, ' ? User:', link, ' User talk:', link, '' ) FROM sigs ORDER BY 1 ASC;

DELETE FROM oddlinks WHERE EXISTS ( SELECT * FROM sigs  WHERE oddlinks.title = sigs.title  AND oddlinks.link = sigs.link ) AND oddlinks.to_ns IN ( 2, 3 );

// Report remaining odd links

SELECT concat( '* ', o.title, ' ? ', n.prefix, ':', o.link, '' ) FROM oddlinks o INNER JOIN namespaces n ON o.to_ns = n.ns ORDER BY n.ns ASC, o.title ASC;