User:Topbanana/Reports/This page contains a link that might be mis-punctuated/SQL

The SQL below should be run on a Link Analysis Database. It creates a number of extra tables to aid in the analysis of 'close matches' for red links. - -- Create tables and copy bad link targets and article titles

CREATE TABLE bad_squish (   from_id mediumint(7) unsigned NOT NULL,    orig_title varchar(255) binary NOT NULL,    squish_title varchar(255) binary NOT NULL,

PRIMARY KEY( from_id, orig_title ) ) ENGINE=MyISAM;

CREATE TABLE art_squish (   orig_title varchar(255) binary NOT NULL,    squish_title varchar(255) binary NOT NULL,

PRIMARY KEY( orig_title ) ) ENGINE=MyISAM;

INSERT INTO art_squish SELECT art_title, art_title FROM art;

INSERT INTO bad_squish SELECT from_id, link, link FROM bad_links;

DELETE FROM art_squish WHERE Length( orig_title ) <=3;

DELETE FROM bad_squish WHERE Length( orig_title ) <=3;

-- Now progressively mangle the link targets and article titles, removing matches -- as we go.

-- Remove punctuation and double spaces

UPDATE bad_squish SET squish_title = replace( squish_title, '(',  ); UPDATE art_squish SET squish_title = replace( squish_title, '(',  );

UPDATE bad_squish SET squish_title = replace( squish_title, ')',  ); UPDATE art_squish SET squish_title = replace( squish_title, ')',  );

UPDATE bad_squish SET squish_title = replace( squish_title, '[', '' ); UPDATE art_squish SET squish_title = replace( squish_title, '[', '' );

UPDATE bad_squish SET squish_title = replace( squish_title, ']', '' ); UPDATE art_squish SET squish_title = replace( squish_title, ']', '' );

UPDATE bad_squish SET squish_title = replace( squish_title, ',', '' ); UPDATE art_squish SET squish_title = replace( squish_title, ',', '' );

UPDATE bad_squish SET squish_title = replace( squish_title, '!', '' ); UPDATE art_squish SET squish_title = replace( squish_title, '!', '' );

UPDATE bad_squish SET squish_title = replace( squish_title, '"',  ); UPDATE art_squish SET squish_title = replace( squish_title, '"',  );

UPDATE bad_squish SET squish_title = replace( squish_title, '%', '' ); UPDATE art_squish SET squish_title = replace( squish_title, '%', '' );

UPDATE bad_squish SET squish_title = replace( squish_title, '&', '' ); UPDATE art_squish SET squish_title = replace( squish_title, '&', '' );

UPDATE bad_squish SET squish_title = replace( squish_title, '*', '' ); UPDATE art_squish SET squish_title = replace( squish_title, '*', '' );

UPDATE bad_squish SET squish_title = replace( squish_title, '-', '' ); UPDATE art_squish SET squish_title = replace( squish_title, '-', '' );

UPDATE bad_squish SET squish_title = replace( squish_title, '+', '' ); UPDATE art_squish SET squish_title = replace( squish_title, '+', '' );

UPDATE bad_squish SET squish_title = replace( squish_title, '=', '' ); UPDATE art_squish SET squish_title = replace( squish_title, '=', '' );

UPDATE bad_squish SET squish_title = replace( squish_title, '{', '' ); UPDATE art_squish SET squish_title = replace( squish_title, '{', '' );

UPDATE bad_squish SET squish_title = replace( squish_title, '}', '' ); UPDATE art_squish SET squish_title = replace( squish_title, '}', '' );

UPDATE bad_squish SET squish_title = replace( squish_title, ':', '' ); UPDATE art_squish SET squish_title = replace( squish_title, ':', '' );

UPDATE bad_squish SET squish_title = replace( squish_title, ';', '' ); UPDATE art_squish SET squish_title = replace( squish_title, ';', '' );

UPDATE bad_squish SET squish_title = replace( squish_title, '@', '' ); UPDATE art_squish SET squish_title = replace( squish_title, '@', '' );

UPDATE bad_squish SET squish_title = replace( squish_title, '#', '' ); UPDATE art_squish SET squish_title = replace( squish_title, '#', '' );

UPDATE bad_squish SET squish_title = replace( squish_title, '<', '' ); UPDATE art_squish SET squish_title = replace( squish_title, '<', '' );

UPDATE bad_squish SET squish_title = replace( squish_title, '>', '' ); UPDATE art_squish SET squish_title = replace( squish_title, '>', '' );

UPDATE bad_squish SET squish_title = replace( squish_title, '.', '' ); UPDATE art_squish SET squish_title = replace( squish_title, '.', '' );

UPDATE bad_squish SET squish_title = replace( squish_title, '/', '' ); UPDATE art_squish SET squish_title = replace( squish_title, '/', '' );

UPDATE bad_squish SET squish_title = replace( squish_title, '\\', '' ); UPDATE art_squish SET squish_title = replace( squish_title, '\\', '' );

UPDATE bad_squish SET squish_title = replace( squish_title, , '' ); UPDATE art_squish SET squish_title = replace( squish_title, , '' );

ALTER TABLE bad_squish ADD INDEX ( squish_title ); ALTER TABLE art_squish ADD INDEX ( squish_title );

SELECT concat( '*',art_title,' links to ', bad_squish.orig_title, ', try ', group_concat( concat( , art_squish.orig_title,  )) ) INTO OUTFILE 'c:\punc.txt' FROM art_squish, bad_squish, art WHERE art.art_id = bad_squish.from_id AND  bad_squish.squish_title = art_squish.squish_title GROUP BY bad_squish.from_id, bad_squish.orig_title ORDER BY art_title, bad_squish.orig_title;

-