// User:Trey314159/homoglyphHunter.js

// Latin-to-Cyrillic mapping: each key is a Latin character, each value its
// Cyrillic look-alike. Some values are two code units long (base letter plus
// combining accent); those are not inverted into Cyr2LatMap below.
var Lat2CyrMap = {
	'a':'а', 'A':'А', 'ă':'ӑ', 'Ă':'Ӑ', 'ä':'ӓ', 'Ä':'Ӓ', 'æ':'ӕ', 'Æ':'Ӕ',
	'B':'В', 'c':'с', 'C':'С', 'ç':'ҫ', 'Ç':'Ҫ', 'e':'е', 'E':'Е', 'è':'ѐ',
	'È':'Ѐ', 'ë':'ё', 'Ë':'Ё', 'ĕ':'ӗ', 'Ĕ':'Ӗ', 'ə':'ә', 'Ə':'Ә', 'H':'Н',
	'i':'і', 'I':'І', 'ï':'ї', 'Ï':'Ї', 'j':'ј', 'J':'Ј', 'k':'к', 'K':'К',
	'M':'М', 'o':'о', 'O':'О', 'ö':'ӧ', 'Ö':'Ӧ', 'p':'р', 'P':'Р', 'Q':'Ԛ',
	's':'ѕ', 'S':'Ѕ', 'T':'Т', 'W':'Ԝ', 'x':'х', 'X':'Х', 'y':'у', 'Y':'У',
	'ȳ':'ӯ', 'ÿ':'ӱ', 'á':'а́', 'é':'е́', 'í':'і́', 'ó':'о́', 'ý':'у́', 'ħ':'ћ',
	'ɜ':'з'
};

// Cyrillic-to-Latin replacements used by the "fix-enc" option
// (reported as "fix encoding error" in the edit summary).
var EncErrMap = {'ц':'ö', 'ч':'ç', 'у':'ã', 'б':'á', 'ж':'æ'};

// Cyrillic-to-Latin mapping; populated by invertAndLengthFilter() just below.
var Cyr2LatMap = {};

// invert Lat2CyrMap to Cyr2LatMap and strip keys of length > 1 in both directions
invertAndLengthFilter(Lat2CyrMap, Cyr2LatMap);

// define patterns and regexes for matching all chars in script, or just homoglyphs
var LatAllPat = 'A-Za-zÀ-ɏɐ-ʯ';
var LatHomoglyphPat = Object.keys(Lat2CyrMap).join('');
var LatAllRegex = new RegExp("[" + LatAllPat + "]+", "g");
var LatOneRegex = new RegExp("[" + LatAllPat + "]", "g");
var LatHomoglyphRegex = new RegExp("[" + LatHomoglyphPat + "]+", "g");

var CyrAllPat = 'Ѐ-ԯ';
var CyrHomoglyphPat = Object.keys(Cyr2LatMap).join('');
var CyrAllRegex = new RegExp("[" + CyrAllPat + "]+", "g");
var CyrOneRegex = new RegExp("[" + CyrAllPat + "]", "g");
var CyrHomoglyphRegex = new RegExp("[" + CyrHomoglyphPat + "]+", "g");

// Regex (in insource:/.../ form) matching a run of Latin/Cyrillic letters that
// contains at least one Cyrillic letter directly next to a Latin letter.
var insourcePat = "/[" + CyrAllPat + LatAllPat + "]*([" + CyrAllPat + "][" + LatAllPat + "]|[" + LatAllPat + "][" + CyrAllPat + "])[" + CyrAllPat + LatAllPat + "]*/";

// Config
var viceversa = 1;     // 1: also list predominantly Cyrillic words
var sortbyscore = 1;   // 1: sort by "magic score", 0: sort by raw result count
var limitresults = 50;

// Busy flag: the fetch functions return early while this is truthy.
var slowFetch = 0;
// Server timestamp of the last content fetch; sent as starttimestamp when editing.
var startTime = '';

// Introductory text shown in the overlay. Each "\<TAB>" escape evaluates to a
// plain tab character.
// NOTE(review): the "[Wait for it...]" placeholders suggest markup (elements
// with ids FHOptViceVersa / FHOptSort) that is not present in this string — confirm.
var letsGo = "\	Options: \	Looking for Latin words with Cyrillic characters.\	Vice Versa:  [Wait for it...]  predominantly Cyrillic words.\	Sort: Sort by  [wait for it...] . (Magic score puts impactful, more obviously correctable results first.)\	 \	Let's go!";

/**
 * Create the fixed-position overlay container (#HHContainer) on first use,
 * make it visible, fill it with the header and intro text, and write the
 * current option labels into #FHOptViceVersa and #FHOptSort.
 *
 * NOTE(review): the HTML string set below contains no elements with the ids
 * FHOptViceVersa, FHOptSort, HHStatus, HHMixedWords or HHSnippets, which this
 * file writes to elsewhere; the markup appears to have been lost — confirm
 * and restore from the original script.
 */
function initialize_HHunter() {
	if ($("#HHContainer").length === 0) {
		var div = document.createElement('div');
		div.setAttribute('id', 'HHContainer');

		var dstyle = div.style;
		dstyle.position = 'fixed';
		dstyle.width = "90%";
		dstyle.height = "90%";
		dstyle.top = "3%";
		dstyle.left = "5%";
		dstyle.margin = "0";
		dstyle.zIndex = "1000000";
		dstyle.backgroundColor = "#fefefe";
		dstyle.border = "1px solid #aaa";
		dstyle.overflow = "scroll";
		dstyle.display = "none";

		document.body.append(div);
	}

	$('#HHContainer').css('display','inline');
	$('#HHContainer').html(" ⓧ Homoglyph Hunter " + letsGo + "           ");

	$("#FHOptViceVersa").html(viceversa?"Also show":"Skip");
	$("#FHOptSort").html(sortbyscore?"magic score":"raw results count");

	return;
}

/**
 * Hide the Homoglyph Hunter overlay (#HHContainer) without removing it.
 */
function closeHH() {
	$('#HHContainer').css('display','none');
}

/**
 * Fetch the current content of one page, find every occurrence of mixedWord
 * with up to 75 characters of context on each side, and append an HTML entry
 * (fix links, open/edit links, title and snippet list) to #HHSnippets.
 * Nothing is appended when the page has no content or the word does not occur.
 *
 * Side effect: stores the response's curtimestamp in the global startTime.
 *
 * @param {string} mixedWord - mixed-script word to look for; used as a regex source
 * @param {string} theTitle - title of the page to fetch
 */
function getHHSnippets (mixedWord, theTitle) {
	// mw.Api must be instantiated before calling get() on the instance.
	var regexSearch = new mw.Api().get( {
		action: 'query',
		prop: 'revisions',
		titles: theTitle,
		rvprop: 'content',
		format: 'json',
		curtimestamp: '1'
	} );

	$.when( regexSearch ).then(function(article) {
		var pages = article.query.pages;
		var resultHTML = '';

		startTime = article.curtimestamp;

		// Only one title is requested, so take the first (only) page entry.
		var page;
		for (var prop in pages) {
			if (pages.hasOwnProperty(prop)) {
				page = pages[prop];
				break;
			}
		}

		// A missing page comes back without revisions; there is nothing to show.
		if (!page || !page.revisions || !page.revisions.length) {
			return;
		}
		var articleText = page.revisions[0]["*"];

		var contextPat = ".{0,75}" + mixedWord + ".{0,75}";
		var contextRegex = new RegExp(contextPat, "g");
		var myMatches = articleText.match(contextRegex);

		if (!myMatches) {
			return;
		}

		var displayTitle = theTitle;
		var mixedWordRegex = new RegExp (mixedWord, "g");
		displayTitle = displayTitle.replace(mixedWordRegex, colorizeString(mixedWord));

		// Candidate repairs; a fix link is offered only when the conversion
		// actually changes the word. "direction" is the code fixHHArticle expects.
		var fixes = [
			{ version: convertScript(mixedWord, Cyr2LatMap), color: 'blue', direction: 1, label: 'fix-latn' },
			{ version: convertScript(mixedWord, Lat2CyrMap), color: 'red', direction: 2, label: 'fix-cyrl' },
			{ version: convertScript(mixedWord, EncErrMap), color: 'black', direction: 3, label: 'fix-enc' }
		];

		resultHTML += '<font size=-1>';
		for (var f = 0; f < fixes.length; f++) {
			if (fixes[f].version != mixedWord) {
				resultHTML += '(<a style="color:' + fixes[f].color + '" href=# onclick=\'fixHHArticle(this, "' + mixedWord + '","' + quoteEsc(theTitle) + '", ' + fixes[f].direction + ')\'>' + fixes[f].label + ': ' + colorizeString(fixes[f].version) + '</a>) ';
			}
		}
		resultHTML += '(<a href="/wiki/' + quoteEsc(theTitle) + '" target=_blank>open</a>) (<a href="/w/index.php?title=' + quoteEsc(theTitle) + '&action=edit" target=_blank>edit</a>) ' + displayTitle + ' <ol>';

		for (var i = 0; i < myMatches.length; i++) {
			// Escape "<" so article markup is shown as text, not interpreted.
			var display = myMatches[i].replace(/</g, "&lt;");
			// Set off link targets, template parameter names, tags, URLs and
			// file extensions (the "Be careful changing text in links!" cases).
			// NOTE(review): the replacement only pads with spaces; presumably
			// highlighting markup was lost here — confirm.
			display = display.replace(/\[\[[^\]|]+]?]?|([^\s=|]+\s*=)|(&lt;[^\s|>]*>?)|https?:\/\/[^\s|]*|(\.(jpe?g|gif|png|svg|tiff|xcf|mp3|mid|ogg|flac|wav|djvu?|pdf|tab))/ig, " $& ");
			display = display.replace(mixedWordRegex, " $& ");
			resultHTML += '<li style="font-family:monospace">...' + display + '...</li>';
		}
		resultHTML += '</ol> ';
		$('#HHSnippets').append(resultHTML);
	});

	return;
}

/**
 * First stage of the per-word lookup: search page titles for target, append a
 * heading and one snippet entry per hit to #HHSnippets, then continue with
 * getHHTemplates(). Does nothing while another slow fetch is in progress.
 *
 * @param {string} target - mixed-script word to search for
 */
function getHHTitles( target ) {
	if (slowFetch) {
		return;
	}
	slowFetch = 1;

	// Targets longer than two characters are searched as a /regex/.
	var title_target = target;
	if (target.length > 2) {
		title_target = '/' + target + '/';
	}

	$('#HHSnippets').html('Be careful changing text in links! ');

	// mw.Api must be instantiated before calling get() on the instance.
	var titlesearch = new mw.Api().get( {
		action: 'query',
		list: 'search',
		format: 'json',
		srlimit: '50',
		srsearch: 'intitle:' + title_target
	} ).fail( function( code, result ) {
		if ( code === "http" ) {
			// result.xhr contains the jqXHR object
			alert( "HTTP error: " + result.textStatus );
		} else if ( code === "ok-but-empty" ) {
			alert( "Error: Got an empty response from the server" );
		} else {
			alert( "API error: " + code );
		}
		// The chain stops here, so release the busy flag; otherwise every
		// later lookup would be ignored.
		slowFetch = 0;
	} );

	$.when( titlesearch ).then(function(results) {
		var searches = results.query.search;

		if (searches.length !== 0) {
			$('#HHSnippets').append(' Titles (' + searches.length + ') for ' + colorizeString(target) + ' ');
			for (var i = 0; i < searches.length; i++) {
				getHHSnippets(target, searches[i].title);
			}
		}
		getHHTemplates(target);
	});

	return;
}

/**
 * Second stage of the per-word lookup: run a 'template:"<target>"' search,
 * append a heading and one snippet entry per hit to #HHSnippets, then
 * continue with getHHFullText().
 *
 * @param {string} target - mixed-script word to search for
 */
function getHHTemplates( target ) {
	slowFetch = 1;

	// mw.Api must be instantiated before calling get() on the instance.
	var templatesearch = new mw.Api().get( {
		action: 'query',
		list: 'search',
		format: 'json',
		srlimit: '50',
		srsearch: 'template:"' + target + '"'
	} ).fail( function( code, result ) {
		if ( code === "http" ) {
			// result.xhr contains the jqXHR object
			alert( "HTTP error: " + result.textStatus );
		} else if ( code === "ok-but-empty" ) {
			alert( "Error: Got an empty response from the server" );
		} else {
			alert( "API error: " + code );
		}
		// The chain stops here, so release the busy flag; otherwise every
		// later lookup would be ignored.
		slowFetch = 0;
	} );

	$.when( templatesearch ).then(function(results) {
		var searches = results.query.search;

		if (searches.length != 0) {
			$('#HHSnippets').append(' Templates (' + searches.length + ') for ' + colorizeString(target) + ' ');
			for (var i = 0; i < searches.length; i++) {
				getHHSnippets(target, searches[i].title);
			}
		}
		getHHFullText(target);
	});

	return;
}

/**
 * Final stage of the per-word lookup: run an 'insource:<target>' search,
 * append a heading and one snippet entry per hit to #HHSnippets, and release
 * the slowFetch busy flag once the search has returned.
 *
 * @param {string} target - mixed-script word to search for
 */
function getHHFullText( target ) {
	slowFetch = 1;

	// mw.Api must be instantiated before calling get() on the instance.
	var fulltextsearch = new mw.Api().get( {
		action: 'query',
		list: 'search',
		format: 'json',
		srlimit: '50',
		srsearch: 'insource:' + target
	} ).fail( function( code, result ) {
		if ( code === "http" ) {
			// result.xhr contains the jqXHR object
			alert( "HTTP error: " + result.textStatus );
		} else if ( code === "ok-but-empty" ) {
			alert( "Error: Got an empty response from the server" );
		} else {
			alert( "API error: " + code );
		}
		// Release the busy flag on failure too; otherwise every later
		// lookup would be ignored.
		slowFetch = 0;
	} );

	$.when( fulltextsearch ).then(function(results) {
		var searches = results.query.search;

		if (searches.length != 0) {
			$('#HHSnippets').append(' Full-Text Results (' + searches.length + ') for ' + colorizeString(target) + ' ');
			for (var i = 0; i < searches.length; i++) {
				getHHSnippets(target, searches[i].title);
			}
		}

		slowFetch = 0;
	});

	return;
}

/**
 * Replace every occurrence of mixedWord in a page with its converted form and
 * save the page as a minor edit. The clicked link is hidden immediately and a
 * "FIXED" or "ERROR" marker is inserted after it when the edit finishes.
 *
 * @param {Element} linkElem - the fix link that was clicked
 * @param {string} mixedWord - word to replace; used as a regex source
 * @param {string} theTitle - page title, quote-escaped with quoteEsc()
 * @param {number} direction - 3: apply EncErrMap; 2: Latin to Cyrillic;
 *   anything else: Cyrillic to Latin
 */
function fixHHArticle( linkElem, mixedWord, theTitle, direction ) {
	// Shared failure handler for the fetch and the edit request.
	function reportFailure( code, result ) {
		if ( code === "http" ) {
			// result.xhr contains the jqXHR object
			alert( "HTTP error: " + result.textStatus );
		} else if ( code === "ok-but-empty" ) {
			alert( "Error: Got an empty response from the server" );
		} else {
			alert( "API error: " + code );
		}
		$(linkElem).after("<b style='font-size:80%'>ERROR</b>");
	}

	theTitle = quoteUnesc(theTitle);
	// Disable and hide the link so the same fix cannot be submitted twice.
	$(linkElem).attr('onclick','');
	$(linkElem).css('display', 'none');

	//Get content of article
	new mw.Api().get( {
		action: 'query',
		titles: theTitle,
		prop: [ 'revisions', 'info' ],
		rvprop: 'content',
		indexpageids: 1,
		rawcontinue: ''
	} ).done( function( result ) {
		// pageids is an array; a single title was requested, so use its first entry.
		var artID = result.query.pageids[ 0 ];
		var page = result.query.pages[ artID ];
		if ( !page || !page.revisions || !page.revisions.length ) {
			reportFailure( "no page content returned" );
			return;
		}
		var artContents = page.revisions[ 0 ][ '*' ];
		var mixedWordRegex = new RegExp (mixedWord, "g");

		var displayMixedWord = '';
		var fixMsg = '';

		if (direction == 3) {
			// Encoding Error
			var reEncoded = convertScript(mixedWord, EncErrMap);
			artContents = artContents.replace(mixedWordRegex, reEncoded );
			fixMsg = 'fix encoding error: ' + mixedWord + ' → ' + reEncoded;
		}
		else if (direction == 2) {
			// Latin to Cyrillic
			artContents = artContents.replace(mixedWordRegex, convertScript(mixedWord, Lat2CyrMap) );
			// Bracket the characters being converted in the edit summary.
			displayMixedWord = mixedWord.replace(LatHomoglyphRegex, "[$&]");
			fixMsg = 'fix homoglyphs: convert Latin characters in ' + displayMixedWord + ' to Cyrillic';
		}
		else {
			// Cyrillic to Latin
			artContents = artContents.replace(mixedWordRegex, convertScript(mixedWord, Cyr2LatMap) );
			displayMixedWord = mixedWord.replace(CyrHomoglyphRegex, "[$&]");
			fixMsg = 'fix homoglyphs: convert Cyrillic characters in ' + displayMixedWord + ' to Latin';
		}

		new mw.Api().postWithToken( 'edit', {
			action: 'edit',
			title: theTitle,
			text: artContents,
			summary: fixMsg,
			minor: '1',
			starttimestamp: startTime
		} ).done( function() {
			$(linkElem).after("<b style='font-size:80%'>FIXED</b>");
		} ).fail( reportFailure );
	} ).fail( reportFailure );
}

/**
 * Main search: find mixed Latin/Cyrillic words with an insource: regex search,
 * look up how many pages contain each distinct word, and list the words
 * (each linked to getHHTitles) in #HHMixedWords, sorted by score or by count.
 * Does nothing while another slow fetch is in progress.
 */
function findHomoglyphs() {
	if (slowFetch) {
		return;
	}
	slowFetch = 1;

	$('#HHStatus').html("Fetching data... this can take 30 seconds or more.");

	// mw.Api must be instantiated before calling get() on the instance.
	var regexSearch = new mw.Api().get( {
		action: 'query',
		list: 'search',
		format: 'json',
		srlimit: '10000',
		srsearch: 'insource:' + insourcePat
	} ).fail( function( code ) {
		// Release the busy flag so the search can be retried.
		$('#HHStatus').html("Search failed: " + code);
		slowFetch = 0;
	} );

	$.when( regexSearch ).then(function(x) {
		// word -> fraction of its characters in the dominant script
		var matches = {};
		// Matched text is wrapped in <span class="searchmatch"> in search snippets.
		// NOTE(review): the opening tag was missing from this regex and has been
		// restored from the MediaWiki search snippet format — confirm.
		var re = /<span class="searchmatch">(.*?)<\/span>/g;
		var m;
		var i;

		if (x.query.search.length == 0) {
			$('#HHStatus').html('');
			$('#HHMixedWords').html("Nothing found.");
			// Release the busy flag on this early exit as well.
			slowFetch = 0;
			return;
		}

		for (i = 0; i < x.query.search.length; i++) {
			var snip = x.query.search[i].snippet;

			while ((m = re.exec(snip)) !== null) {
				if (typeof matches[m[1]] == 'undefined') {
					// Latin-ness: share of Latin characters in the word.
					matches[m[1]] = (m[1].match(LatOneRegex) || []).length / m[1].length;
					// For mostly-Cyrillic words use the Cyrillic share instead.
					if (viceversa == 1 && sortbyscore == 1 && matches[m[1]] < 0.5) {
						matches[m[1]] = (m[1].match(CyrOneRegex) || []).length / m[1].length;
					}
				}
			}
		}

		var terms = Object.keys(matches).sort(function(a, b) {
			return matches[b] - matches[a];
		});

		// One srlimit=1 search per word, used only for its totalhits figure.
		var artCountPromises = [];
		var mwapi = new mw.Api();
		for (i = 0; i < terms.length; i++) {
			artCountPromises.push( mwapi.get( {
				action: 'query',
				list: 'search',
				format: 'json',
				srlimit: '1',
				srsearch: 'insource:' + terms[i]
			} ) );
		}

		var count = {};
		var score = {};
		$.when( ...artCountPromises ).then(function() {
			// With exactly one promise, $.when passes its resolution values
			// straight through instead of one array per promise; normalise so
			// results[i][0] is always the i-th API response.
			var results = (artCountPromises.length === 1) ? [ arguments ] : arguments;
			var resultHTML = '';
			var i;

			for (i = 0; i < results.length; i++) {
				count[terms[i]] = results[i][0].query.searchinfo.totalhits;
				// give some weight to score, but more to Latin-ness, with a small x/1000 addition to sort 0-count items properly
				score[terms[i]] = Math.log10(count[terms[i]] + 1) * matches[terms[i]] * matches[terms[i]] + (matches[terms[i]]/1000);
			}

			terms = terms.sort(function(a, b) {
				if (sortbyscore) {
					return score[b] - score[a];
				}
				return count[b] - count[a];
			});

			for (i = 0; i < terms.length; i++) {
				if (score[terms[i]] <= 0 && viceversa == 0) {
					continue;
				}
				if (viceversa == 1 || matches[terms[i]] >= 0.5) {
					var display = colorizeString(terms[i]);
					if (resultHTML) {
						resultHTML += ' — ';
					}
					resultHTML += "<a href='#' style='color:black' onclick='copyToClipboard(\"" + terms[i] + "\"); getHHTitles(\"" + terms[i] + "\")'>" + display + "</a> (" + count[terms[i]] +
						// "/" + matches[terms[i]] + "/" + score[terms[i]] +
						")";
				}
			}

			if ('' === resultHTML) {
				resultHTML = "Nothing found.";
			}

			$('#HHStatus').html('');
			$('#HHMixedWords').html(resultHTML);
		} ).fail( function( code ) {
			$('#HHStatus').html("Count lookup failed: " + code);
		} );

		slowFetch = 0;
	} );

	return;
}

/**
 * Build the display form of a string by classifying each character as Latin,
 * Latin homoglyph, Cyrillic, Cyrillic homoglyph, or other.
 *
 * NOTE(review): all four Latin/Cyrillic branches currently emit the same
 * space-padded character; presumably each once wrapped the character in
 * distinct colouring markup that has been lost — confirm and restore. The
 * branch structure is kept so the markup can be put back.
 *
 * @param {string} str - text to decorate
 * @returns {string} str with every Latin or Cyrillic character padded by a
 *   space on each side; other characters are unchanged
 */
function colorizeString(str) {
	var str2 = [];

	for (var i = 0; i < str.length; i++) {
		if (str[i].match(LatOneRegex)) {
			// Latin
			if (str[i].match(LatHomoglyphRegex)) {
				// Latin homoglyph
				str2.push(" " + str[i] + " ");
			}
			else {
				str2.push(" " + str[i] + " ");
			}
		}
		else if (str[i].match(CyrOneRegex)) {
			// Cyrillic
			if (str[i].match(CyrHomoglyphRegex)) {
				// Cyrillic homoglyph
				str2.push(" " + str[i] + " ");
			}
			else {
				str2.push(" " + str[i] + " ");
			}
		}
		else {
			// Hmm, what's this?
			str2.push(str[i]);
		}
	}
	str2 = str2.join('');

	return str2;
}

// copy a string to the clipboard
// Works by putting the text into a temporary input element, selecting it and
// issuing the "copy" command; the element is removed again afterwards.
function copyToClipboard(string) {
	// NOTE(review): the element markup was blank here; an <input> is assumed
	// because .val() and .select() are called on it — confirm.
	var $temp = $("<input>");
	$("body").append($temp);
	$temp.val(string).select();
	document.execCommand("copy");
	$temp.remove();
}

// invert one map into another; in both directions remove keys (but not values) with length > 1
//
// For every own entry key -> value of src:
//   - if value is a single code unit, dest[value] = key is added;
//   - if key is longer than one code unit, the entry is deleted from src.
// Both objects are modified in place; nothing is returned.
function invertAndLengthFilter(src, dest) {
	for (var key in src) {
		// Skip anything inherited through the prototype chain.
		if (!Object.prototype.hasOwnProperty.call(src, key)) {
			continue;
		}
		var cyr = src[key];
		if (cyr.length == 1) {
			dest[cyr] = key;
		}
		if (key.length > 1) {
			delete src[key];
		}
	}
}

// map all available characters in string from one script to another based on a given map
//
// str: text to convert, processed one UTF-16 code unit at a time
// map: object whose keys are source characters and whose values are replacements
// Returns a new string; characters without a (truthy) entry in map are kept as-is.
function convertScript(str, map) {
	var str2 = [];

	for (var i = 0; i < str.length; i++) {
		str2.push( map[str[i]] || str[i] );
	}
	str2 = str2.join('');

	return str2;
}

// html encode quotes
// Replaces every ' with %27 and every " with %22 so the string can be placed
// inside a quoted HTML attribute; reversed by quoteUnesc().
function quoteEsc (theString) {
	theString = theString.replace(/'/g, "%27");
	theString = theString.replace(/"/g, "%22");
	return theString;
}

// html decode quotes
// Reverses quoteEsc(): replaces every %27 with ' and every %22 with ".
function quoteUnesc (theString) {
	theString = theString.replace(/%27/g, "'");
	theString = theString.replace(/%22/g, '"');
	return theString;
}

// when everything is loaded, add the Homoglyph Hunter link
// Waits for both the mediawiki.util module and DOM ready, then adds a
// "Homoglyph Hunter" entry to the p-tb portlet that opens the overlay.
$.when( mw.loader.using( ['mediawiki.util'] ), $.ready ).then( function () {
	var portletLink = mw.util.addPortletLink( 'p-tb', '#', 'Homoglyph Hunter' );
	$( portletLink ).click( function ( e ) {
		// Keep the '#' href from changing the URL or scrolling the page.
		e.preventDefault();
		initialize_HHunter();
	});
});