// User:Retro/Scripts/unicategorise.js

// Fork of User:HarJIT/Scripts/unicategorise.js
//
// The documentation below is copied from the original.
//
// I believe simply importScript('User:Retro/Scripts/unicategorise.js') is necessary.

// Adjust chset-* style code chart colouration to match Unicode categories. // Adds a button below the source editor. // // I am not selling this and make no guarantees of safety, fitness or that it won't mangle the content. // You are advised to double check that the scripted process has produced the desired results, and // clean up where necessary (and manually fix the more complicated cases, such as multiple mappings // or PUA mappings). // // Furthermore, this loads JavaScript code from a third-party source for identifying Unicode character // category. I cannot guarantee that this will not be compromised. Proceed at your own risk. // // Usage: mw.loader.load("//en.wikipedia.org/w/index.php?action=raw&ctype=text/javascript&title=User:HarJIT/Scripts/unicategorise.js"); // // Canonical: m:w:User:HarJIT/Scripts/unicategorise.js

( => {

/**
 * Split a string in the manner of Python's `str.split(sep, maxsplit)`.
 *
 * Called with only a separator, this is the same as `split(sep)`.
 * Called with a separator and a maximum split count, the string is split at
 * most `maxsplit` times and everything after the last consumed separator is
 * appended as one final element.
 *
 * Unlike Python, that final element is appended even when the separators ran
 * out first (it is then the empty string), so `"abc".pysplit("=", 1)` is
 * `["abc", ""]`. Callers in this script index into the result and rely on it.
 *
 * @param {string} sep - Separator to split on.
 * @param {number} [maxsplit] - Maximum number of splits to perform.
 * @returns {string[]} The pieces, with the unsplit remainder last.
 */
String.prototype.pysplit = function (...args) {
    const [sep, maxsplit] = args;
    if (args.length === 1) {
        return this.split(sep);
    }
    const parts = this.split(sep, maxsplit);
    if (parts.length === 0) {
        // A limit of zero means "do not split": the whole string is the remainder.
        return [String(this)];
    }
    // Length of what has been consumed so far, including the separator that
    // follows the last piece.
    const consumed = parts.join(sep).length + sep.length;
    parts.push(this.substring(consumed));
    return parts;
};

/**
 * Report whether this string begins with the given prefix.
 *
 * @param {string} prefix - Text expected at the start of the string.
 * @returns {boolean} True when the leading characters equal `prefix`.
 */
String.prototype.startswith = function (prefix) {
    const head = this.substring(0, prefix.length);
    return head == prefix;
};

/**
 * Report whether this string finishes with the given suffix.
 *
 * @param {string} suffix - Text expected at the end of the string.
 * @returns {boolean} True when the trailing characters equal `suffix`.
 */
String.prototype.endswith = function (suffix) {
    const tailStart = this.length - suffix.length;
    const tail = this.substring(tailStart);
    return tail == suffix;
};

/**
 * Report whether the given text occurs anywhere in this string.
 *
 * @param {string} needle - Text to look for.
 * @returns {boolean} True when `needle` is found.
 */
String.prototype.contains = function (needle) {
    const position = this.indexOf(needle);
    return position !== -1;
};

// Category table downloaded from the XRegExp repository. Stays null until the
// download has completed and its contents have been evaluated.
var ezh = null;
jQuery.get(
    "https://cdn.jsdelivr.net/gh/slevithan/xregexp@2cb340a819b290c2d8638965fb7e825cfc0efbc4/tools/output/categories.js",
    (b) => {
        // Keep only what follows the first "=", minus a trailing semicolon.
        let literal = b.pysplit("=", 1)[1].trim();
        if (literal.endswith(";")) {
            literal = literal.substring(0, literal.length - 1).trim();
        }
        // SECURITY: this evaluates text fetched from a third-party host. The URL
        // is pinned to a specific commit, but the CDN itself is still trusted
        // here. Flagged for review rather than replaced, because the data is not
        // valid JSON.
        ezh = eval(literal); // Isn't in the JSON subset, sadly.
    },
    "text"
);

// Maps a Unicode category name (e.g. "L", "Co") to a RegExp matching one
// character of that category. Filled in by doyogh() once `ezh` is available.
var yogh = {};

/**
 * Build the `yogh` regexes from the downloaded category table.
 *
 * While `ezh` is still null this re-schedules itself every 500 ms; nothing
 * here stops the polling if the download never succeeds.
 */
var doyogh = () => {
    if (ezh === null) {
        setTimeout(doyogh, 500);
        return;
    }
    ezh.forEach((e) => {
        // `bmp` is the body of a character class; `astral`, when present, is a
        // ready-made alternative that is OR-ed on.
        const source = (typeof e.astral !== "undefined")
            ? "[" + e.bmp + "]|" + e.astral
            : "[" + e.bmp + "]";
        yogh[e.name] = new RegExp(source);
    });
};
doyogh();

// Text that fix() puts in front of a cell it could not classify on its own.
// NOTE(review): this is empty and the script's description talks about double
// checking results, so a marker (possibly an HTML comment) may have been lost
// from this literal - confirm against the upstream script.
const CHECKMS = "";

// When the page text contains this string, the button handler deletes every
// occurrence of it, swaps FROM for TO and uses SUMMARY2 as the edit summary.
// NOTE(review): a bare newline matches nearly every page and deleting it would
// join all lines; part of this literal looks to be missing - confirm upstream.
const SHARED = "\n";

// Legend sentence that is replaced (FROM) and its replacement (TO).
const FROM = "Invariant alphanumeric, punctuation, and control characters are shown in color.";
const TO = "Non-invariant characters are shown boxed.";

// Edit summaries: the plain one, and the one used when SHARED was found.
const SUMMARY = "regenerate colour codes based on Unicode category (script)";
const SUMMARY2 = "box nationalised codes, and regenerate colour codes based on Unicode category (script)";

// Hand-written table header row that the button handler swaps for {{chset-table-header|...}}.
const FAKEHEAD = "|-\n| width=\"4%\" |\n! width=\"6%\" | \u20140 || width=\"6%\" | \u20141\n! width=\"6%\" | \u20142 || width=\"6%\" | \u20143\n! width=\"6%\" | \u20144 || width=\"6%\" | \u20145\n! width=\"6%\" | \u20146 || width=\"6%\" | \u20147\n! width=\"6%\" | \u20148 || width=\"6%\" | \u20149\n! width=\"6%\" | \u2014A || width=\"6%\" | \u2014B\n! width=\"6%\" | \u2014C || width=\"6%\" | \u2014D\n! width=\"6%\" | \u2014E || width=\"6%\" | \u2014F";

// Hand-written table footer row that the button handler swaps for {{chset-table-footer}}.
const FAKEFOOT = "|-\n||\n!—0||—1||—2||—3||—4||—5||—6||—7||—8||—9||—A||—B||—C||—D||—E||—F";

/**
 * Recolour the `{{chset-color-*}}` cells of a code chart so that each colour
 * follows the Unicode general category of the cell's code point(s).
 *
 * The text is cut at every "\n|{{chset-color-" and each piece is either
 * re-emitted untouched or rebuilt with a new colour name. Pieces that cannot
 * be classified are re-emitted with CHECKMS in front.
 *
 * Relies on the String.prototype helpers defined in this file (pysplit,
 * startswith, contains) and on the `yogh` category regexes.
 *
 * @param {string} inp - Wikitext of the page being edited.
 * @returns {string} The rewritten wikitext, or `inp` unchanged when the
 *     category regexes are not available yet.
 */
var fix = function (inp) {
    // If the category table has not been loaded, an undefined regex matches
    // everything: every code point would be treated as private use and every
    // cell flagged. Leave the text alone instead.
    const neededCategories = ["Co", "L", "N", "P", "S", "C"];
    if (!neededCategories.every((name) => yogh[name] instanceof RegExp)) {
        console.warn("unicategorise: Unicode category data is not loaded yet; text left unchanged.");
        return inp;
    }

    // Number -> one-character string, or null when the number is not a valid
    // code point (String.fromCodePoint throws RangeError for those).
    const charFromCodePoint = (value) =>
        (Number.isInteger(value) && value >= 0 && value <= 0x10FFFF)
            ? String.fromCodePoint(value)
            : null;

    // We need *a* colour template to begin with in order to parse it, even if
    // it is the wrong one. For the places where they aren't used already.
    // Also some EBCDIC pages use lack of colour as a distinguishing mark, which
    // no longer shows up now that -letter is white.
    // NOTE(review): despite the comment above, this replacement inserts no
    // colour template, and the source computed an unused "color-intl" /
    // "color-intl-box" name just before it. Part of the replacement string may
    // have been lost - confirm against the upstream script.
    const normalised = inp
        .replace(/\n\|\s*\|?\{\{[Cc]hset-c(?=ell|trl)/g, "\n||{{chset-c")
        .split("{{Chset-").join("{{chset-");
    const chunks = normalised.split("\n|{{chset-color-");
    let output = chunks[0];

    chunks.slice(1).forEach((ii) => {
        // Re-emit this chunk as it was, optionally with a marker in front.
        const keepAsIs = (marker) => {
            output += "\n|" + marker + "{{chset-color-" + ii;
        };

        let i = ii;
        let nombre = "";
        const head = i.substring(0, 40);
        if (i.startswith("undef")) {
            keepAsIs("");
            return;
        }
        if (i.startswith("intl}}|{{chset-cell3|| ")) {
            // Common in APL code pages, including EBCDIC ones.
            output += "\n|{{chset-color-letter" + ii.substring(4);
            return;
        }
        if (i.startswith("hangups}}|{{chset-cell3|| ")) {
            // What the hell (chset-color-hangups does not exist and afaik never did)
            output += "\n|{{chset-color-letter" + ii.substring(7);
            return;
        }
        // Which template variant is this: ...l4|, ...l3| or plain ...l| ?
        if (head.contains("l4|")) {
            nombre = "4";
        } else if (head.contains("l3|")) {
            nombre = "3";
        } else if (!head.contains("l|")) {
            keepAsIs("");
            return;
        }

        const layout = head.contains("chset-ctrl") ? "-ctrl" : "-cell";
        const iii = i.substring(0, 15);
        if (iii.startswith("esc")) {
            keepAsIs("");
            return;
        }

        // Carry the existing -box / -var suffix (and a -box| argument) over.
        let hilite = "";
        if (iii.contains("-box")) {
            hilite = "-box";
        } else if (iii.contains("-var")) {
            hilite = "-var";
        }
        if (hilite === "-box" && iii.contains("-box|")) {
            hilite += "|" + i.pysplit("-box|", 1)[1].pysplit("}", 1)[0];
        }

        let wlink = null;
        if (nombre === "" && i.pysplit("}}", 1)[1].trim().startswith("|[[")) {
            // NOTE(review): the first statement of this branch was garbled in
            // the source this was restored from; splitting on "[[" is a
            // reconstruction - confirm against the upstream script.
            i = i.pysplit("[[", 1)[1];
            wlink = i.pysplit("|", 1)[0];
            i = i.pysplit("|", 2)[2];
        } else {
            i = i.pysplit("l" + nombre + "|", 1)[1];
        }

        let cpt = i.pysplit("|", 1)[0].pysplit("}}", 1)[0].trim();
        if (wlink !== null) {
            // MIK being _incredibly_ helpful indeed
            const codep = charFromCodePoint(parseInt(cpt, 16));
            if (codep !== null) {
                i = i.split("}}").join("|" + codep + "}}");
            }
        }
        if (cpt.length === 0 && iii.startswith("ctrl")) {
            // Unmapped controls, common in articles about EBCDIC variants.
            cpt = "0000"; // Kludgy
        }

        // Blank out <ref .../> and <ref ...>...</ref>, then turn "X (Y)" and
        // "X/Y" into a list of alternatives.
        // NOTE(review): the "<ref[^>" start of this pattern was missing in the
        // source and has been restored from the rest of the expression.
        let cpts = cpt.replace(/<ref[^>]*?(\/>|>[^<]*?<\/ref>)/g, " ");
        cpts = cpts.replace(/\(/, "/").replace(/\)/, " ").replace(/\?/, " ");
        cpts = cpts.replace("  ", " ").replace("  ", " ").replace("  ", " ").replace("  ", " ");

        const colourSet = new Set();
        let checkmsg = "";
        cpts.split("/").forEach((part) => {
            let hex = part.trim();
            if (hex.contains(" ")) {
                // Extra text after the code point: use the first word and flag the cell.
                hex = hex.pysplit(" ", 1)[0];
                checkmsg = CHECKMS;
            }
            const codept = /^[0-9a-f]+$/i.test(hex)
                ? charFromCodePoint(parseInt(hex, 16))
                : null;
            if (codept === null) {
                console.log(hex);
                return; /* i.e. continue */
            }
            if (yogh["Co"].test(codept)) {
                // Private use, which could mean:
                //  (a) An end-user defined character in the source encoding (which would be -misc).
                //  (b) A well-defined character without a standard Unicode mapping (e.g. the Apple
                //      logo in Macintosh, the Windows logo in Wingdings, the radical extender in
                //      x-mac-symbol, several characters in KPS 9566 and LMBCS...).
                // ==> Let a human be the judge here.
                console.log(hex);
                return; /* i.e. continue */
            }
            if (yogh["L"].test(codept)) {
                // May as well keep -alpha where it is already used...
                colourSet.add(iii.startswith("alpha") ? "-alpha" : "-letter");
            } else if (yogh["N"].test(codept)) {
                colourSet.add("-digit");
            } else if (yogh["P"].test(codept)) {
                // May as well keep -ext-punct where it is already used...
                const keepExt = iii.startswith("ext") && (parseInt(hex, 16) > 0x7F);
                colourSet.add(keepExt ? "-ext-punct" : "-punct");
            } else if (yogh["S"].test(codept)) {
                colourSet.add("-graph");
            } else if (yogh["C"].test(codept)) {
                colourSet.add("-ctrl");
            } else {
                colourSet.add("-misc");
            }
        });

        const colours = Array.from(colourSet);
        let colour = null;
        if (colours.length === 1) {
            colour = colours[0];
        } else {
            // Several candidates: keep the one the cell already had, if any.
            colours.forEach((col) => {
                if (iii.startswith(col.substring(1))) {
                    colour = col;
                }
            });
        }
        if (colour === null) {
            // Nothing usable, or an ambiguous mix: leave it for a human.
            keepAsIs(CHECKMS);
            return;
        }
        output += "\n|" + checkmsg + "{{chset-color" + colour + hilite + "}}|{{chset" + layout + nombre + "|" + i;
    });
    return output;
};

// Once the DOM is ready, add a "Fix chset-color" button just before the
// #editpage-copywarn element (only present on the edit form).
jQuery(() => {
    const nxt = jQuery("#editpage-copywarn")[0];
    if (!nxt) {
        return;
    }
    const butn = document.createElement("input");
    butn.setAttribute("type", "button");
    butn.setAttribute("value", "Fix chset-color");
    nxt.parentNode.insertBefore(butn, nxt);

    // Run fix() over the edit box and, if it changed anything, write the
    // result and an edit summary back into the form.
    butn.onclick = () => {
        const txt = jQuery("#wpTextbox1")[0];
        const vl = txt.value;
        let fx = fix(vl);
        if (fx === vl) {
            return;
        }
        let rprt = SUMMARY;
        if (vl.contains(SHARED)) {
            // NOTE(review): this deletes every occurrence of SHARED; if SHARED
            // really is a bare newline that joins all lines - confirm the
            // constant's intended value.
            fx = fx.split(SHARED).join("").split(FROM).join(TO);
            rprt = SUMMARY2;
        }
        // The heading is taken to read "Editing <title>"; drop that prefix.
        const tytl = jQuery("#firstHeading")[0].innerText.trim().substring("Editing ".length);
        // A replacer function keeps "$" sequences in the title from being
        // treated as replacement patterns.
        fx = fx.replace(FAKEHEAD, () => "{{chset-table-header|" + tytl + "}}");
        fx = fx.replace(FAKEFOOT, "{{chset-table-footer}}");
        txt.value = fx;
        jQuery("#wpSummary")[0].value = rprt;
    };
});

});

// End: m:w:User:HarJIT/Scripts/unicategorise.js //