// User:Opencooper/showKanji.js

// This script shows, if found, the kanji and kana for an article.
// It then calls another script, bindKana.js, to clean up the display of ruby.
// For configuration, please see the documentation.

// License: CC0

/**
 * Entry point. Checks that the current page is one the script should act on,
 * adds the `#kanjiInfo` placeholder to the page heading, and requests the
 * Japanese label of the linked Wikidata item.
 *
 * Reads the global `wikidataId`. The label response is passed to
 * parseJaLabel().
 */
function setup() {
    // If we're not reading an article, do nothing
    const isReadingArticle = mw.config.get('wgAction') === 'view'
        && mw.config.get('wgIsArticle')
        && !location.search.split('oldid=')[1]
        && !mw.config.get('wgIsMainPage')
        && mw.config.get('wgContentLanguage') !== 'ja';
    if (!isReadingArticle) {
        return;
    }

    // Assuming that if there's no wikidata, there're no 1:1 interlanguage links,
    // and we don't want cases where a page links to a subsection of a jawiki
    // article
    if (wikidataId == null) {
        return;
    }

    let header = $('#firstHeading'); // Vector
    if (!header.length) {
        header = $('.page-heading'); // Minerva
    }
    if (!header.length) {
        console.error("showKanji.js: Couldn't find a page heading. This skin ("
                      + mw.config.get('skin') + ") might not be supported.");
        return;
    }

    // Placeholder so other elements don't push it down later. Everything else
    // in this script locates its output through the `kanjiInfo` id.
    // NOTE(review): the element type is a guess; confirm it against
    // bindKana.js and the CSS hooks described in the documentation.
    header.append($('<div>', { id: 'kanjiInfo' }));

    // Get the Japanese label from wikidata
    // API docs: https://www.wikidata.org/w/api.php?action=help&modules=wbgetentities
    $.ajax({
        url: "https://www.wikidata.org/w/api.php",
        data: {
            action: "wbgetentities",
            ids: wikidataId,
            props: "labels",
            languages: "ja",
            format: "json",
            origin: "*"
        },
        success: parseJaLabel
    });
}

/**
 * Success callback for the wbgetentities request made in setup().
 *
 * Extracts the Japanese label of the item identified by the global
 * `wikidataId`, normalises it with toHalfWidth(), builds the regexes for it,
 * displays it, and requests a reading unless the label is kana only.
 *
 * @param {Object} response - Parsed JSON body of the wbgetentities call.
 */
function parseJaLabel(response) {
    // Walk the response defensively: an error payload or an entity without
    // labels must end quietly instead of throwing inside the ajax callback.
    const wikidataInfo = response && response.entities && response.entities[wikidataId];
    const labels = wikidataInfo && wikidataInfo.labels;
    const jaEntry = labels && labels.ja;
    if (!jaEntry || !jaEntry.value) {
        return;
    }

    const jaLabel = jaEntry.value.toHalfWidth();
    buildRegexes(jaLabel);
    displayKanji(jaLabel);

    // If the japanese title is not just only kana, get the reading
    if (!kanjiRegexes.kanaOnly.test(jaLabel)) {
        requestKana();
    }
}

/**
 * Populate the global `kanjiRegexes` for a given Japanese title.
 *
 * Sets `latinOnly`, `kanaOnly`, `hiraganaOnly` and `katakanaOnly` (used to
 * classify a string) and `lead` (used to pull a parenthesised reading that
 * follows the title in a block of text).
 *
 * @param {string} kanji - The Japanese title, already passed through
 *     toHalfWidth() by the caller.
 */
function buildRegexes(kanji) {
    // Strip $kanji of all kanji and kana, adding whatever is left to the regex
    const reKanjiKana = /[\u3400-\u4DB5\u4E00-\u9FCB\uF900-\uFA6Aぁ-ゔァ-ヴー-]/g;
    // The leftovers end up inside character classes below, so escape them
    // first: a raw "]", "^" or "\" from the title would otherwise close or
    // alter the class.
    let kanjiStripped = mw.util.escapeRegExp(kanji.replace(reKanjiKana, ""));
    kanjiStripped += " ";
    // Need to add hyphen escaped since it has special behavior in regex classes
    kanjiStripped += "\\-";
    const kanjiAuxillary = kanjiStripped.replace(/\w/g, "");

    kanjiRegexes.latinOnly = /^[A-Za-z0-9\-.?!/,:;@#$%&+=*'"・ ]+$/;
    kanjiRegexes.kanaOnly = new RegExp("^[ぁ-ゔァ-ヴー" + kanjiAuxillary + "]+$");
    kanjiRegexes.hiraganaOnly = new RegExp("^[ぁ-ゔーA-Za-z" + kanjiAuxillary + "]+$");
    kanjiRegexes.katakanaOnly = new RegExp("^[ァ-ヴーA-Za-z" + kanjiAuxillary + "]+$");

    // Add midpoint for Latin in titles
    if (/\w/.test(kanji)) {
        kanjiStripped += "・";
    }

    const leadReBase = "([ぁ-ゔァ-ヴー" + kanjiStripped + "]+)";
    const kanjiEscaped = mw.util.escapeRegExp(kanji);
    // Account for spaces, but ignore backslash and other misc characters
    const reKanjiKanaLatin = /([\u3400-\u4DB5\u4E00-\u9FCB\uF900-\uFA6Aぁ-ゔァ-ヴーA-Za-z0-9])/g;
    let kanjiSpaced = kanjiEscaped.replace(/ /g, " ?");
    kanjiSpaced = kanjiSpaced.replace(reKanjiKanaLatin, "$1 ?");

    // Add kanji to regex to make sure we're not getting the reading of some
    // other term
    kanjiRegexes.lead = new RegExp(kanjiSpaced + "[^(\n)]*?\\(" + leadReBase, "i"); // brittle
}

/**
 * Show the Japanese title inside `#kanjiInfo` and tag the element with a
 * class and tooltip describing which script the title is written in.
 *
 * Also stores the title in the global `wikidataKanji` for the later lookups.
 *
 * @param {string} kanji - The Japanese title to display.
 */
function displayKanji(kanji) {
    wikidataKanji = kanji;

    const info = $("#kanjiInfo");
    // displayKana() later appends the reading to "#kanjiInfo ruby", so the
    // title has to live in a <ruby> element. The label comes from an external
    // API, so it is inserted as text rather than parsed as HTML.
    // NOTE(review): confirm the exact ruby markup bindKana.js expects.
    info.append($("<ruby>").text(kanji));

    // Add some classes so users can choose to not display for example
    // katakana-only kanji in their CSS
    if (kanjiRegexes.latinOnly.test(kanji)) {
        info.addClass("kanjiInfo-latin-only");
        info.prop("title", "Japanese title in Latin script");
        info.css("display", "none");
    } else if (kanjiRegexes.hiraganaOnly.test(kanji)) {
        info.addClass("kanjiInfo-hiragana-only");
        info.prop("title", "Japanese title in hiragana");
    } else if (kanjiRegexes.katakanaOnly.test(kanji)) {
        info.addClass("kanjiInfo-katakana-only");
        info.prop("title", "Japanese title in katakana");
    } else {
        info.prop("title", "Japanese title in kanji");
    }
}

/**
 * Request every claim of the current Wikidata item (global `wikidataId`).
 * The response is handled by parseKanaClaim().
 */
function requestKana() {
    // API docs: https://www.wikidata.org/w/api.php?action=help&modules=wbgetclaims
    // We have to wholesale get all the claims instead of just one because the
    // kana might be present as a qualifier to another claim
    $.ajax({
        url: "https://www.wikidata.org/w/api.php",
        data: {
            action: "wbgetclaims",
            entity: wikidataId,
            format: "json",
            origin: "*"
        },
        success: parseKanaClaim
    });
}

/**
 * Success callback for requestKana(): look for a kana reading in the item's
 * claims.
 *
 * First checks whether the first statement of a few name-like properties
 * matches the displayed title (global `wikidataKanji`, spaces ignored) and
 * carries a P1814 qualifier. Failing that, it uses a P1814 claim on the item
 * itself. If neither yields a reading, it falls back to getInterlanguage().
 *
 * @param {Object} response - Parsed JSON body of the wbgetclaims call.
 */
function parseKanaClaim(response) {
    const claims = (response && response.claims) || {};
    const properties = {
        title: "P1476",
        nativeLabel: "P1705",
        officialName: "P1448",
        nameInNativeLanguage: "P1559"
    };
    const nameInKana = "P1814";
    const targetKanji = wikidataKanji.replace(/ /g, "");

    // A snak without a datavalue (e.g. "novalue"/"somevalue") yields undefined
    // here instead of throwing.
    function snakValue(snak) {
        return snak && snak.datavalue ? snak.datavalue.value : undefined;
    }

    let kana;
    let source; // which property supplied the reading; becomes a CSS class suffix

    // Try getting nameInKana as a qualifier to some properties
    for (const prop in properties) {
        const statements = claims[properties[prop]];
        const statement = statements && statements[0];
        if (!statement) {
            continue;
        }

        const value = snakValue(statement.mainsnak);
        const kanji = value && value.text;
        if (typeof kanji !== "string" || kanji.replace(/ /g, "") !== targetKanji) {
            continue;
        }

        const readings = statement.qualifiers && statement.qualifiers[nameInKana];
        const reading = readings && snakValue(readings[0]);
        if (reading) {
            kana = reading;
            source = prop;
            break;
        }
    }

    // Try getting nameInKana as a general claim
    if (!kana && claims[nameInKana] && claims[nameInKana][0]) {
        const reading = snakValue(claims[nameInKana][0].mainsnak);
        if (reading) {
            kana = reading;
            source = "nameInKana";
        }
    }

    // We couldn't find nameInKana
    if (!kana) {
        getInterlanguage();
        return;
    }

    kana = kana.toHalfWidth();
    displayKana(kana);
    $("#kanjiInfo")
        .addClass("kanjiInfo-wikidata")
        .addClass("kanjiInfo-wikidata-" + source);
}

/**
 * Look up the Japanese interlanguage link of the current page on the local
 * wiki. If one exists, its title (anchor removed) is passed to scrapeKana();
 * otherwise the lookup falls back to getWiktionary().
 */
function getInterlanguage() {
    const apiUrl = location.origin + "/w/api.php";
    const pageId = mw.config.get('wgArticleId');

    // Documentation: https://en.wikipedia.org/w/api.php?action=help&modules=query%2Blanglinks
    $.ajax({
        url: apiUrl,
        data: {
            action: "query",
            format: "json",
            prop: "langlinks",
            lllang: "ja",
            // Ask for the page by id: the result is read back by this same
            // id below, and wgTitle carries no namespace prefix, so a title
            // query could resolve to a different page.
            pageids: pageId
        },
        success: function(response) {
            const pages = response && response.query && response.query.pages;
            const page = pages && pages[pageId];
            const langlinks = page && page.langlinks;
            if (!langlinks || !langlinks.length) {
                getWiktionary();
                return;
            }

            const jaLabel = langlinks[0]["*"].replace(/(.*)#.*/, "$1"); // rm anchors
            scrapeKana(jaLabel);
        }
    });
}

/**
 * Request a plain-text extract of the intro of a Japanese Wikipedia article.
 * The response is handled by getFirstSentence().
 *
 * @param {string} jaLabel - Title of the jawiki article.
 */
function scrapeKana(jaLabel) {
    // Get jawiki article's lead wikitext
    // API docs: https://www.mediawiki.org/w/api.php?action=help&modules=query%2Bextracts
    $.ajax({
        url: "https://ja.wikipedia.org/w/api.php",
        data: {
            action: "query",
            prop: "extracts",
            format: "json",
            redirects: true,
            exintro: true,
            exsentences: 2,
            exlimit: 1,
            explaintext: true,
            titles: jaLabel,
            origin: "*"
        },
        success: getFirstSentence
    });
}

/**
 * Success callback for scrapeKana(): extract the reading from the lead text
 * of the Japanese article using `kanjiRegexes.lead`.
 *
 * Falls back to getWiktionary() when there is no extract, when the regex does
 * not match, or when the captured reading is rejected (empty, Latin only, or
 * katakana only for a non-Latin title).
 *
 * @param {Object} response - Parsed JSON body of the extracts query.
 */
function getFirstSentence(response) {
    const pages = response && response.query && response.query.pages;
    // Have to split parsing into two parts since jawiki pageid is unknown
    const pageId = pages ? Object.keys(pages)[0] : undefined;
    const introText = pageId !== undefined ? pages[pageId].extract : undefined;

    if (!introText) {
        console.error("showKanji.js: TextExtracts failed to get a lead for the Japanese article.");
        getWiktionary();
        return;
    }

    const wikitext = introText.toHalfWidth();

    const kanaSearch = wikitext.match(kanjiRegexes.lead);
    if (!kanaSearch || kanaSearch.length !== 2) {
        getWiktionary();
        return;
    }

    // Rm trailing characters (all of them, not only the last one)
    const kana = kanaSearch[1].replace(/[・、 ]+$/, "");

    // Abort if our reading is only katakana (for non-Latin) or Latin, or if
    // nothing is left after trimming
    const katakanaForNonLatin = !kanjiRegexes.latinOnly.test(wikidataKanji)
        && kanjiRegexes.katakanaOnly.test(kana);
    if (!kana || katakanaForNonLatin || kanjiRegexes.latinOnly.test(kana)) {
        getWiktionary();
        return;
    }

    displayKana(kana);
    $("#kanjiInfo").addClass("kanjiInfo-jawiki");
}

// Adapted from:
//    http://ilog4.blogspot.com/2015/09/javascript-convert-full-width-and-half.html
//    https://stackoverflow.com/a/20488304/1995949
//    https://en.wikipedia.org/wiki/Halfwidth_and_fullwidth_forms
/**
 * Return a copy of the string in which every character in U+FF01..U+FF5E is
 * shifted down by 0xFEE0 and every U+3000 (ideographic space) is replaced by
 * an ordinary space. All other characters are left untouched.
 *
 * @returns {string} The converted string.
 */
String.prototype.toHalfWidth = function() {
    const FULLWIDTH_OFFSET = 0xFEE0;
    return this
        .replace(/[\uff01-\uff5e]/g, function(ch) {
            return String.fromCharCode(ch.charCodeAt(0) - FULLWIDTH_OFFSET);
        })
        .replace(/\u3000/g, " ");
};

// We use the English Wiktionary because it has more terms and better structure
/**
 * Request the section list of the English Wiktionary page named after the
 * displayed title (global `wikidataKanji`). The response is handled by
 * findJapaneseSection().
 */
function getWiktionary() {
    // API docs: https://en.wikipedia.org/w/api.php?action=help&modules=parse
    $.ajax({
        url: "https://en.wiktionary.org/w/api.php",
        data: {
            action: "parse",
            format: "json",
            page: wikidataKanji,
            prop: "sections",
            origin: "*"
        },
        success: findJapaneseSection
    });
}

/**
 * Success callback for getWiktionary(): find the section whose heading is
 * "Japanese" and request its rendered HTML. The response is handled by
 * parseWiktionary(). Does nothing when the API reports an error or the page
 * has no such section.
 *
 * @param {Object} response - Parsed JSON body of the `prop=sections` parse call.
 */
function findJapaneseSection(response) {
    if (response.error || !response.parse) {
        return;
    }

    const sections = response.parse.sections || [];
    const japanese = sections.find(function(section) {
        return section.line === "Japanese";
    });
    if (!japanese || japanese.index == null) {
        return;
    }

    // API docs: https://en.wikipedia.org/w/api.php?action=help&modules=parse
    $.ajax({
        url: "https://en.wiktionary.org/w/api.php",
        data: {
            action: "parse",
            format: "json",
            page: wikidataKanji,
            prop: "text",
            section: japanese.index,
            origin: "*"
        },
        success: parseWiktionary
    });
}

/**
 * Success callback for the section request made in findJapaneseSection():
 * pull a reading out of the rendered Wiktionary HTML.
 *
 * Prefers the first `.headword:lang(ja)` element and only accepts it when the
 * text reassembled from it equals the global `wikidataKanji`. Otherwise it
 * uses the first `.Jpan ruby` element. Displays the reading if one is found.
 *
 * @param {Object} response - Parsed JSON body of the `prop=text` parse call.
 */
function parseWiktionary(response) {
    if (response.error || !response.parse || !response.parse.text) {
        return;
    }
    const html = response.parse.text["*"];
    const parsed = $($.parseHTML(html));

    // Wiktionary adds readings as furigana
    const headword = parsed.find(".headword:lang(ja)").first();
    const seeTable = parsed.find(".Jpan ruby").first();
    let kanji = "";
    let kana = "";
    if (headword.length) {
        // Wiktionary already binds their kana, so we have to undo the process to get
        // the constituent parts, at least with the current markup
        const childNodes = headword[0].childNodes;
        for (let i = 0; i < childNodes.length; i++) {
            const node = childNodes[i];
            if (node.nodeName === "RUBY") {
                const ruby = $(node); // convert back to JQuery for convenience
                ruby.children("rp").remove();
                // Detach the <rt> so that the text left in <ruby> is the base only
                kana += ruby.children("rt").detach().text();
                kanji += ruby.text();
            } else if (node.nodeType === 3) { // "#text"
                kanji += node.nodeValue;
                kana += node.nodeValue;
            }
        }

        if (kanji !== wikidataKanji) {
            return;
        }
    } else if (seeTable.length) {
        kanji = seeTable.children("rb").text();
        kana = seeTable.children("rt").text();
    } else {
        return;
    }

    if (kana) {
        displayKana(kana);
        $("#kanjiInfo").addClass("kanjiInfo-wiktionary");
    }
}

/**
 * Attach a reading to the title shown in `#kanjiInfo` and, unless the title
 * (global `wikidataKanji`) consists of kanji only, load bindKana.js.
 *
 * @param {string} kana - The reading to display.
 */
function displayKana(kana) {
    // The reading comes from external sources, so it is inserted as text
    // rather than parsed as HTML.
    // NOTE(review): <rp>/<rt> is the standard ruby annotation markup; confirm
    // it is the exact structure bindKana.js expects.
    $("#kanjiInfo ruby").append(
        $("<rp>").text("("),
        $("<rt>").text(kana),
        $("<rp>").text(")")
    );

    // Cleanup redundant furigana with another script
    const kanjiOnlyRe = /^[\u3400-\u4DB5\u4E00-\u9FCB\uF900-\uFA6A]+$/;
    if (!kanjiOnlyRe.test(wikidataKanji)) {
        mw.loader.load( '//en.wikipedia.org/w/index.php?title=User:Opencooper/bindKana.js&action=raw&ctype=text/javascript' );
    }
}

// Shared state used by the functions above.

// Id of the Wikidata item linked to the current page, as reported by mw.config
var wikidataId = mw.config.get( 'wgWikibaseItemId' );

// Japanese title being displayed; assigned by displayKanji()
var wikidataKanji;

// Regular expressions for the current title; filled in by buildRegexes()
var kanjiRegexes = {};

// Run setup() once the DOM is ready
$(setup);