// User:Michael Devore/wiksort.js

/*
 * wiksort.js
 * Version 1.1
 * Sort out unique words in Wikipedia articles to separate page
 * pop-ups must be enabled for the wikipedia domain
 * works best with Firefox due to its built-in spelling checker for textareas
 *
 * Written by Michael Devore
 * Comments to: http://en.wikipedia.org/wiki/User_talk:Michael_Devore
 * Released to the public domain
 */

/*
 * Shared configuration for the script.
 *   regularNounCutoff / properNounCutoff : minimum length of lower-case-initial /
 *       capital-initial words that are collected
 *   baseApiString : prefix of the api.php "parse" request; the page name is appended
 *   wiksText / wiksId / wiksToolTip : label, element id and tooltip of the tool link
 */
var wiksGlobals = {
	regularNounCutoff : 6,
	properNounCutoff : 6,
	baseApiString : mw.config.get('wgServer') + mw.config.get('wgScriptPath') + "/api.php?action=parse&format=xml&page=",
	wiksText : "Sort words",
	wiksId : "t-sort-words",
	wiksToolTip : "Sort unique article words"
};

/*
 * Entry point invoked by the "Sort words" link: requests the parsed text of
 * the current page so its words can be sorted.
 */
function wiksMain() {
	// wgPageName uses underscores for spaces; convert every one of them
	// (a string pattern would only replace the first occurrence).
	var wikiPage = mw.config.get('wgPageName').replace(/_/g, ' ');
	wiksLoadPageXML(encodeURIComponent(wikiPage));
}

/*
 * Installs the "Sort words" link on the page.
 * Does nothing in the Special namespace. For every skin except cologneblue
 * the link is added to the 'p-tb' portlet; for cologneblue it is inserted
 * into the quickbar by hand.
 */
function wiksLoad() {
	if (mw.config.get('wgCanonicalNamespace') === "Special") {
		return;
	}

	// set up the tool link
	if (mw.config.get('skin') !== "cologneblue") {
		mw.util.addPortletLink('p-tb', 'javascript: wiksMain();',
			wiksGlobals.wiksText,
			wiksGlobals.wiksId,
			wiksGlobals.wiksToolTip);
		return;
	}

	// my favorite cologne blue skin doesn't support portlet links
	// place sort word link right before 'This page' quickbar section,
	// hopefully as last entry in previous 'Edit' section
	var quickEl = document.getElementById("quickbar");
	var childNode = quickEl ? quickEl.firstChild : null;
	while (childNode) {
		var tNode = childNode.firstChild;
		if (tNode && tNode.nodeName === "#text" && tNode.nodeValue === "This page") {
			break;
		}
		childNode = childNode.nextSibling;
	}

	// No 'This page' section found (or no quickbar at all): add nothing.
	if (!childNode) {
		return;
	}

	var aEl = document.createElement("A");
	aEl.setAttribute("href", "javascript: wiksMain();");
	aEl.appendChild(document.createTextNode(wiksGlobals.wiksText));
	var aBR = document.createElement("BR");
	// Link first, then the line break, both ahead of the 'This page' section.
	quickEl.insertBefore(aEl, childNode);
	quickEl.insertBefore(aBR, childNode);
}

/*
 * Asynchronously fetches the parsed page from the API and hands the raw
 * response text to wiksProcessPage.
 *
 * wikiPage : page name, already URI-encoded; appended to
 *            wiksGlobals.baseApiString as-is.
 *
 * Responses that are not HTTP 200, or that have an empty body, are ignored.
 */
function wiksLoadPageXML(wikiPage) {
	var wikiApiString = wiksGlobals.baseApiString + wikiPage;
	var request = new XMLHttpRequest();
	request.open("GET", wikiApiString, true);
	request.setRequestHeader("User-Agent", "Mozilla/5.0");
	request.setRequestHeader("Accept", "text/xml");
	request.onreadystatechange = function () {
		// readyState 4 means the request has finished
		if (request.readyState === 4 && request.status === 200) {
			if (request.responseText) {
				wiksProcessPage(request.responseText);
			}
		}
	};
	request.send(null);
}

/*
 * Parses the API's XML response, extracts the rendered page HTML from its
 * first <text> element, reduces that HTML to plain text and passes the text
 * to wiksFilterAndShow.
 *
 * pText : raw XML response text
 * Returns true when the text was handed on, false (after an alert) when the
 * response could not be parsed or contains no <text> element.
 */
function wiksProcessPage(pText) {
	var pDoc;
	var isIE = false;
	try {
		// Internet Explorer
		pDoc = new ActiveXObject("Microsoft.XMLDOM");
		pDoc.async = false;
		pDoc.loadXML(pText);
		isIE = true;
	}
	catch (ieError) {
		try {
			// Firefox, Mozilla, Opera, etc.
			var dParser = new DOMParser();
			pDoc = dParser.parseFromString(pText, "application/xml");
		}
		catch (parseError) {
			alert("wiksort could not process this page.");
			return false;
		}
	}

	var tElements = pDoc.getElementsByTagName("text");
	var tElem = (tElements && tElements.length) ? tElements[0] : null;
	if (!tElem) {
		// e.g. an API error response: there is no page text to work on
		alert("wiksort could not process this page.");
		return false;
	}

	// The page HTML may be split across several child nodes; join them.
	var htmlContent = "";
	for (var cNode = tElem.firstChild; cNode; cNode = cNode.nextSibling) {
		if (cNode.nodeValue !== null && cNode.nodeValue !== undefined) {
			htmlContent += cNode.nodeValue;
		}
	}

	// Let the browser strip the markup via a detached element.
	// NOTE(review): innerHTML parses whatever HTML the API returned; this
	// assumes the parsed-page output is trusted - confirm.
	var d = document.createElement("div");
	d.innerHTML = htmlContent;
	var theText = isIE ? d.innerText : d.textContent;

	wiksFilterAndShow(theText);
	return true;
}

/*
 * Collects the long words of the article, sorts them case-insensitively,
 * counts how often each occurs and passes both to wiksShowResults.
 *
 * articleText : plain text of the article
 *
 * wiksShowResults receives:
 *   - the sorted word list (duplicates included, original capitalisation), and
 *   - a dictionary mapping the lower-cased word to its number of occurrences.
 *     For "reverse", "constructor", "every", "reduce", "splice" and "filter"
 *     the key carries the suffix "_x_wiks"; wiksShowResults looks them up
 *     under that key.
 * Nothing happens when the text contains no matching word.
 */
function wiksFilterAndShow(articleText) {
	// a word is a letter followed by at least (cutoff - 1) further letters
	var regNoun = new RegExp("\\b[a-z][a-zA-Zé]{" + (wiksGlobals.regularNounCutoff - 1) + ",}", "g");
	var propNoun = new RegExp("\\b[A-Z][a-zA-Zé]{" + (wiksGlobals.properNounCutoff - 1) + ",}", "g");
	var suffixedWords = ["reverse", "constructor", "every", "reduce", "splice", "filter"];

	// initial capped/proper nouns first, then words starting in lower case
	var capitalised = articleText.match(propNoun) || [];
	var lowerCased = articleText.match(regNoun) || [];
	var result = capitalised.concat(lowerCased);
	if (!result.length) {
		return;
	}

	result.sort(function (x, y) {
		var a = String(x).toUpperCase();
		var b = String(y).toUpperCase();
		if (a > b) {
			return 1;
		}
		if (a < b) {
			return -1;
		}
		return 0;
	});

	// Prototype-less dictionary: words such as "values" or "entries" must not
	// collide with inherited members.
	var wordCount = Object.create(null);
	for (var i = 0; i < result.length; i++) {
		var lcWord = result[i].toLowerCase();
		if (suffixedWords.indexOf(lcWord) !== -1) {
			lcWord += "_x_wiks";
		}
		if (wordCount[lcWord] !== undefined) {
			wordCount[lcWord]++;
		}
		else {
			wordCount[lcWord] = 1;
		}
	}
	wiksShowResults(result, wordCount);
}

/*
 * Builds the result document from the sorted words and writes it into a
 * newly opened window.
 *
 * wordList  : words sorted case-insensitively, duplicates included
 * wordCount : dictionary from lower-cased word to its number of occurrences;
 *             "reverse", "constructor", "every", "reduce", "splice" and
 *             "filter" are looked up with the suffix "_x_wiks"
 *
 * Each distinct word is listed once as "word (count)" under a heading for its
 * first letter. A word that equals the previously visited word plus "s" is
 * left out. Alerts when there is nothing to show or no window could be opened.
 *
 * NOTE(review): the generated script looks up elements with the ids
 * "sorttext" and "wiksort", but none of the string literals below creates
 * them - the markup in these literals may have been lost; confirm against the
 * deployed script before relying on the output.
 */
function wiksShowResults(wordList, wordCount) {
	var suffixedWords = ["reverse", "constructor", "every", "reduce", "splice", "filter"];
	var hasWords = false;
	var feedFormat = "  ";
	// title is taken from the last path segment of the current URL
	var feedTitle = "Sorted words in " + window.location.href.match(/[^\/]+$/);
	feedTitle = feedTitle.replace(/_/g, " ");
	feedFormat += feedTitle;
	feedFormat += ' ';
	feedFormat += "==" + feedTitle + '== ';

	var previousWord = "";
	var lastLetter = "";
	for (var i = 0; i < wordList.length; i++) {
		var lcWord = wordList[i].toLowerCase();
		var adjustWord = lcWord;
		if (suffixedWords.indexOf(lcWord) !== -1) {
			adjustWord += "_x_wiks";
		}

		var count = wordCount[adjustWord];
		if (typeof count !== "number" || !(count >= 1)) {
			// No usable count: fall back to the length of the run of equal
			// words so the loop still advances past the duplicates.
			count = 1;
			while (i + count < wordList.length &&
				wordList[i + count].toLowerCase() === lcWord) {
				count++;
			}
		}

		if (lcWord !== previousWord + "s") {
			var startLetter = lcWord.slice(0, 1);
			if (lastLetter !== startLetter) {
				lastLetter = startLetter;
				feedFormat += "\n == " + startLetter.toUpperCase() + " ==\n ";
			}
			feedFormat += wordList[i] + " (" + count + ") \n ";
			hasWords = true;
		}

		// skip the remaining duplicates of this word
		i += count - 1;
		previousWord = lcWord;
	}

	feedFormat += " \n";
	feedFormat += ' \n';
	feedFormat += '\n';
	feedFormat += 'var st=document.getElementById("sorttext");\n';
	feedFormat += 'var sortSpan=document.getElementById("wiksort");\n';
	feedFormat += 'st.style.width="340px";\n';
	feedFormat += 'st.style.fontSize="large";\n';
	feedFormat += 'st.value=(sortSpan.textContent === undefined ? sortSpan.innerText : sortSpan.textContent);\n';
	feedFormat += 'sortSpan.style.display="none";\n';
	feedFormat += ' ';
	feedFormat += " ";

	if (!hasWords) {
		alert("wiksort found no sortable words.");
		return;
	}

	// A blocked pop-up yields no window; retry a few times before giving up.
	var feedWin = null;
	for (var attempt = 0; attempt < 10 && !feedWin; attempt++) {
		feedWin = window.open();
	}
	if (!feedWin) {
		alert("You need to enable pop-ups for the Wikipedia site\n"
			+ " (or the browser doesn't work with wiksort)\n");
		return;
	}

	var feedDoc = feedWin.document;
	feedDoc.open();
	feedDoc.write(feedFormat);
	feedDoc.close();
}

// Install the tool link once mediawiki.util (which provides
// mw.util.addPortletLink, used by wiksLoad) is loaded and the DOM is ready.
// This replaces the deprecated legacy addOnloadHook global.
mw.loader.using('mediawiki.util', function () {
	$(wiksLoad);
});