// User:Phlsph7/Readability.js

/* Userscript to highlight sentences by readability */

// Set score colors for css if they have not been defined.
// The ten entries are matched by index with the score classes used in
// rateSentences, i.e. from the lowest score band (0-10) to the highest (90-100).
// `var` with a self-reference is deliberate: an earlier definition of
// readabilityScoreColors in the same scope is kept, and only an
// undefined (or otherwise falsy) value falls back to these defaults.
var readabilityScoreColors = readabilityScoreColors || [
	'rgb(255,96,96)',
	'rgb(255,128,128)',
	'rgb(255,192,128)',
	'rgb(255,224,128)',
	'rgb(255,255,128)',
	'rgb(214,255,128)',
	'rgb(171,255,128)',
	'rgb(128,255,128)',
	'rgb(128,255,171)',
	'rgb(128,255,214)',
];

// Goes through all the p-elements and splits their content into span-elements.
// Each span corresponds to a sentence and carries the class "sentence".
// Takes no arguments and returns nothing; it rewrites the innerHTML of every
// direct p-child of .mw-parser-output whose text is longer than 20 characters.
function divideParagraphsIntoSentences(){
	const paragraphs = document.querySelectorAll('.mw-parser-output > p');

	// Periods are the main guide for where sentences start and end.
	// However, not all periods mark sentences, like in different forms of abbreviations.
	// Placeholders are used for exceptions.
	const periodPlaceholder = 'PERIOD_PLACEHOLDER';
	const periodExceptions = buildPeriodExceptions();
	const periodExceptionPlaceholders = periodExceptions.map(function(exception){
		return exception.split('.').join(periodPlaceholder);
	});

	for(const paragraph of paragraphs){
		// Line breaks are dropped (joined with '') so they do not count towards the length.
		const textContent = paragraph.textContent.split('\r').join('').split('\n').join('').trim();
		// exclude very short paragraphs
		if(textContent.length > 20){
			divideIntoSentences(paragraph, periodExceptions, periodExceptionPlaceholders);
		}
	}

	// remove very short sentences
	// getElementsByClassName returns a live collection: removing the class while
	// iterating it directly would skip the element after each removal, so a
	// static copy is iterated instead.
	const sentenceElements = Array.from(document.getElementsByClassName('sentence'));
	for(const sentenceElement of sentenceElements){
		const sentenceText = getSentenceText(sentenceElement);
		if(sentenceText.trim().length < 10){
			sentenceElement.classList.remove('sentence');
		}
	}

	// Builds the list of strings whose periods do not end a sentence.
	// The order is: ellipsis and abbreviations, '.A.'-'.Z.', ' A.'-' Z.',
	// '.a.'-'.z.', '.a'-'.z', decimals '0.0'-'9.9', and '.0'-'.9'.
	function buildPeriodExceptions(){
		const upperCaseLetters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'.split('');
		const lowerCaseLetters = 'abcdefghijklmnopqrstuvwxyz'.split('');
		const digits = '0123456789'.split('');

		const abbreviations = [
			'...', 'Mr.', 'Mrs.', 'Dr.', 'Jr.', 'Sr.', 'Prof.', 'St.', 'Ave.',
			'Corp.', 'Inc.', 'Ltd.', 'Co.', 'Gov.', 'Capt.', 'Sgt.', 'et al.',
			'vs.', 'e.t.a.',
		];
		const decimals = [];
		for(const integerDigit of digits){
			for(const fractionDigit of digits){
				decimals.push(integerDigit + '.' + fractionDigit);
			}
		}

		return abbreviations.concat(
			upperCaseLetters.map(function(letter){ return '.' + letter + '.'; }),
			// initials such as " J." (leading space so that e.g. "USA." is not matched)
			upperCaseLetters.map(function(letter){ return ' ' + letter + '.'; }),
			lowerCaseLetters.map(function(letter){ return '.' + letter + '.'; }),
			lowerCaseLetters.map(function(letter){ return '.' + letter; }),
			decimals,
			digits.map(function(digit){ return '.' + digit; })
		);
	}

	// Split the content of a p-element into span-elements. Each span corresponds to a sentence.
	function divideIntoSentences(paragraph, periodExceptions, periodExceptionPlaceholders){
		// Loop through all the nodes inside the p-element.
		// Span-open-tags and close-tags are placed through code.
		let innerHTML = getSpanStartTag();
		for(let currentChild = paragraph.firstChild; currentChild; currentChild = currentChild.nextSibling){
			// if it is a text node, modify it
			if(currentChild.nodeType === Node.TEXT_NODE){
				innerHTML += adjustTextNodes(currentChild.nodeValue, periodExceptions, periodExceptionPlaceholders);
			}
			// if it is an element, add its outerHTML unchanged
			else if(currentChild.nodeType === Node.ELEMENT_NODE){
				innerHTML += currentChild.outerHTML;
			}
			// Other node types (e.g. comments) are dropped: appending their raw
			// nodeValue to the HTML string would turn it into visible markup.
		}

		innerHTML += '</span>';
		paragraph.innerHTML = innerHTML;

		// utility function to get the code for the opening span tag
		function getSpanStartTag(){
			return '<span class="sentence">';
		}

		// utility function to get the code for span tags in the middle (closing + opening)
		function getSpanEndAndStart(punctuation){
			return punctuation + '</span>' + getSpanStartTag();
		}

		// utility function to escape text so it can be placed into an HTML string
		// nodeValue is plain text; without escaping, a literal "<" or "&" in the
		// article text would be re-parsed as markup.
		function escapeHtml(text){
			return text.split('&').join('&amp;')
				.split('<').join('&lt;')
				.split('>').join('&gt;');
		}

		// utility function to modify text nodes
		// they contain the punctuation relevant for sentences
		function adjustTextNodes(text, periodExceptions, periodExceptionPlaceholders){
			let modifiedText = escapeHtml(text);

			// use placeholders to remove all periods that do not mark sentences
			modifiedText = insertPlaceholders(modifiedText, periodExceptions, periodExceptionPlaceholders);

			// split using the remaining punctuation
			modifiedText = modifiedText.split('.').join(getSpanEndAndStart('.'))
				.split('!').join(getSpanEndAndStart('!'))
				.split('?').join(getSpanEndAndStart('?'));

			// use placeholders to return all periods that do not mark sentences
			modifiedText = removePlaceholders(modifiedText, periodExceptions, periodExceptionPlaceholders);

			return modifiedText;

			function insertPlaceholders(text, periodExceptions, periodExceptionPlaceholders){
				let modifiedText = text;
				for(let i = 0; i < periodExceptions.length; i++){
					modifiedText = modifiedText.split(periodExceptions[i]).join(periodExceptionPlaceholders[i]);
				}
				return modifiedText;
			}

			function removePlaceholders(text, periodExceptions, periodExceptionPlaceholders){
				let modifiedText = text;
				for(let i = 0; i < periodExceptions.length; i++){
					modifiedText = modifiedText.split(periodExceptionPlaceholders[i]).join(periodExceptions[i]);
				}
				return modifiedText;
			}
		}
	}
}

// Function to rate the readability of sentences and give them their class accordingly.
// For every element with the class "sentence" it stores the score in the title
// attribute and in data attributes, adds one of the ten score classes, and
// finally injects a style sheet that colors those classes.
// Elements for which no score can be computed lose the class "sentence".
function rateSentences(){
	// class names for different scores, indexed by Math.floor(score / 10)
	const scoreClasses = [
		'score-10-0',
		'score-20-10',
		'score-30-20',
		'score-40-30',
		'score-50-40',
		'score-60-50',
		'score-70-60',
		'score-80-70',
		'score-90-80',
		'score-100-90',
	];

	// Loop through all sentences, add their score class and their title attribute.
	// getElementsByClassName is live; a static copy is iterated because the
	// loop removes the class from some elements, which would otherwise make the
	// iteration skip the following element.
	const sentenceElements = Array.from(document.body.getElementsByClassName('sentence'));
	for(const sentenceElement of sentenceElements){
		const sentenceText = getSentenceText(sentenceElement);
		const score = getSentenceScore(sentenceText);

		// A sentence without words yields NaN; such elements are not rated.
		if(!Number.isFinite(score)){
			sentenceElement.classList.remove('sentence');
			continue;
		}

		sentenceElement.title = `Score: ${score.toFixed(2)}`;
		sentenceElement.dataset.sentenceText = sentenceText;
		sentenceElement.dataset.score = score;
		sentenceElement.classList.add(getScoreClass(score, scoreClasses));
	}

	// Add the style sheet to color the score classes.
	addScoreStyleSheet(scoreClasses, readabilityScoreColors);

	// Maps a score to its class; scores below 0 and above 100 are clamped
	// to the first and last class respectively.
	function getScoreClass(score, scoreClasses){
		let index = Math.floor(score / 10);
		if(index < 0){
			index = 0;
		}
		if(index > 9){
			index = 9;
		}
		return scoreClasses[index];
	}

	// Creates one background-color rule per score class and appends the
	// resulting style element to the document head.
	function addScoreStyleSheet(scoreClasses, readabilityScoreColors){
		const rules = [];
		for(let i = 0; i < scoreClasses.length; i++){
			rules.push(`.${scoreClasses[i]} {background-color: ${readabilityScoreColors[i]}; } `);
		}
		const style = document.createElement('style');
		// assigned once instead of re-serializing the element on every iteration
		style.textContent = rules.join('');
		document.head.appendChild(style);
	}
}

// Creates an overview at the top of the page.
// This overview shows the readability of the whole article and other information:
// a table with the overall metrics, a key for the score colors, and a
// collapsible table listing all sentences ordered by lowest score.
// Takes no arguments and returns nothing. Does nothing when the element with
// the id "mw-content-text" is missing.
function createOverview(){
	const mainDiv = document.getElementById('mw-content-text');
	if(!mainDiv){
		return;
	}

	// Readability depends on the number of syllables, words, and sentences
	let totalSyllableCount = 0;
	let totalPolySyllableCount = 0;
	let totalWordCount = 0;
	// static copy of the live collection so later DOM changes cannot affect it
	const sentenceElements = Array.from(document.getElementsByClassName('sentence'));
	const totalSentenceCount = sentenceElements.length;
	const sentenceLengthArray = [];
	for(const sentenceElement of sentenceElements){
		const words = getWords(getSentenceText(sentenceElement));
		totalWordCount += words.length;
		sentenceLengthArray.push(words.length);
		for(const word of words){
			const syllableCount = getSyllableCount(word);
			totalSyllableCount += syllableCount;
			// words with three or more syllables feed the SMOG grade
			if(syllableCount >= 3){
				totalPolySyllableCount++;
			}
		}
	}

	const totalReadability = getFleschKincaidReadability(totalSyllableCount, totalWordCount, totalSentenceCount);
	const totalGradeLevel = getFleschKincaidGradeLevel(totalSyllableCount, totalWordCount, totalSentenceCount);
	const totalSmogGradeLevel = getSmogGradeLevel(totalPolySyllableCount, totalSentenceCount);

	const overviewDiv = document.createElement('div');
	mainDiv.insertBefore(overviewDiv, mainDiv.firstChild);

	const headline = document.createElement('h2');
	overviewDiv.appendChild(headline);
	headline.textContent = 'Readability overview';

	// the overview table and the key are placed side by side
	const tableDiv = document.createElement('div');
	overviewDiv.appendChild(tableDiv);
	tableDiv.style.display = 'flex';

	insertOverviewTable(tableDiv);
	insertKeyTable(tableDiv);
	insertSentenceTable(overviewDiv);

	// Formats a number with two decimals; values that cannot be computed
	// (NaN or Infinity, e.g. when there are no sentences) are shown as 'n/a'.
	function formatNumber(value){
		return Number.isFinite(value) ? value.toFixed(2) : 'n/a';
	}

	// Table with the metrics of the whole article
	function insertOverviewTable(parent){
		const overviewTable = document.createElement('table');
		parent.appendChild(overviewTable);
		overviewTable.classList.add('wikitable');
		overviewTable.style.marginRight = '20px';

		const overviewTableBody = document.createElement('tbody');
		overviewTable.appendChild(overviewTableBody);

		addRow(overviewTableBody, 'Readability (Flesch)', formatNumber(totalReadability));
		addRow(overviewTableBody, 'Grade level (Flesch)', formatNumber(totalGradeLevel));
		addRow(overviewTableBody, 'Grade level (SMOG)', formatNumber(totalSmogGradeLevel));
		addRow(overviewTableBody, 'Sentences', totalSentenceCount);
		addRow(overviewTableBody, 'Words', totalWordCount);
		addRow(overviewTableBody, 'Syllables', totalSyllableCount);
		addRow(overviewTableBody, 'Average sentence length', formatNumber(totalWordCount / totalSentenceCount));
		addRow(overviewTableBody, 'Standard deviation of sentence length', formatNumber(getStandardDeviation(sentenceLengthArray)));

		// Population standard deviation; NaN for an empty list.
		function getStandardDeviation(numbers){
			if(numbers.length === 0){
				return NaN;
			}
			const sum = numbers.reduce(function(total, number){
				return total + number;
			}, 0);
			const mean = sum / numbers.length;
			const varianceSum = numbers.reduce(function(total, number){
				return total + Math.pow(number - mean, 2);
			}, 0);
			return Math.sqrt(varianceSum / numbers.length);
		}
	}

	// Key for the coloring
	function insertKeyTable(parent){
		// [score class, score range, school level]
		const keyRows = [
			['score-100-90', '100–90', '5th grade'],
			['score-90-80', '90–80', '6th grade'],
			['score-80-70', '80–70', '7th grade'],
			['score-70-60', '70–60', '8th & 9th grade'],
			['score-60-50', '60–50', '10th to 12th grade'],
			['score-50-40', '50–40', 'College'],
			['score-40-30', '40–30', 'College'],
			['score-30-20', '30–20', 'College graduate'],
			['score-20-10', '20–10', 'College graduate'],
			['score-10-0', '10–0', 'College graduate'],
		];

		const keyTable = document.createElement('table');
		parent.appendChild(keyTable);
		keyTable.classList.add('wikitable');

		const keyTableBody = document.createElement('tbody');
		keyTable.appendChild(keyTableBody);

		const headerRow = document.createElement('tr');
		keyTableBody.appendChild(headerRow);
		for(const headerText of ['Score', 'School level']){
			const headerCell = document.createElement('th');
			headerCell.textContent = headerText;
			headerRow.appendChild(headerCell);
		}

		for(const keyRow of keyRows){
			const row = document.createElement('tr');
			keyTableBody.appendChild(row);

			// NOTE(review): the score cell reuses the score class so that it is
			// colored like the matching sentences; placing the class on this
			// cell is a reconstruction - confirm against the intended markup.
			const scoreCell = document.createElement('td');
			scoreCell.classList.add(keyRow[0]);
			scoreCell.textContent = keyRow[1];
			row.appendChild(scoreCell);

			const levelCell = document.createElement('td');
			levelCell.textContent = keyRow[2];
			row.appendChild(levelCell);
		}
	}

	// sentence table to display all sentences ordered by lowest score
	function insertSentenceTable(parent){
		const showLabel = 'Show sentences ordered by lowest score';
		const hideLabel = 'Hide sentences ordered by lowest score';

		// button to show/hide the table
		const sentenceTableButton = document.createElement('button');
		parent.appendChild(sentenceTableButton);
		sentenceTableButton.textContent = showLabel;
		sentenceTableButton.style.fontSize = '24px';

		// the table itself, hidden until the button is clicked
		const sentenceTable = document.createElement('table');
		parent.appendChild(sentenceTable);
		sentenceTable.classList.add('wikitable');
		sentenceTable.style.display = 'none';

		const sentenceCaption = document.createElement('caption');
		sentenceTable.appendChild(sentenceCaption);
		sentenceCaption.textContent = 'Sentences ordered by lowest score';

		const sentenceTableBody = document.createElement('tbody');
		sentenceTable.appendChild(sentenceTableBody);

		// matrix to store the table values, read from the data attributes
		// that rateSentences writes on each sentence element
		const sentenceMatrix = [];
		for(const sentenceElement of sentenceElements){
			const sentenceText = sentenceElement.dataset.sentenceText;
			const score = parseFloat(sentenceElement.dataset.score);
			sentenceMatrix.push([sentenceText, score]);
		}

		// sort by lowest score
		sentenceMatrix.sort(function(a, b){
			return a[1] - b[1];
		});

		// loop through the matrix and add one row per index
		for(const entry of sentenceMatrix){
			addRow(sentenceTableBody, entry[0], formatNumber(entry[1]));
		}

		// show/hide function of the button
		sentenceTableButton.onclick = function(){
			const isHidden = sentenceTable.style.display === 'none';
			sentenceTable.style.display = isHidden ? '' : 'none';
			sentenceTableButton.textContent = isHidden ? hideLabel : showLabel;
		};
	}

	// utility function to add rows to a table
	// Both cells are filled through textContent so that sentence text taken
	// from the page is never interpreted as HTML.
	function addRow(tableBody, name, value){
		const row = document.createElement('tr');

		const nameCell = document.createElement('td');
		nameCell.textContent = name;
		row.appendChild(nameCell);

		const valueCell = document.createElement('td');
		valueCell.textContent = value;
		row.appendChild(valueCell);

		tableBody.appendChild(row);
	}
}

// Utility function to extract the text from a sentence element.
// Descendants matching '.reference' or '.Inline-Template' are excluded from
// the text. A double quote at the very start is dropped and the result is
// trimmed. Returns a string.
function getSentenceText(sentenceElement){
	// hide references and certain templates, remembering their inline display
	// value so it can be put back exactly as it was
	const hiddenElements = Array.from(sentenceElement.querySelectorAll('.reference, .Inline-Template'));
	const previousDisplays = hiddenElements.map(function(element){
		return element.style.display;
	});
	for(const element of hiddenElements){
		element.style.display = 'none';
	}

	// the innerText attribute ignores hidden elements
	let sentenceText = sentenceElement.innerText || '';

	// show them again
	hiddenElements.forEach(function(element, index){
		element.style.display = previousDisplays[index];
	});

	// formatting
	if(sentenceText[0] === '"'){
		sentenceText = sentenceText.substring(1);
	}
	return sentenceText.trim();
}

// utility function to get the readability score of a sentence
// The text is split into words, their syllables are summed, and the result is
// passed to getFleschKincaidReadability with a sentence count of 1.
// Returns NaN when the text contains no words.
function getSentenceScore(sentenceText){
	const words = getWords(sentenceText);
	let syllableCount = 0;
	for(const word of words){
		syllableCount += getSyllableCount(word);
	}
	return getFleschKincaidReadability(syllableCount, words.length, 1);
}

// utility function: this is the main metric
// Computes 206.835 - 1.015 * (words per sentence) - 84.6 * (syllables per word).
// Higher scores mean easier text.
// Returns NaN when wordCount is 0 (0 / 0); rateSentences uses that to discard
// sentences without words, so the NaN result is intentional.
function getFleschKincaidReadability(syllableCount, wordCount, sentenceCount){
	const wordsPerSentence = wordCount / sentenceCount;
	const syllablesPerWord = syllableCount / wordCount;
	return 206.835 - (1.015 * wordsPerSentence) - (84.6 * syllablesPerWord);
}

// utility function: this shows the grade level and is used for the overview
// Computes 0.39 * (words per sentence) + 11.8 * (syllables per word) - 15.59.
// Returns NaN when wordCount is 0 and a non-finite value when sentenceCount is 0.
function getFleschKincaidGradeLevel(syllableCount, wordCount, sentenceCount){
	const wordsPerSentence = wordCount / sentenceCount;
	const syllablesPerWord = syllableCount / wordCount;
	return (0.39 * wordsPerSentence) + (11.8 * syllablesPerWord) - 15.59;
}

// utility function: get SMOG readability for the overview
// Computes 1.0430 * sqrt(polySyllableCount * 30 / sentenceCount) + 3.1291,
// where polySyllableCount is the number of words with three or more syllables.
// Returns NaN when both counts are 0.
function getSmogGradeLevel(polySyllableCount, sentenceCount){
	const polySyllablesPerThirtySentences = polySyllableCount * 30 / sentenceCount;
	return 1.0430 * Math.sqrt(polySyllablesPerThirtySentences) + 3.1291;
}

// utility function to extract words from a sentence
// Punctuation characters are deleted (not replaced by a space, so
// "well-known" becomes the single word "wellknown"), then the text is split
// on whitespace. Returns an array of non-empty strings.
function getWords(sentenceText){
	// Parentheses and dashes are included so that a free-standing "–" or "("
	// is not counted as a word.
	const punctuation = '.?!,;:"()[]{}–—-/&*#$%@+=<>|~^\\' + "'";

	let cleanedText = sentenceText;
	for(const character of punctuation){
		cleanedText = cleanedText.split(character).join('');
	}

	// Splitting on runs of whitespace replaces the former "collapse repeated
	// spaces, then split on a single space" step.
	return cleanedText.split(/\s+/).filter(function(word){
		return word.length > 0;
	});
}

// utility function to count the syllables of a word
// Heuristic: after lowercasing and dropping one trailing "e", every run of
// consecutive vowels (a, e, i, o, u, y) counts as one syllable.
// Always returns at least 1.
function getSyllableCount(word){
	let normalizedWord = word.toLowerCase();
	// a final "e" is treated as silent
	if(normalizedWord.endsWith('e')){
		normalizedWord = normalizedWord.slice(0, -1);
	}

	const vowelGroups = normalizedWord.match(/[aeiouy]+/g);
	const syllableCount = vowelGroups === null ? 0 : vowelGroups.length;
	return Math.max(syllableCount, 1);
}

// utility function to iteratively replace a string until no more occurrences are found
// Returns the input unchanged when oldSubstring is empty. When newSubstring
// itself contains oldSubstring, only a single replacement pass is made,
// because repeating it could never finish.
function fullReplace(string, oldSubstring, newSubstring){
	if(oldSubstring === ''){
		return string;
	}

	let newString = string.split(oldSubstring).join(newSubstring);
	if(newSubstring.includes(oldSubstring)){
		return newString;
	}

	while(newString.includes(oldSubstring)){
		newString = newString.split(oldSubstring).join(newSubstring);
	}
	return newString;
}

// anonymous main function
// Adds a "Readability" link to the toolbox on allowed namespaces. Clicking the
// link splits the paragraphs into sentences, rates them, and creates the overview.
(function(){
	// restrict script to mainspace, userspace, wikipedia, help, and draftspace
	const namespaceNumber = mw.config.get('wgNamespaceNumber');
	const allowedNamespaces = [0, 2, 4, 12, 118];
	if(allowedNamespaces.indexOf(namespaceNumber) === -1){
		return;
	}

	// add a link to the toolbox
	$.when(mw.loader.using('mediawiki.util'), $.ready).then(function(){
		const portletLink = mw.util.addPortletLink('p-tb', '#', 'Readability');
		// no link is created when the portlet does not exist in the current skin
		if(!portletLink){
			return;
		}

		// The page is rewritten in place, so a second run would nest sentence
		// spans and add a second overview; the work is therefore done only once.
		let hasRun = false;

		// run the main function when the link is clicked
		portletLink.onclick = function(e){
			e.preventDefault();
			if(hasRun){
				return;
			}
			hasRun = true;

			divideParagraphsIntoSentences();
			rateSentences();
			createOverview();
		};
	});
})();