// User:Proteins/readability.js

// Assess an article's readability

/**
 * Flatten the markup of a DOM node into plain, single-spaced prose text.
 *
 * Reads arg_node.innerHTML, drops footnote-style superscripts and all other
 * tags, turns dashes into word separators, converts non-breaking spaces,
 * decodes the &gt; / &lt; / &amp; entities and collapses whitespace runs.
 *
 * @param {{innerHTML: string}} arg_node - element (or any object exposing
 *        an innerHTML string) whose text should be extracted
 * @returns {string} the extracted text with whitespace runs collapsed to
 *        single spaces (leading/trailing spaces are not trimmed)
 */
function nodeText(arg_node) {
	var untagged_text;
	var spaced_text;

	untagged_text = arg_node.innerHTML;

	// keep simple superscript text (e.g. exponents written as <sup>2</sup>)
	// NOTE(review): the original pattern was lost when this file was
	// extracted; this one is reconstructed from the comment's intent -- confirm.
	untagged_text = untagged_text.replace(/<sup>(.*?)<\/sup>/ig, "$1");
	// remove other superscript text (superscripts carrying attributes, e.g. references)
	untagged_text = untagged_text.replace(/(<sup([^>]+)>)(.*?<\/sup>)/ig, "");
	// remove remaining tags
	untagged_text = untagged_text.replace(/(<([^>]+)>)/ig, "");

	// replace em-dashes with comma+space
	untagged_text = untagged_text.replace(/\u2014/g, ", ");

	// THIS DIFFERS FROM ARTICLE STRUCTURE SCRIPT, BUT IT'S USEFUL FOR SYLLABLE COUNTING
	// since silent "e" can occur within a hyphenated word such as "fore-and-aft":
	// replace hyphens and en-dashes with space
	untagged_text = untagged_text.replace(/[\-\u2013]/g, " ");

	// convert non-breaking spaces to spaces
	spaced_text = untagged_text.replace(/&nbsp;/ig, " ");
	spaced_text = spaced_text.replace(/&#160;/ig, " ");

	// convert &gt; and &lt; to the single characters > and <
	spaced_text = spaced_text.replace(/&gt;/ig, ">");
	spaced_text = spaced_text.replace(/&lt;/ig, "<");

	// convert all whitespaces to single spaces
	spaced_text = spaced_text.replace(/\s+/g, " ");

	// convert &amp; to a single character &; done last so that text such as
	// "&amp;nbsp;" is decoded once (to "&nbsp;") instead of twice (to a space)
	spaced_text = spaced_text.replace(/&amp;/ig, "&");

	return spaced_text;
} // closes function nodeText

/**
 * Rewrite a word into a "syllabic" spelling in which, heuristically, each
 * remaining vowel letter (a, e, i, o, u, y) stands for one syllable.
 *
 * Steps, in order: strip punctuation and any non-alphanumeric characters;
 * set aside the suffixes -ly, -ment(s), -ness(es); respell -tion(s) as
 * "shun(s)"; drop a silent final "e" or contract -ed / -es; merge common
 * vowel pairs into one vowel; finally re-attach the suffixes set aside.
 *
 * The rules are English spelling heuristics, not a pronunciation dictionary;
 * known mis-hits are flagged with "!" in the comments below.
 *
 * @param {string} arg_word - a single word, possibly with punctuation attached
 * @returns {string} the respelled word (may be empty if arg_word held no
 *          letters or digits)
 */
function syllabicizeWord(arg_word) {
	// Suffixes that are cut off before the silent-e/diphthong rules run and
	// glued back afterwards. Order matters: each is tried once, in sequence,
	// against whatever the previous step left.
	var removable_suffixes = ["ly", "ment", "ments", "ness", "nesses"];
	var suffix_index;
	var suffix_pattern;
	var suffix_text;
	var syllable_text;
	var temp_syllable_text;

	temp_syllable_text = arg_word;
	temp_syllable_text = temp_syllable_text.replace(/[\.\,\;\:\?\\\/\(\)\[\]\{\}]/g, "");
	temp_syllable_text = temp_syllable_text.replace(/[^a-z0-9]/ig, "");
	// the "e" in "nineteen" is not silent-final, so respell it up front
	temp_syllable_text = temp_syllable_text.replace(/nineteen/ig, "ninteen");

	// Look for standard suffixes. The test is case-insensitive so that it
	// agrees with the (already case-insensitive) removal.
	suffix_text = "";
	for (suffix_index = 0; suffix_index < removable_suffixes.length; suffix_index++) {
		suffix_pattern = new RegExp(removable_suffixes[suffix_index] + "$", "i");
		if (suffix_pattern.test(temp_syllable_text)) {
			temp_syllable_text = temp_syllable_text.replace(suffix_pattern, "");
			suffix_text = removable_suffixes[suffix_index] + suffix_text;
		}
	}
	temp_syllable_text = temp_syllable_text.replace(/tion$/i, "shun");
	temp_syllable_text = temp_syllable_text.replace(/tions$/i, "shuns");

	// Modify terminus, such as silent e, -ed and -es.
	// Only the first rule that changes the word is applied.
	syllable_text = temp_syllable_text.replace(/([aeiouy])([lr])e$/i, "$1$2"); // silent e
	if (syllable_text === temp_syllable_text) {
		// silent e, but not the, be, see, stable, dazzle, sabre, centre or such words
		syllable_text = temp_syllable_text.replace(/([aeiouy])(.*)([^lr])e$/i, "$1$2$3");
	}
	if (syllable_text === temp_syllable_text) {
		syllable_text = temp_syllable_text.replace(/([^dt])ed$/i, "$1d"); // contract "-ed"
	}
	if (syllable_text === temp_syllable_text) {
		syllable_text = temp_syllable_text.replace(/([^cghsxz])es$/i, "$1s"); // contract "-es"
	}

	// Remove vowel diphthongs (each pair collapses to a single vowel letter)
	syllable_text = syllable_text.replace(/eau/ig, "u");       // beautiful
	syllable_text = syllable_text.replace(/[aeou]y/ig, "e");   // yay, fey, enjoy, guy
	syllable_text = syllable_text.replace(/y[aeiou]/ig, "y");  // ying-yang, yen, yodel, Yusuf, "yuns" (Pittsburghese)
	syllable_text = syllable_text.replace(/ie/ig, "e");        // !diary, pier, !radii, !erbium
	syllable_text = syllable_text.replace(/io/ig, "u");        // superior diction
	syllable_text = syllable_text.replace(/e[aeiu]/ig, "e");   // near, veer, vein, !aqueous, eukaryote
	syllable_text = syllable_text.replace(/[ao]u/ig, "u");     // Aussie, out
	syllable_text = syllable_text.replace(/oo/ig, "u");        // hoot
	syllable_text = syllable_text.replace(/ai/ig, "e");        // fair
	syllable_text = syllable_text.replace(/[ou]i/ig, "i");     // voice, guide
	syllable_text = syllable_text.replace(/oa/ig, "o");        // boat
	syllable_text = syllable_text.replace(/ua/ig, "a");        // square, dual
	syllable_text = syllable_text.replace(/oe/ig, "o");        // poem, amoeba
	syllable_text = syllable_text.replace(/[au]e/ig, "e");     // gaelic, aeon, heroes, question

	syllable_text += suffix_text;

	return syllable_text;
} // closes function syllabicizeWord

/**
 * Estimate the number of syllables in a word.
 *
 * The word is first respelled by syllabicizeWord(); every vowel letter
 * (a, e, i, o, u, y) left in that respelling counts as one syllable.
 * A token made only of digits counts one syllable per digit. Every token
 * counts as at least one syllable, including vowel-less ones such as "hmm".
 *
 * @param {string} arg_word - a single word, possibly with punctuation attached
 * @returns {number} syllable estimate, always >= 1
 */
function countSyllablesInWord(arg_word) {
	var syllable_text;
	var vowel_matches;
	// start from 0 so that the ">= 1" floor below also covers tokens with
	// neither vowels nor an all-digit spelling (previously left undefined)
	var num_syllables_in_word = 0;

	syllable_text = syllabicizeWord(arg_word);

	// Count syllables as the number of vowel letters
	vowel_matches = syllable_text.match(/[aeiouy]/ig);
	if (vowel_matches) {
		num_syllables_in_word = vowel_matches.length;
	} else if (/^[0-9]*$/.test(syllable_text)) {
		// pure numbers count as one syllable per digit
		num_syllables_in_word = syllable_text.length;
	}

	if (num_syllables_in_word < 1) {
		num_syllables_in_word = 1;
	}

	return num_syllables_in_word;
} // closes function countSyllablesInWord

function articleReadability { var alert_string = ""; var diagnostic_string = ""; var read_entire_article = true;

var show_lead_diagnostics = true; var show_section_diagnostics = false; var display_individual_words = true;

var using_Internet_Explorer = false;

var spaced_text = ""; var untagged_text = ""; var stripped_text = ""; var unescaped_text = "";

var anchors; var temp_anchor; var section_name = ""; var temp_anchor_name = "";

var num_anchors = 0; var anchor_index = 0; var anchor_level = 0; var prev_anchor_level = 0; var num_H2_anchors = 0; var H2_anchor_index = 0;

var cutoff_anchor_index = 0; var cutoff_H2_anchor_index = 0; var cutoff_child_node_index = 0; var last_P_child_node_index = 0; var cutoff_element_node_index = 0;

var num_sections = 0; var section_index = 0;

var element_node; var num_element_nodes = 0; var element_node_index = 0;

var temp_node_name = "";

var parent_node; var grandparent_node; var greatgrandparent_node;

var sibling_node; var next_sibling_node;

var child_node; var child_nodes; var prev_child_node; var num_child_nodes = 0; var child_node_index = 0; var child_node_name = ""; var num_prose_counted_nodes = 0;

var grandchild_node; var grandchild_nodes; var num_grandchild_nodes = 0; var grandchild_node_index = 0;

var path_names; var file_name = "";

var num_characters = 0; var del_num_characters = 0; var temp_num_characters = 0;

var temp_word = ""; var num_words = 0; var word_count = 0; var word_index = 0; var nonempty_word_index = 0; var tentative_num_words = 0;

var syllable_text = ""; var temp_syllable_text = "";

var num_spaces = 0; var syllable_count = 0; var paragraph_count = 0; var list_item_count = 0; var prose_size_bytes = 0; var num_syllables_in_word = 0;

var total_word_count = 0; var total_syllable_count = 0; var total_paragraph_count = 0; var total_list_item_count = 0; var total_prose_size_bytes = 0;

var section_word_count = new Array; var section_syllable_count = new Array; var section_paragraph_count = new Array; var section_list_item_count = new Array; var section_prose_size_bytes = new Array;

var word_count_string = ""; var paragraph_count_string = ""; var list_item_count_string = ""; var prose_size_bytes_string = "";

var temp_paragraph; var text_paragraphs; var num_paragraphs = 0; var paragraph_index = 0;

var temp_list_item; var text_list_items; var num_list_items = 0; var list_item_index = 0;

var temp_image; var num_pixels = 0; var image_index = 0; var image_counter = 0; var num_raw_images = 0; var num_nonicon_images = 0;

var num_anchors = 0; var num_raw_links = 0; var num_raw_tables = 0; var num_raw_references = 0;

// check for Internet Explorer browser

using_Internet_Explorer = false; if (navigator.userAgent.indexOf("MSIE") > -1) { using_Internet_Explorer = true; //		alert_string = "This script works correctly in every browser — except Internet Explorer. Please be patient!" //		window.alert(alert_string); }

// Find the cutoff H2 anchor index, where we stop counting things alert_string = ""; num_H2_anchors = 0; section_name = "lead section"; prev_anchor_level = 1; //begin at the H1 heading read_entire_article = true; anchors = document.anchors; num_anchors = anchors.length; for (anchor_index=1; anchor_index<num_anchors; anchor_index++) { temp_anchor = anchors[anchor_index];

parent_node = temp_anchor.parentNode; if (!parent_node) { continue; }

sibling_node = parent_node.nextSibling; if (!sibling_node) { continue; }

// Check headings for jumps upwards in heading level anchor_level = 0; if (sibling_node.nodeName == "H1") { alert_string += " WARNING: Illegal H1 heading in this section\n"; } else if (sibling_node.nodeName == "H2") { anchor_level = 2; } else if (sibling_node.nodeName == "H3") { anchor_level = 3; } else if (sibling_node.nodeName == "H4") { anchor_level = 4; } else if (sibling_node.nodeName == "H5") { anchor_level = 5; } else { next_sibling_node = sibling_node.nextSibling; if (!next_sibling_node) { continue; }

// Check headings for jumps upwards in heading level if (next_sibling_node.nodeName == "H1") { alert_string += " WARNING: Illegal H1 heading in this section\n"; } else if (next_sibling_node.nodeName == "H2") { anchor_level = 2; } else if (next_sibling_node.nodeName == "H3") { anchor_level = 3; } else if (next_sibling_node.nodeName == "H4") { anchor_level = 4; } else if (next_sibling_node.nodeName == "H5") { anchor_level = 5; }		} // closes assignment of the anchor level, if any if (((anchor_level - prev_anchor_level) > 1) && (prev_anchor_level != 0)) { if (num_H2_anchors == 0) { alert_string += " WARNING: H" + prev_anchor_level + " to H" + anchor_level + " jump in the lead\n"; } else { alert_string += " WARNING: H" + prev_anchor_level + " to H" + anchor_level + " jump in \"" + section_name.replace(/(_+)/ig, " ") + "\"\n"; }		}		if (anchor_level > 0) { prev_anchor_level = anchor_level; }

//Check major section headings for closing sections if (anchor_level == 2) { num_H2_anchors++; section_name = temp_anchor.name; temp_anchor_name = temp_anchor.name;

alert_string += "Section " + num_H2_anchors + " : " + section_name.replace(/(_+)/ig, " ") + "\n"; //			alert_string += "Section " + num_H2_anchors + " : " + section_name.replace(/(_+)/ig, " ") + " " + temp_anchor.parentNode.nodeName + " " + sibling_node.nodeName + "\n";

temp_anchor_name = temp_anchor_name.replace(/:$/ig,""); // eliminate colons at end temp_anchor_name = temp_anchor_name.replace(/s$/ig,""); // eliminate plurals at end temp_anchor_name = temp_anchor_name.replace(/See_also/ig,""); temp_anchor_name = temp_anchor_name.replace(/Related_topic/ig,""); temp_anchor_name = temp_anchor_name.replace(/Related_article/ig,""); temp_anchor_name = temp_anchor_name.replace(/Further_reading/ig,""); temp_anchor_name = temp_anchor_name.replace(/External_link/ig,""); temp_anchor_name = temp_anchor_name.replace(/Footnote/ig,""); temp_anchor_name = temp_anchor_name.replace(/Note/ig,""); temp_anchor_name = temp_anchor_name.replace(/Reference/ig,""); temp_anchor_name = temp_anchor_name.replace(/Citation/ig,""); temp_anchor_name = temp_anchor_name.replace(/Source/ig,""); temp_anchor_name = temp_anchor_name.replace(/Link/ig,""); temp_anchor_name = temp_anchor_name.replace(/s([_\s]+)and([_\s]+)/ig,""); temp_anchor_name = temp_anchor_name.replace(/([_\s]+)and([_\s]+)/ig,""); temp_anchor_name = temp_anchor_name.replace(/([_\s]+)/ig,""); if (temp_anchor_name == "") { break; }

//			diagnostic_string = "Section " + num_H2_anchors + " : " + temp_anchor_name + " L: " + temp_anchor_name.length; //			window.alert(diagnostic_string);

} // closes check for H2 anchor } // closes loop over the anchors cutoff_anchor_index = anchor_index; cutoff_H2_anchor_index = num_H2_anchors; if (cutoff_anchor_index < num_anchors) { read_entire_article = false; alert_string += "\nReadability analysis will cover the text before the \"" + temp_anchor.name.replace(/(_+)/ig, " ") + "\" section.\n"; } else { read_entire_article = true; alert_string += "\nReadability analysis will cover the entire article.\n"; }	window.alert(alert_string);

// Count child and element nodes alert_string = ""; num_element_nodes = 0; child_nodes = document.getElementById("bodyContent").childNodes; num_child_nodes = child_nodes.length; //	if (num_child_nodes > 40) { num_child_nodes = 40;} // truncate loop for testing

for (child_node_index=0; child_node_index < num_child_nodes; child_node_index++) { child_node = child_nodes[child_node_index]; if (child_node.nodeType != 1) { //			alert_string += "Child node " + child_node_index + " : " + child_node.nodeName + "\n"; continue; } // examine only Element nodes num_element_nodes++; //		alert_string += "Element node " + num_element_nodes + " : " + child_node.nodeName + "\n"; } // closes loop counting the element nodes //	window.alert(alert_string);

// Determine the corresponding childNode index cutoff alert_string = ""; if (read_entire_article == true) { cutoff_child_node_index = num_child_nodes; cutoff_element_node_index = num_element_nodes; } else { H2_anchor_index = 0; element_node_index = 0; last_P_child_node_index = -1; last_P_element_node_index = -1; for (child_node_index=0; child_node_index < num_child_nodes; child_node_index++) { child_node = child_nodes[child_node_index]; if (child_node.nodeType != 1) { continue; } // examine only Element nodes element_node_index++;

if (child_node.nodeName == "P") { last_P_child_node_index = child_node_index; last_P_element_node_index = num_element_nodes; } else if (child_node.nodeName == "H2") { H2_anchor_index++; if (H2_anchor_index == cutoff_H2_anchor_index) { cutoff_child_node_index = last_P_child_node_index; cutoff_element_node_index = last_P_element_node_index; break; }			} //			alert_string += "Section " + H2_anchor_index + ", Element node " + num_element_nodes + " : " + child_node.nodeName + " " + child_node.childNodes.length + "\n"; //			if (num_element_nodes > 45) { break; } // for debugging } // closes loop over the childNodes of the Document if (last_P_child_node_index < 0) { // if no cutoff was discovered; should never happen cutoff_child_node_index = num_child_nodes; cutoff_element_node_index = num_element_nodes; }	} // closes check whether to read entire article alert_string = "\nThe child_node_index and element_node_index cutoffs are " + cutoff_child_node_index + " and " + cutoff_element_node_index + ", respectively.\n"; //	window.alert(alert_string);

// Count the paragraphs, words, syllables and prose size bytes by section word_count = 0; syllable_count = 0; paragraph_count = 0; list_item_count = 0; prose_size_bytes = 0; num_prose_counted_nodes = 0;

H2_anchor_index = 0; for (child_node_index=0; child_node_index < cutoff_child_node_index; child_node_index++) { child_node = child_nodes[child_node_index]; if (child_node.nodeType != 1) { continue; } // examine only Element nodes element_node_index++;

if (child_node.nodeName == "H2") { section_word_count.push(word_count); section_syllable_count.push(syllable_count); section_paragraph_count.push(paragraph_count); section_list_item_count.push(list_item_count); section_prose_size_bytes.push(prose_size_bytes);

H2_anchor_index++; word_count = 0; syllable_count = 0; paragraph_count = 0; list_item_count = 0; prose_size_bytes = 0; }

// if the child node meets the criteria, add to the prose size, word and paragraph counts if ((child_node.nodeName == "P") || (child_node.nodeName == "PRE")) {

/*			untagged_text = child_node.innerHTML; untagged_text = untagged_text.replace(/ /ig,""); // keep simple superscript text untagged_text = untagged_text.replace(/(]+)>)(.*?<\/sup>)/ig,""); // remove superscript text untagged_text = untagged_text.replace(/(<([^>]+)>)/ig,""); // remove remaining tags untagged_text = untagged_text.replace(/&gt;/ig, ">"); // convert &gt; to a single character > untagged_text = untagged_text.replace(/&lt;/ig, "<"); // convert &lt; to a single character < untagged_text = untagged_text.replace(/&amp;/ig, "&"); // convert &amp; to a single character & untagged_text = untagged_text.replace(/—/ig, ", "); // replace em-dashes with comma+space untagged_text = untagged_text.replace(/[\-\–]/ig, " "); // replace hyphens and en-dashes with space; THIS DIFFERS FROM STRUCTURE SCRIPT, BUT IT'S USEFUL FOR SYLLABLE COUNTING

spaced_text = untagged_text.replace(/ /ig, " "); // convert non-breaking spaces to spaces spaced_text = spaced_text.replace(/&#160;/ig, " "); // convert non-breaking spaces to spaces spaced_text = spaced_text.replace(/\s+/ig, " "); // convert all whitespace to a single space

// try out new function spaced_text = nodeText(child_node);

words = spaced_text.split(' '); tentative_num_words = words.length; if (tentative_num_words > 0) { // verify that the paragraph contributes text num_words = 0; num_characters = 0; for (word_index=0; word_index 0) { num_words++; num_characters += del_num_characters; }				}				if (num_words > 0) { paragraph_count++; num_prose_counted_nodes++; word_count += num_words; prose_size_bytes += num_characters; num_spaces = num_words - 1; prose_size_bytes += num_spaces; // add spaces to character count child_node.style.cssText = "background-color:yellow"; // Code for testing output if ((!show_section_diagnostics) && ((!show_lead_diagnostics) || (H2_anchor_index != 0))) { continue; }					diagnostic_string = ""; nonempty_word_index = 0; temp_num_characters = 0; temp_num_syllables = 0; for (word_index=0; word_index45) && (display_individual_words)) { window.alert(diagnostic_string); diagnostic_string = "Continued from previous screen:\n\n"; }						temp_word = words[word_index]; del_num_characters = temp_word.length; if (del_num_characters > 0) { nonempty_word_index++; temp_num_characters += del_num_characters;

syllable_text = syllabicizeWord(temp_word);

// Count syllables by splitting on vowel & (0+) consonants text_matches = syllable_text.match(/[aeiouy]/ig); if (text_matches) { num_syllables_in_word = syllable_text.match(/[aeiouy]/ig).length; } else { temp_syllable_text = syllable_text.replace(/[^0-9]/, ""); if (syllable_text == temp_syllable_text) { // pure numbers count as one syllable per digit num_syllables_in_word = syllable_text.length; }							}

if (num_syllables_in_word < 1) { num_syllables_in_word = 1; }

num_syllables_in_word = countSyllablesInWord(temp_word);

//							diagnostic_string += "Section " + H2_anchor_index + ", Paragraph " + paragraph_count + ", Word " + nonempty_word_index + " : " + temp_word + " " + del_num_characters + " " + temp_num_characters + "  syllables " + num_syllables_in_word + "\n"; diagnostic_string += "Section " + H2_anchor_index + ", Paragraph " + paragraph_count + ", Word " + nonempty_word_index + " : " + temp_word + " |" + syllable_text + "| syllables " + num_syllables_in_word + "\n"; }					}					temp_num_characters += num_spaces; diagnostic_string += "Added " + num_spaces + " spaces to the byte count.\n\n"; if (display_individual_words) { diagnostic_string += "\nEND of paragraph " + paragraph_count + " of Section " + H2_anchor_index + ": character count = " + temp_num_characters + " total= " + prose_size_bytes + "\n"; window.alert(diagnostic_string); }				} // closes check for non-empty paragraph } // tentative check for words } else if ((child_node.nodeName == "UL") || (child_node.nodeName == "OL")) { // unordered and ordered lists grandchild_nodes = child_node.childNodes; // not all LI elements because of possible nesting num_grandchild_nodes = grandchild_nodes.length; for (grandchild_node_index=0; grandchild_node_index]+)>)(.*?<\/sup>)/ig,""); // remove superscript text untagged_text = untagged_text.replace(/(<([^>]+)>)/ig,""); // remove remaining tags untagged_text = untagged_text.replace(/&gt;/ig, ">"); // convert &gt; to a single character > untagged_text = untagged_text.replace(/&lt;/ig, "<"); // convert &lt; to a single character < untagged_text = untagged_text.replace(/&amp;/ig, "&"); // convert &amp; to a single character & untagged_text = untagged_text.replace(/—/ig, ", "); // replace em-dashes with comma+space

spaced_text = untagged_text.replace(/ /ig, " "); // convert non-breaking spaces to spaces spaced_text = spaced_text.replace(/&#160;/ig, " "); // convert non-breaking spaces to spaces spaced_text = spaced_text.replace(/\s+/ig, " "); // convert all whitespace to a single space

words = spaced_text.split(' '); tentative_num_words = words.length; if (tentative_num_words > 0) { // verify that the list item contributes text num_words = 0; num_characters = 0; for (word_index=0; word_index 0) { num_words++; num_characters += del_num_characters; }						}						if (num_words > 0) { list_item_count++; num_prose_counted_nodes++; word_count += num_words; prose_size_bytes += num_characters; num_spaces = num_words - 1; prose_size_bytes += num_spaces; // add spaces to character count child_node.style.cssText = "background-color:yellow";

// Code for testing output if ((!show_section_diagnostics) && ((!show_lead_diagnostics) || (H2_anchor_index != 0))) { continue; }							diagnostic_string = ""; nonempty_word_index = 0; temp_num_characters = 0; for (word_index=0; word_index45) && (display_individual_words)) { window.alert(diagnostic_string); diagnostic_string = "Continued from previous screen:\n\n"; }								temp_word = words[word_index]; del_num_characters = temp_word.length; if (del_num_characters > 0) { nonempty_word_index++; temp_num_characters += del_num_characters; diagnostic_string += "Section " + H2_anchor_index + ", Paragraph " + paragraph_count + ", List item " + list_item_count + ", Word " + nonempty_word_index + " : " + temp_word + " " + del_num_characters + "\n"; }							}							temp_num_characters += num_spaces; diagnostic_string += "Added " + num_spaces + " spaces to the byte count.\n\n"; if (display_individual_words) { diagnostic_string += "\nEND of list item " + list_item_count + " of Section " + H2_anchor_index + ": character count = " + temp_num_characters + " total= " + prose_size_bytes + "\n"; window.alert(diagnostic_string); }						} // closes check for non-empty list item } // tentative check for words } // closes check for a list item (LI) node } // closes loop over grandchild nodes of an ordered (OL) or unordered (UL) list } else if (child_node.nodeName == "DL") { // discursive lists grandchild_nodes = child_node.childNodes; num_grandchild_nodes = grandchild_nodes.length; for (grandchild_node_index=0; grandchild_node_index 0) { temp_node_name = grandchild_node.childNodes[0].nodeName; if ((temp_node_name == "DIV") || (temp_node_name == "SPAN")) { continue; } }					if (grandchild_node.childNodes.length > 1) { temp_node_name = grandchild_node.childNodes[1].nodeName; if ((temp_node_name == "DIV") || (temp_node_name == "SPAN")) { continue; } }

untagged_text = grandchild_node.innerHTML; untagged_text = untagged_text.replace(/ /ig,""); // keep simple superscript text untagged_text = untagged_text.replace(/(]+)>)(.*?<\/sup>)/ig,""); // remove superscript text untagged_text = untagged_text.replace(/(<([^>]+)>)/ig,""); // remove remaining tags untagged_text = untagged_text.replace(/&gt;/ig, ">"); // convert &gt; to a single character > untagged_text = untagged_text.replace(/&lt;/ig, "<"); // convert &lt; to a single character < untagged_text = untagged_text.replace(/&amp;/ig, "&"); // convert &amp; to a single character & untagged_text = untagged_text.replace(/—/ig, ", "); // replace em-dashes with comma+space

spaced_text = untagged_text.replace(/ /ig, " "); // convert non-breaking spaces to spaces spaced_text = spaced_text.replace(/&#160;/ig, " "); // convert non-breaking spaces to spaces spaced_text = spaced_text.replace(/\s+/ig, " "); // convert all whitespace to a single space

words = spaced_text.split(' '); tentative_num_words = words.length; if (tentative_num_words > 0) { // verify that the list item contributes text num_words = 0; num_characters = 0; for (word_index=0; word_index 0) { num_words++; num_characters += del_num_characters; }						}						if (num_words > 0) { list_item_count++; num_prose_counted_nodes++; word_count += num_words; prose_size_bytes += num_characters; num_spaces = num_words - 1; prose_size_bytes += num_spaces; // add spaces to character count child_node.style.cssText = "background-color:yellow";

// Code for testing output if ((!show_section_diagnostics) && ((!show_lead_diagnostics) || (H2_anchor_index != 0))) { continue; }							diagnostic_string = ""; nonempty_word_index = 0; temp_num_characters = 0; for (word_index=0; word_index45) && (display_individual_words)) { window.alert(diagnostic_string); diagnostic_string = "Continued from previous screen:\n\n"; }								temp_word = words[word_index]; del_num_characters = temp_word.length; if (del_num_characters > 0) { nonempty_word_index++; temp_num_characters += del_num_characters; diagnostic_string += "Section " + H2_anchor_index + ", Paragraph " + paragraph_count + ", List item " + list_item_count + ", Word " + nonempty_word_index + " : " + temp_word + " " + del_num_characters + "\n";								} }							temp_num_characters += num_spaces; diagnostic_string += "Added " + num_spaces + " spaces to the byte count.\n\n"; if (display_individual_words) { diagnostic_string += "\nEND of discursive list item " + list_item_count + " of Section " + H2_anchor_index + ": character count = " + temp_num_characters + " total= " + prose_size_bytes + "\n"; window.alert(diagnostic_string); }						} // closes check for non-empty list item } // tentative check for words } // closes check for a discursive list item (DT or DD) node } // closes loop over grandchild nodes of a discursive list DL		} else if (child_node.nodeName == "BLOCKQUOTE") { grandchild_nodes = child_node.getElementsByTagName("P"); num_grandchild_nodes = grandchild_nodes.length; for (grandchild_node_index=0; grandchild_node_index]+)>)(.*?<\/sup>)/ig,""); // remove superscript text untagged_text = untagged_text.replace(/(<([^>]+)>)/ig,""); // remove remaining tags untagged_text = untagged_text.replace(/&gt;/ig, ">"); // convert &gt; to a single character > untagged_text = untagged_text.replace(/&lt;/ig, "<"); // convert &lt; to a single character < untagged_text = untagged_text.replace(/&amp;/ig, "&"); // convert &amp; to a single character & untagged_text = 
untagged_text.replace(/—/ig, ", "); // replace em-dashes with comma+space

spaced_text = untagged_text.replace(/ /ig, " "); // convert non-breaking spaces to spaces spaced_text = spaced_text.replace(/&#160;/ig, " "); // convert non-breaking spaces to spaces spaced_text = spaced_text.replace(/\s+/ig, " "); // convert all whitespace to a single space

words = spaced_text.split(' '); tentative_num_words = words.length; if (tentative_num_words > 0) { // verify that the list item contributes text num_words = 0; num_characters = 0; for (word_index=0; word_index 0) { num_words++; num_characters += del_num_characters; }						}						if (num_words > 0) { // don't count blockquotes, for now num_prose_counted_nodes++; word_count += num_words; prose_size_bytes += num_characters; num_spaces = num_words - 1; prose_size_bytes += num_spaces; // add spaces to character count child_node.style.cssText = "background-color:yellow";

// Code for testing output if ((!show_section_diagnostics) && ((!show_lead_diagnostics) || (H2_anchor_index != 0))) { continue; }							diagnostic_string = ""; nonempty_word_index = 0; temp_num_characters = 0; for (word_index=0; word_index45) && (display_individual_words)) { window.alert(diagnostic_string); diagnostic_string = "Continued from previous screen:\n\n"; }								temp_word = words[word_index]; del_num_characters = temp_word.length; if (del_num_characters > 0) { nonempty_word_index++; temp_num_characters += del_num_characters; diagnostic_string += "Section " + H2_anchor_index + ", Paragraph " + paragraph_count + ", Word " + nonempty_word_index + " : " + temp_word + " " + del_num_characters + "\n"; }							}							temp_num_characters += num_spaces; diagnostic_string += "Added " + num_spaces + " spaces to the byte count.\n\n"; if (display_individual_words) { diagnostic_string += "\nEND of BLOCKQUOTE in Section " + H2_anchor_index + ": character count = " + temp_num_characters + " total= " + prose_size_bytes + "\n"; window.alert(diagnostic_string); }						} // closes check for non-empty list item } // tentative check for words } // closes check for a paragraph (P) node in a BLOCKQUOTE } // closes loop over grandchild nodes in a BLOCKQUOTE } else if (child_node.nodeName == "TABLE") { if (child_node.className != "cquote") { continue; } // count only tables that are cquotes grandchild_nodes = child_node.getElementsByTagName("TD"); num_grandchild_nodes = grandchild_nodes.length; for (grandchild_node_index=0; grandchild_node_index]+)>)(.*?<\/sup>)/ig,""); // remove superscript text untagged_text = untagged_text.replace(/(<([^>]+)>)/ig,""); // remove remaining tags untagged_text = untagged_text.replace(/&gt;/ig, ">"); // convert &gt; to a single character > untagged_text = untagged_text.replace(/&lt;/ig, "<"); // convert &lt; to a single character < untagged_text = untagged_text.replace(/&amp;/ig, "&"); // convert &amp; to a single character & untagged_text = 
untagged_text.replace(/—/ig, ", "); // replace em-dashes with comma+space

spaced_text = untagged_text.replace(/ /ig, " "); // convert non-breaking spaces to spaces spaced_text = spaced_text.replace(/&#160;/ig, " "); // convert non-breaking spaces to spaces spaced_text = spaced_text.replace(/\s+/ig, " "); // convert all whitespace to a single space

words = spaced_text.split(' '); tentative_num_words = words.length; if (tentative_num_words > 0) { // verify that the list item contributes text num_words = 0; num_characters = 0; for (word_index=0; word_index 0) { num_words++; num_characters += del_num_characters; }						}						if (num_words > 0) { // don't count cquotes, for now num_prose_counted_nodes++; word_count += num_words; prose_size_bytes += num_characters; num_spaces = num_words - 1; prose_size_bytes += num_spaces; // add spaces to character count child_node.style.cssText = "background-color:yellow";

// Code for testing output if ((!show_section_diagnostics) && ((!show_lead_diagnostics) || (H2_anchor_index != 0))) { continue; }							diagnostic_string = ""; nonempty_word_index = 0; temp_num_characters = 0; for (word_index=0; word_index<tentative_num_words; word_index++) { if ((word_index%45 == 1) && (word_index>45) && (display_individual_words)) { window.alert(diagnostic_string); diagnostic_string = "Continued from previous screen:\n\n"; }								temp_word = words[word_index]; del_num_characters = temp_word.length; if (del_num_characters > 0) { nonempty_word_index++; temp_num_characters += del_num_characters; diagnostic_string += "Section " + H2_anchor_index + ", Paragraph " + paragraph_count + ", Word " + nonempty_word_index + " : " + temp_word + " " + del_num_characters + "\n"; }							}							temp_num_characters += num_spaces; diagnostic_string += "Added " + num_spaces + " spaces to the byte count.\n\n"; if (display_individual_words) { diagnostic_string += "\nEND of CQUOTE paragraph in Section " + H2_anchor_index + ": character count = " + temp_num_characters + " total= " + prose_size_bytes + "\n"; window.alert(diagnostic_string); }						} // closes check for non-empty list item } // tentative check for words } // closes check for a paragraph (P) node in a CQUOTE } // closes loop over grandchild nodes in a CQUOTE } else if (child_node.nodeName == "DIV") { // Poems if (child_node.className != "poem") { continue; } // allow only poem DIV's			grandchild_nodes = child_node.getElementsByTagName("P"); num_grandchild_nodes = grandchild_nodes.length; for (grandchild_node_index=0; grandchild_node_index<num_grandchild_nodes; grandchild_node_index++) { grandchild_node = grandchild_nodes[grandchild_node_index]; if (grandchild_node.nodeName == "P") { untagged_text = grandchild_node.innerHTML; untagged_text = untagged_text.replace(/ /ig,""); // keep simple superscript text untagged_text = untagged_text.replace(/(<sup([^>]+)>)(.*?<\/sup>)/ig,""); // remove superscript text 
untagged_text = untagged_text.replace(/(<([^>]+)>)/ig,""); // remove remaining tags untagged_text = untagged_text.replace(/&gt;/ig, ">"); // convert &gt; to a single character > untagged_text = untagged_text.replace(/&lt;/ig, "<"); // convert &lt; to a single character < untagged_text = untagged_text.replace(/&amp;/ig, "&"); // convert &amp; to a single character & untagged_text = untagged_text.replace(/—/ig, ", "); // replace em-dashes with comma+space

spaced_text = untagged_text.replace(/ /ig, " "); // convert non-breaking spaces to spaces spaced_text = spaced_text.replace(/&#160;/ig, " "); // convert non-breaking spaces to spaces spaced_text = spaced_text.replace(/\s+/ig, " "); // convert all whitespace to a single space

words = spaced_text.split(' '); tentative_num_words = words.length; if (tentative_num_words > 0) { // verify that the list item contributes text num_words = 0; num_characters = 0; for (word_index=0; word_index<tentative_num_words; word_index++) { temp_word = words[word_index]; del_num_characters = temp_word.length; if (del_num_characters > 0) { num_words++; num_characters += del_num_characters; }						}						if (num_words > 0) { // don't count blockquotes, for now num_prose_counted_nodes++; word_count += num_words; prose_size_bytes += num_characters; num_spaces = num_words - 1; prose_size_bytes += num_spaces; // add spaces to character count child_node.style.cssText = "background-color:yellow";

// Code for testing output if ((!show_section_diagnostics) && ((!show_lead_diagnostics) || (H2_anchor_index != 0))) { continue; }							diagnostic_string = ""; nonempty_word_index = 0; temp_num_characters = 0; for (word_index=0; word_index<tentative_num_words; word_index++) { if ((word_index%45 == 1) && (word_index>45) && (display_individual_words)) { window.alert(diagnostic_string); diagnostic_string = "Continued from previous screen:\n\n"; }								temp_word = words[word_index]; del_num_characters = temp_word.length; if (del_num_characters > 0) { nonempty_word_index++; temp_num_characters += del_num_characters; diagnostic_string += "Section " + H2_anchor_index + ", Paragraph " + paragraph_count + ", Word " + nonempty_word_index + " : " + temp_word + " " + del_num_characters + "\n"; }							}							temp_num_characters += num_spaces; diagnostic_string += "Added " + num_spaces + " spaces to the byte count.\n\n"; if (display_individual_words) { diagnostic_string += "\nEND of in Section " + H2_anchor_index + ": character count = " + temp_num_characters + " total= " + prose_size_bytes + "\n"; window.alert(diagnostic_string); }						} // closes check for non-empty list item } // tentative check for words } // closes check for a paragraph (P) node in a poem } // closes loop over grandchild nodes in a poem } // closes check for appropriate elements } // closes loop over the child nodes section_word_count.push(word_count); section_paragraph_count.push(paragraph_count); section_list_item_count.push(list_item_count); section_prose_size_bytes.push(prose_size_bytes);

// Output the various counts

word_count_string = " word"; paragraph_count_string = " paragraph"; list_item_count_string = " list item"; prose_size_bytes_string = " byte"; if (section_word_count[0] != 1) { word_count_string += "s";} if (section_paragraph_count[0] != 1) { paragraph_count_string += "s";} if (section_list_item_count[0] != 1) { list_item_count_string += "s";} if (section_prose_size_bytes[0] != 1) { prose_size_bytes_string += "s";}

alert_string = "Lead section: " + section_paragraph_count[0] + paragraph_count_string + ", " + section_list_item_count[0] + list_item_count_string + ", " + section_word_count[0] + word_count_string + ", " + section_prose_size_bytes[0] + prose_size_bytes_string + "\n\n";

total_word_count = section_word_count[0]; total_paragraph_count = section_paragraph_count[0]; total_list_item_count = section_list_item_count[0]; total_prose_size_bytes = section_prose_size_bytes[0];

num_sections = section_word_count.length; for (section_index=1; section_index<num_sections; section_index++) { total_word_count += section_word_count[section_index]; total_paragraph_count += section_paragraph_count[section_index]; total_list_item_count += section_list_item_count[section_index]; total_prose_size_bytes += section_prose_size_bytes[section_index]; word_count_string = " word"; paragraph_count_string = " paragraph"; list_item_count_string = " list item"; prose_size_bytes_string = " byte"; if (section_word_count[section_index] != 1) { word_count_string += "s";} if (section_paragraph_count[section_index] != 1) { paragraph_count_string += "s";} if (section_list_item_count[section_index] != 1) { list_item_count_string += "s";} if (section_prose_size_bytes[section_index] != 1) { prose_size_bytes_string += "s";}

alert_string += "Section " + section_index + " : " + section_paragraph_count[section_index] + paragraph_count_string + ", " + section_list_item_count[section_index] + list_item_count_string + ", " + section_word_count[section_index] + word_count_string + ", " + section_prose_size_bytes[section_index] + prose_size_bytes_string + "\n"; }	if (num_sections>1) {alert_string += "\n";} // Make space for the totals

word_count_string = " word"; paragraph_count_string = " paragraph"; list_item_count_string = " list item"; prose_size_bytes_string = " byte"; if (total_word_count != 1) { word_count_string += "s";} if (total_paragraph_count != 1) { paragraph_count_string += "s";} if (total_list_item_count != 1) { list_item_count_string += "s";} if (total_prose_size_bytes != 1) { prose_size_bytes_string += "s";} alert_string += "Totals: " + total_paragraph_count + paragraph_count_string + ", " + total_list_item_count + list_item_count_string + ", " + total_word_count + word_count_string + ", " + total_prose_size_bytes + prose_size_bytes_string + "\n"; window.alert(alert_string);

} // closes function articleReadability

// Once the DOM is ready, add a "readability" link to the 'p-cactions' portlet.
// Arguments to mw.util.addPortletLink, in order: portlet id, href, link text,
// element id, tooltip. The optional accesskey / nextnode arguments are omitted.
//
// The href must *call* articleReadability(); a bare function name in a
// javascript: URL only evaluates to the function object and runs nothing.
// NOTE(review): this relies on articleReadability being reachable from the
// global scope when the link is clicked — confirm for the way this script is loaded.
$(function () {
    mw.util.addPortletLink(
        'p-cactions',
        'javascript:articleReadability()',
        'readability',
        'ca-readability',
        'Assess article readability'
    );
});

//