// User:Proteins/articlestructure.js
//
// Analyze the article's structure.
// With kind respects to Dr. pda, whose excellent prosesizebytes.js script was the inspiration.
//
// To use this script, add "importScript('User:Proteins/articlestructure.js');" to your monobook.js subpage
// under your user page, as you can see at User:Proteins/monobook.js

/**
 * Analyze the structure of the article currently displayed and report it in
 * three window.alert() dialogs:
 *
 *   1. The major (H2) sections found via document.anchors, warnings about
 *      illegal H1 headings and about heading-level jumps (e.g. H2 -> H4), and
 *      the section (if any) at which prose counting stops.  Counting stops at
 *      the first H2 whose name consists only of "closing" words such as
 *      "See also", "References", "Notes", "External links", ...
 *   2. Per-section and total counts of paragraphs, list items, words and
 *      prose bytes (word characters plus one space between words).
 *   3. The images larger than 5000 pixels, with their dimensions and names.
 *
 * Every top-level node that contributed prose gets a yellow background.
 *
 * Takes no arguments and returns nothing.  Reads the globals `document` and
 * `window`.
 *
 * NOTE(review): the copy of this function under review was damaged (empty
 * "()" pairs removed, several "<...>" spans missing from loops and regular
 * expressions).  The damaged spans were rebuilt from the intact "poem" branch
 * and from the surviving closing comments; places where that involved a guess
 * are marked below.  The original also contained statements after an
 * unconditional `return` (table/reference/interwiki/category/anchor counts);
 * they could never run and are not reproduced here.
 */
function articleStructure() {
    // Debugging switches.  Per-word dialogs are shown only when
    // display_individual_words is true AND either show_section_diagnostics is
    // true or (show_lead_diagnostics is true and we are in the lead section).
    var show_lead_diagnostics = true;
    var show_section_diagnostics = false;
    var display_individual_words = false;

    // Applied in this order to an H2 anchor name; if nothing is left, the
    // section is a "closing" section and prose counting stops before it.
    var closing_name_patterns = [
        /:$/ig,                  // eliminate colons at end
        /s$/ig,                  // eliminate plurals at end
        /See_also/ig,
        /Related_topic/ig,
        /Related_article/ig,
        /Further_reading/ig,
        /External_link/ig,
        /Footnote/ig,
        /Note/ig,
        /Reference/ig,
        /Citation/ig,
        /Source/ig,
        /Link/ig,
        /s([_\s]+)and([_\s]+)/ig,
        /([_\s]+)and([_\s]+)/ig,
        /([_\s]+)/ig
    ];

    var section_counts = [];      // one counts record per section; [0] is the lead
    var current = new_counts();   // counts of the section being read
    var H2_index = 0;             // 0 while in the lead, then 1, 2, ...

    // Fresh, zeroed record of the four quantities tracked per section.
    function new_counts() {
        return { words: 0, paragraphs: 0, list_items: 0, bytes: 0 };
    }

    // Heading level (1-5) of a node, or 0 if it is not an H1-H5 element.
    function heading_level(node) {
        var match = /^H([1-5])$/.exec(node.nodeName);
        return match ? parseInt(match[1], 10) : 0;
    }

    // Anchor names use underscores where the visible heading has spaces.
    function display_name(anchor_name) {
        return anchor_name.replace(/(_+)/ig, " ");
    }

    // True if the anchor name is made up only of "closing section" words.
    function is_closing_section(anchor_name) {
        var remainder = anchor_name;
        var i;
        for (i = 0; i < closing_name_patterns.length; i++) {
            remainder = remainder.replace(closing_name_patterns[i], "");
        }
        return remainder === "";
    }

    // Reduce an innerHTML string to plain text and measure its words.
    function measure_prose(html) {
        var text = html
            // NOTE(review): this pattern was unreadable in the reviewed copy
            // (it appeared as `/ /`, which would delete every space).  It is
            // rebuilt from its comment, "keep simple superscript text".
            .replace(/<sup>/ig, "")
            .replace(/(<sup([^>]+)>)(.*?<\/sup>)/ig, "") // remove superscripts that carry attributes (e.g. footnote markers)
            .replace(/(<([^>]+)>)/ig, "")                // remove remaining tags
            .replace(/&gt;/ig, ">")                      // convert &gt; to a single character >
            .replace(/&lt;/ig, "<")                      // convert &lt; to a single character <
            .replace(/&amp;/ig, "&")                     // convert &amp; to a single character &
            .replace(/—/ig, ", ")                        // replace em-dashes with comma+space
            .replace(/&nbsp;/ig, " ")                    // convert non-breaking spaces to spaces
            .replace(/&#160;/ig, " ")                    // convert non-breaking spaces to spaces
            .replace(/\s+/ig, " ");                      // convert all whitespace to a single space
        var words = text.split(" ");
        var num_words = 0;
        var num_characters = 0;
        var w;
        for (w = 0; w < words.length; w++) {
            if (words[w].length > 0) {
                num_words++;
                num_characters += words[w].length;
            }
        }
        return {
            words: words,
            num_words: num_words,
            num_characters: num_characters,
            num_spaces: num_words - 1    // one separating space between adjacent words
        };
    }

    // Optional word-by-word debugging dialogs for one counted item.
    function show_diagnostics(stats, item_label, end_label, with_running_total) {
        var text = "";
        var nonempty_word_index = 0;
        var running_characters = 0;
        var w;
        var word;
        if (!display_individual_words) {
            return;
        }
        if (!show_section_diagnostics && (!show_lead_diagnostics || H2_index !== 0)) {
            return;
        }
        for (w = 0; w < stats.words.length; w++) {
            // Start a new dialog roughly every 45 words so that it fits on screen.
            if ((w % 45 === 1) && (w > 45)) {
                window.alert(text);
                text = "Continued from previous screen:\n\n";
            }
            word = stats.words[w];
            if (word.length > 0) {
                nonempty_word_index++;
                running_characters += word.length;
                text += item_label + ", Word " + nonempty_word_index + " : " + word + " " + word.length;
                if (with_running_total) {
                    text += " " + running_characters;
                }
                text += "\n";
            }
        }
        running_characters += stats.num_spaces;
        text += "Added " + stats.num_spaces + " spaces to the byte count.\n\n";
        text += "\nEND of " + end_label + ": character count = " + running_characters + " total= " + current.bytes + "\n";
        window.alert(text);
    }

    // Add the prose of text_node to the current section and highlight
    // highlight_node.  `kind` selects which counter is incremented:
    // "paragraph" -> paragraphs; "list item" / "discursive list item" ->
    // list items; anything else (quotes, poems) adds words and bytes only.
    function count_item(text_node, highlight_node, kind) {
        var stats = measure_prose(text_node.innerHTML);
        var is_list_item = (kind === "list item") || (kind === "discursive list item");
        var item_label;
        var end_label;
        if (stats.num_words === 0) {
            return;   // the node contributes no text
        }
        if (kind === "paragraph") {
            current.paragraphs++;
        } else if (is_list_item) {
            current.list_items++;
        }
        current.words += stats.num_words;
        current.bytes += stats.num_characters + stats.num_spaces;
        highlight_node.style.cssText = "background-color:yellow";

        item_label = "Section " + H2_index + ", Paragraph " + current.paragraphs;
        if (kind === "paragraph") {
            end_label = "paragraph " + current.paragraphs + " of Section " + H2_index;
        } else if (is_list_item) {
            item_label += ", List item " + current.list_items;
            end_label = kind + " " + current.list_items + " of Section " + H2_index;
        } else {
            end_label = kind + " in Section " + H2_index;
        }
        show_diagnostics(stats, item_label, end_label, kind === "paragraph");
    }

    // True if either of the node's first two child nodes is a DIV or SPAN.
    function starts_with_div_or_span(node) {
        var i;
        var name;
        for (i = 0; i < 2 && i < node.childNodes.length; i++) {
            name = node.childNodes[i].nodeName;
            if ((name === "DIV") || (name === "SPAN")) {
                return true;
            }
        }
        return false;
    }

    // Count every node of `nodes` whose nodeName is listed in `node_names`.
    function count_items_in(container, nodes, node_names, kind, skip_div_or_span) {
        var i;
        var node;
        for (i = 0; i < nodes.length; i++) {
            node = nodes[i];
            if (node_names.indexOf(node.nodeName) < 0) {
                continue;
            }
            if (skip_div_or_span && starts_with_div_or_span(node)) {
                continue;
            }
            count_item(node, container, kind);
        }
    }

    // "3 words", "1 word", ...
    function counted(count, noun) {
        return count + " " + noun + (count !== 1 ? "s" : "");
    }

    // "P paragraphs, L list items, W words, B bytes"
    function describe(counts) {
        return counted(counts.paragraphs, "paragraph") + ", " +
            counted(counts.list_items, "list item") + ", " +
            counted(counts.words, "word") + ", " +
            counted(counts.bytes, "byte");
    }

    var alert_string = "";
    var anchors = document.anchors;
    var anchor_index;
    var temp_anchor;
    var parent_node;
    var sibling_node;
    var anchor_level;
    var prev_anchor_level = 1;    // begin at the H1 heading (the page title)
    var num_H2_anchors = 0;
    var section_name = "lead section";
    var cutoff_anchor = null;     // anchor of the section at which counting stops
    var body_content;
    var child_nodes;
    var num_child_nodes;
    var child_node_index;
    var child_node;
    var cutoff_child_node_index;
    var last_P_child_node_index;
    var H2_seen;
    var totals;
    var section_index;
    var counts;
    var images;
    var image_index;
    var temp_image;
    var num_pixels;
    var num_nonicon_images;
    var image_listing;
    var file_name;

    // ------------------------------------------------------------------
    // 1. Walk the anchors: list the H2 sections, check heading levels and
    //    find the closing section where prose counting stops.
    //    The first anchor (index 0) is skipped, as in the original script.
    // ------------------------------------------------------------------
    for (anchor_index = 1; anchor_index < anchors.length; anchor_index++) {
        temp_anchor = anchors[anchor_index];

        parent_node = temp_anchor.parentNode;
        if (!parent_node) { continue; }

        sibling_node = parent_node.nextSibling;
        if (!sibling_node) { continue; }

        // The heading follows the anchor's parent, either directly or after
        // one intervening node.
        anchor_level = heading_level(sibling_node);
        if ((anchor_level === 0) && sibling_node.nextSibling) {
            anchor_level = heading_level(sibling_node.nextSibling);
        }
        if (anchor_level === 1) {
            alert_string += " WARNING: Illegal H1 heading in this section\n";
            anchor_level = 0;     // an H1 does not take part in the jump check
        }

        // Check headings for jumps upwards in heading level
        if ((anchor_level - prev_anchor_level) > 1) {
            if (num_H2_anchors === 0) {
                alert_string += " WARNING: H" + prev_anchor_level + " to H" + anchor_level + " jump in the lead\n";
            } else {
                alert_string += " WARNING: H" + prev_anchor_level + " to H" + anchor_level + " jump in \"" + display_name(section_name) + "\"\n";
            }
        }
        if (anchor_level > 0) { prev_anchor_level = anchor_level; }

        // Check major section headings for closing sections
        if (anchor_level !== 2) { continue; }
        num_H2_anchors++;
        section_name = temp_anchor.name;
        alert_string += "Section " + num_H2_anchors + " : " + display_name(section_name) + "\n";
        if (is_closing_section(section_name)) {
            cutoff_anchor = temp_anchor;
            break;
        }
    }
    if (cutoff_anchor !== null) {
        alert_string += "\nProse counting will stop before the \"" + display_name(cutoff_anchor.name) + "\" section.\n";
    } else {
        alert_string += "\nProse counting will cover the entire article.\n";
    }
    window.alert(alert_string);

    // ------------------------------------------------------------------
    // 2. Translate the cutoff section into an index into the child nodes
    //    of the content element.
    // ------------------------------------------------------------------
    body_content = document.getElementById("bodyContent");
    if (!body_content) {
        window.alert("Cannot analyze this page: no element with id \"bodyContent\" was found.\n");
        return;
    }
    child_nodes = body_content.childNodes;
    num_child_nodes = child_nodes.length;

    cutoff_child_node_index = num_child_nodes;   // default: read everything
    if (cutoff_anchor !== null) {
        last_P_child_node_index = -1;
        H2_seen = 0;
        for (child_node_index = 0; child_node_index < num_child_nodes; child_node_index++) {
            child_node = child_nodes[child_node_index];
            if (child_node.nodeType !== 1) { continue; }   // examine only Element nodes
            if (child_node.nodeName === "P") {
                last_P_child_node_index = child_node_index;
            } else if (child_node.nodeName === "H2") {
                H2_seen++;
                if (H2_seen === num_H2_anchors) {
                    // Stop at the last P before the closing H2; counting
                    // excludes that P itself.  If there is none, or if the
                    // closing H2 is never found here, the default (read
                    // everything) stays in force.
                    if (last_P_child_node_index >= 0) {
                        cutoff_child_node_index = last_P_child_node_index;
                    }
                    break;
                }
            }
        }
    }

    // ------------------------------------------------------------------
    // 3. Count the words, paragraphs, list items and prose bytes by section.
    // ------------------------------------------------------------------
    for (child_node_index = 0; child_node_index < cutoff_child_node_index; child_node_index++) {
        child_node = child_nodes[child_node_index];
        if (child_node.nodeType !== 1) { continue; }   // examine only Element nodes

        switch (child_node.nodeName) {
        case "H2":                 // a new major section begins
            section_counts.push(current);
            current = new_counts();
            H2_index++;
            break;
        case "P":
        case "PRE":
            count_item(child_node, child_node, "paragraph");
            break;
        case "UL":                 // unordered and ordered lists: direct LI
        case "OL":                 // children only, because of possible nesting
            count_items_in(child_node, child_node.childNodes, ["LI"], "list item", false);
            break;
        case "DL":                 // discursive lists
            // NOTE(review): the DT/DD test is rebuilt from the closing
            // comment of the damaged original.
            count_items_in(child_node, child_node.childNodes, ["DT", "DD"], "discursive list item", true);
            break;
        case "BLOCKQUOTE":         // words and bytes only; not counted as paragraphs, for now
            count_items_in(child_node, child_node.getElementsByTagName("P"), ["P"], "BLOCKQUOTE", false);
            break;
        case "TABLE":              // count only tables that are cquotes
            if (child_node.className === "cquote") {
                count_items_in(child_node, child_node.getElementsByTagName("TD"), ["TD"], "CQUOTE paragraph", false);
            }
            break;
        case "DIV":                // allow only poem DIV's
            if (child_node.className === "poem") {
                count_items_in(child_node, child_node.getElementsByTagName("P"), ["P"], "poem", false);
            }
            break;
        default:
            break;
        }
    }
    section_counts.push(current);

    // ------------------------------------------------------------------
    // 4. Output the various counts.
    // ------------------------------------------------------------------
    alert_string = "Lead section: " + describe(section_counts[0]) + "\n\n";
    totals = new_counts();
    for (section_index = 0; section_index < section_counts.length; section_index++) {
        counts = section_counts[section_index];
        totals.words += counts.words;
        totals.paragraphs += counts.paragraphs;
        totals.list_items += counts.list_items;
        totals.bytes += counts.bytes;
        if (section_index > 0) {
            alert_string += "Section " + section_index + " : " + describe(counts) + "\n";
        }
    }
    if (section_counts.length > 1) { alert_string += "\n"; }   // Make space for the totals
    alert_string += "Totals: " + describe(totals) + "\n";
    window.alert(alert_string);

    // ------------------------------------------------------------------
    // 5. Count the article images, ignoring icons (5000 pixels or fewer)
    //    and the "Replace this image" placeholders.
    // ------------------------------------------------------------------
    images = document.images;
    num_nonicon_images = 0;
    image_listing = "";
    for (image_index = 0; image_index < images.length; image_index++) {
        temp_image = images[image_index];
        if (temp_image.src.match(/Replace_this_image_male\.svg/)) { continue; }
        if (temp_image.src.match(/Replace_this_image_female\.svg/)) { continue; }

        num_pixels = temp_image.width * temp_image.height;
        if (!(num_pixels > 5000)) { continue; }

        num_nonicon_images++;
        // Last path component, without the "123px-" thumbnail prefix.
        file_name = temp_image.src.split("/").pop().replace(/^(\d+)px-/, "");
        image_listing += num_nonicon_images + " " + temp_image.width + "x" + temp_image.height + " " + num_pixels + " " + file_name + "\n";
    }
    if (num_nonicon_images === 1) {
        alert_string = "This document has 1 image with more than 5000 pixels.\n\n";
    } else {
        alert_string = "This document has " + num_nonicon_images + " images with more than 5000 pixels.\n\n";
    }
    window.alert(alert_string + image_listing);
} // closes function articleStructure

// Once the page has loaded, add a "structure" tab to the page-actions
// portlet ('p-cactions'); clicking it (access key 'g') runs articleStructure().
// The callback needs its "()" parameter list to be valid JavaScript, and the
// javascript: URL must actually call the function rather than merely name it.
addOnloadHook(function () {
    mw.util.addPortletLink('p-cactions', 'javascript:articleStructure()', 'structure', 'ca-structure', 'Structure of the article', 'g', '');
});

//