// User:Lingzhi/reviewsourcecheckTESTINGONLY.js

/**
 * Reference-review helper (MediaWiki user script).
 *
 * Once the document is ready, and only on pages whose name contains none of
 * "talk:", "Talk:", "Special:" or "Wikipedia:", three passes annotate the
 * rendered page by appending short plain-text notes next to the offending
 * citation:
 *
 *   1. short-citation page checks  - "Hyphen in pg. range", "P/PP error?"
 *   2. citation metadata checks    - read from the COinS spans (class
 *      "Z3988"): missing first name / publisher / identifier / date /
 *      archive link, inconsistent use of publisher location, ...
 *   3. reference-list sort checks  - duplicate author/date, missing ref=
 *      anchor, entries out of alphabetical order.
 *
 * All text-only decisions live in pure helper functions that are exposed on
 * the returned object so they can be exercised without a DOM. Everything
 * else stays private to this closure; the script no longer leaks implicit
 * globals.
 *
 * Written in ES5 on purpose, to match the environment the original targets.
 */
var reviewSourceCheck = (function () {
    'use strict';

    /** Page-name fragments that mark pages this script must leave alone. */
    var EXCLUDED_PAGE_MARKERS = ['talk:', 'Talk:', 'Special:', 'Wikipedia:'];

    /** Substrings of the rendered citation text that count as an identifier. */
    var IDENTIFIER_MARKERS = ["arXiv", "ASIN", "Bibcode", "doi:", "ISBN", "ISSN", "JFM", "JSTOR", "LCCN", " MR ", "OCLC", " OL ", "OSTI", "PMC", "PMID", "RFC", "SSRN", "Zbl"];

    /** Publishers that are skipped by the publisher-location consistency check. */
    var LOCATION_EXEMPT_PUBLISHERS = ["Oxford University Press", "University of Calcutta", "Princeton University Press", "Cambridge University Press"];

    /** Section anchors that are treated as reference lists. */
    var REFERENCE_SECTION_ANCHORS = ["#Books", "#Journals", "#Biographies", "#Bibliography", "#References", "#Citations_and_notes", "#Literature_cited", "#Works_cited", "#Book_sources", "#Primary_sources", "#Secondary_sources", "#Sources", "#Specialized_studies"];

    /** Marker text emitted by Ucucha's script; blanked so it is not re-read. */
    var HARV_ERROR_MARKER = "Harv error: link to";
    var HARV_ERROR_BLANK = "           ";

    /** Number of characters inspected after a "p."/"pp." marker. */
    var PAGE_EXCERPT_LENGTH = 12;

    // ---------------------------------------------------------------------
    // Small generic helpers
    // ---------------------------------------------------------------------

    /** Counts the matches of a global regular expression in text. */
    function countMatches(text, pattern) {
        return (text.match(pattern) || []).length;
    }

    /**
     * Appends one note to an element as a text node.
     * A text node (rather than `innerHTML +=`) keeps the existing children
     * intact and prevents page-derived text from being parsed as markup.
     */
    function appendNote(element, note) {
        element.appendChild(document.createTextNode(note));
    }

    /** Appends every note of a list, in order. */
    function appendNotes(element, notes) {
        for (var n = 0; n < notes.length; n++) {
            appendNote(element, notes[n]);
        }
    }

    /**
     * Returns the current page name, or null when it cannot be determined.
     * Prefers mw.config; falls back to the legacy wgPageName global that the
     * original script relied on.
     */
    function currentPageName() {
        if (typeof mw !== 'undefined' && mw.config && typeof mw.config.get === 'function') {
            return mw.config.get('wgPageName');
        }
        if (typeof wgPageName !== 'undefined') {
            return wgPageName;
        }
        return null;
    }

    /**
     * Tells whether the checks should run on a page.
     * @param {string} pageName
     * @returns {boolean} false when the name contains an excluded marker.
     */
    function isCheckablePage(pageName) {
        for (var m = 0; m < EXCLUDED_PAGE_MARKERS.length; m++) {
            if (pageName.indexOf(EXCLUDED_PAGE_MARKERS[m]) >= 0) {
                return false;
            }
        }
        return true;
    }

    // ---------------------------------------------------------------------
    // First check: hyphen in page range, p./pp. mismatch
    // ---------------------------------------------------------------------

    /**
     * Examines one ";"-separated fragment of a short citation.
     *
     * @param {string} fragment text of the fragment
     * @param {boolean} isShortCiteLink whether the link's href starts with "#CITEREF"
     * @returns {string[]} notes to append, in output order
     */
    function pageRangeNotes(fragment, isShortCiteLink) {
        var notes = [];
        var commaCount = countMatches(fragment, /,/g);
        var text;
        var start;
        var excerpt;

        if (fragment.indexOf('pp.') > 0) {
            // Blank the marker so output from Ucucha's script is not grabbed
            // and added to this output.
            text = fragment.replace(HARV_ERROR_MARKER, HARV_ERROR_BLANK);
            start = text.indexOf('pp.');
            excerpt = text.substring(start, start + PAGE_EXCERPT_LENGTH);

            if (excerpt.indexOf('-') > 0) {
                notes.push(" Hyphen in pg. range; ");
            }
            // "pp." with nothing that indicates several pages. Ampersands,
            // " and " and two or more commas are tolerated to avoid false
            // positives such as "Brennan, Heathcote & Lucas 1984, p. 9".
            if (isShortCiteLink &&
                    text.indexOf('–') < 0 &&
                    text.indexOf('&') < 0 &&
                    commaCount < 2 &&
                    text.indexOf('-') < 0 &&
                    text.indexOf(' and ') < 0) {
                notes.push(" P/PP error? " + excerpt + "; ");
            }
        } else if (isShortCiteLink && fragment.indexOf(' p.') > 0) {
            text = fragment.replace(HARV_ERROR_MARKER, HARV_ERROR_BLANK);
            start = text.indexOf(' p.');
            excerpt = text.substring(start, start + PAGE_EXCERPT_LENGTH);

            if (excerpt.indexOf('–') > 0 || countMatches(excerpt, /,/g) > 0 || excerpt.indexOf('-') > 0) {
                // Commas irrelevant to pagination:
                // "p. 23, note 7"; "p. 23, n. 7"; "p.23, citing Smith 1989".
                if (excerpt.indexOf(', not') < 0 && excerpt.indexOf(', n.') < 0 && excerpt.indexOf(', cit') < 0) {
                    notes.push(" P/PP error? " + excerpt + "; ");
                }
            }
            if (excerpt.indexOf('-') > 0) {
                notes.push(" Hyphen in pg. range; ");
            }
        }
        return notes;
    }

    /** Runs the page checks over every link of the document. */
    function checkShortCitePages() {
        var links = document.links;
        for (var i = 0; i < links.length; i++) {
            var link = links[i];
            var holder = link.parentNode;

            // sfnm renders several links as siblings inside one parent. The
            // parent's text is examined once, for the first of them only;
            // otherwise the notes would be duplicated for every sibling.
            if (Array.prototype.indexOf.call(holder.children, links[i - 1]) >= 0) {
                continue;
            }

            var isShortCiteLink = (link.getAttribute('href') || '').indexOf('#CITEREF') === 0;
            // Split before anything is appended, so our own notes (which
            // contain ";") are never re-examined within this link.
            var fragments = holder.textContent.split(";");
            for (var k = 0; k < fragments.length; k++) {
                appendNotes(holder, pageRangeNotes(fragments[k], isShortCiteLink));
            }
        }
    }

    // ---------------------------------------------------------------------
    // Second check: citation metadata read from the COinS title
    // ---------------------------------------------------------------------

    /**
     * Produces the metadata notes for one citation.
     *
     * @param {string} coins the title attribute of the "Z3988" span
     * @param {string} sourceText text content of the span's parent. Nothing
     *     in the COinS title indicates that a link has been archived, so the
     *     rendered text is consulted as well.
     * @param {string} holderTag nodeName of the span's parent
     * @param {{withPlace: number, withoutPlace: number}} locationTally
     *     running counts of book citations with/without a publisher
     *     location; updated in place.
     * @returns {string[]} notes to append, in output order
     */
    function citationNotes(coins, sourceText, holderTag, locationTally) {
        var notes = [];

        if (holderTag != "LI" && holderTag != "SPAN") {
            notes.push(" Warning: Unexpected result – extra formatting in template? ");
        }

        if (coins.indexOf("rft.atitle=") > 0 && coins.indexOf("rft.btitle=") > 0) {
            if (sourceText.indexOf(" pp.") < 0 && sourceText.indexOf(" p.") < 0) {
                notes.push(" Caution: Missing pagenums for book chapter? ");
            }
        }

        // An author value without "+" (an encoded space) has a single name.
        var authorParts = coins.split("rft.au=");
        if (coins.indexOf("rft.au=") > 0 && sourceText.indexOf("et al.") < 0) {
            for (var a = 1; a < authorParts.length; a++) {
                if (authorParts[a].indexOf("+") < 0) {
                    notes.push(" Missing first name for: " + authorParts[a].split("&")[0] + " ; ");
                }
            }
        }

        var hasIdentifier = false;
        for (var q = 0; q < IDENTIFIER_MARKERS.length; q++) {
            if (sourceText.indexOf(IDENTIFIER_MARKERS[q]) > 0) {
                hasIdentifier = true;
            }
        }

        if (coins.indexOf("rft.genre=article") > 0 && !hasIdentifier) {
            notes.push(" Missing identifier (ISSN, JSTOR, etc.); ");
        }

        if (coins.indexOf("rft.genre=book") > 0) {
            var exemptPublisher = LOCATION_EXEMPT_PUBLISHERS.some(function (publisher) {
                return sourceText.indexOf(publisher) >= 0;
            });
            if (!exemptPublisher) {
                // Reported from the first citation onwards at which both
                // styles have been seen on the page.
                if (coins.indexOf("rft.place") < 0) {
                    locationTally.withoutPlace += 1;
                    if (locationTally.withPlace > 0) {
                        notes.push(" Inconsistent use of Publisher Location (" +
                            locationTally.withPlace + " with; " + locationTally.withoutPlace + " without ); ");
                    }
                } else {
                    locationTally.withPlace += 1;
                    if (locationTally.withoutPlace > 0) {
                        notes.push(" Inconsistent use of Publisher Location (" +
                            locationTally.withPlace + " with; " + locationTally.withoutPlace + " without); ");
                    }
                }
            }

            if (coins.indexOf("rft.pub") < 0) {
                notes.push(" Missing Publisher; ");
            }

            var dateAt = coins.indexOf("rft.date");
            if (dateAt > 0) {
                // The four characters after "rft.date=".
                var year = coins.slice(dateAt + 9, dateAt + 13);
                if (Number(year) >= 1970) {
                    if (!hasIdentifier) {
                        notes.push(" Missing ISBN; ");
                    }
                } else {
                    // Looking for ") [" is a kluge: nothing but the rendered
                    // "(pubdate) [origdate]" indicates that the original
                    // year is populated, and there is little restriction on
                    // the format of the two dates.
                    if (coins.indexOf("rft.isbn") > 0 && sourceText.indexOf(") [") < 0) {
                        notes.push(" Pub. too early for ISBN, perhaps needs ; ");
                    }
                    if (!hasIdentifier) {
                        notes.push(" Missing Identifier/control number, e.g. OCLC; ");
                    }
                }
            } else {
                notes.push(" Missing Year/Date; ");
            }
        }

        if (coins.indexOf("http") > 0 && coins.indexOf("rft.genre=book") < 0) {
            if (sourceText.indexOf("rchived") < 0) {
                notes.push(" Missing archive link; ");
                if (sourceText.indexOf("Retrieved") < 0 && coins.indexOf("rft.date") < 0) {
                    notes.push(" Missing access date; ");
                }
            }
        }

        return notes;
    }

    /** Runs the metadata checks over every COinS span of the document. */
    function checkCitationMetadata() {
        // The span's title gives direct access to the citation metadata.
        var spans = document.getElementsByClassName("Z3988");
        var locationTally = { withPlace: 0, withoutPlace: 0 };
        for (var i = 0; i < spans.length; i++) {
            var holder = spans[i].parentNode;
            appendNotes(holder, citationNotes(spans[i].title, holder.textContent, holder.nodeName, locationTally));
        }
    }

    // ---------------------------------------------------------------------
    // Third check: order of the reference lists
    // ---------------------------------------------------------------------

    /**
     * Converts a table-of-contents entry to its section anchor.
     * Every space becomes an underscore (the original converted only the
     * first one, so multi-word headings never matched).
     */
    function tocTextToAnchor(text) {
        return "#" + text.replace(/ /g, "_");
    }

    /** Collects the section anchors listed in the page's table of contents. */
    function collectTocAnchors() {
        var entries = document.getElementsByClassName("toctext");
        var anchors = [];
        for (var z = 0; z < entries.length; z++) {
            anchors.push(tocTextToAnchor(entries[z].innerText));
        }
        return anchors;
    }

    /**
     * Returns the reference-section anchors in bottom-to-top page order.
     *
     * Sections are processed from the bottom up so that, when an article has
     * stacked headings (for example Primary and Secondary references listed
     * under Works cited), entries of the lower list are not merged into the
     * sorted list of a higher one, which would cause numerous confusing
     * false positives.
     *
     * @param {string[]} tocAnchors anchors in page order; not modified
     * @returns {string[]}
     */
    function referenceAnchorsBottomUp(tocAnchors) {
        return tocAnchors.slice().reverse().filter(function (anchor) {
            return REFERENCE_SECTION_ANCHORS.indexOf(anchor) > -1;
        });
    }

    /** Drops a leading "A ", "An " and "The " (checked in that order) and re-capitalises. */
    function stripLeadingArticle(text) {
        var articles = ["A ", "An ", "The "];
        for (var a = 0; a < articles.length; a++) {
            if (text.slice(0, articles[a].length) === articles[a]) {
                text = text.slice(articles[a].length);
                text = text.charAt(0).toUpperCase() + text.slice(1);
            }
        }
        return text;
    }

    /**
     * Builds the sort key of a citation that has no id, from its text.
     *
     * @param {string} citeText rendered text of the citation
     * @param {string} previousAuthor author part of the preceding entry,
     *     substituted when the entry starts with an em dash
     *     (as produced by |author-mask=———)
     * @returns {?string} the key, or null when there is nothing to sort on
     */
    function sortKeyFromText(citeText, previousAuthor) {
        var key = citeText.replace('"', '');
        if (key === '') {
            return null;
        }
        key = stripLeadingArticle(key.trim());

        if (key.charAt(0) === "—") {
            key = previousAuthor + key;
        }
        key = key.replace('"', '');

        var closeParen = key.indexOf(")");
        if (closeParen > 0) {
            key = key.substring(0, closeParen) + ")";
            // Handle (April 2006) or (04-11-2006) or anything not (YYYY):
            // move the year to the front of the parenthesis.
            var yearMatch = /\d{4}/.exec(key);
            if (yearMatch !== null) {
                key = key.replace(yearMatch[0], "");
                key = key.replace("(", "(" + yearMatch[0] + "-");
            }
            key = key.replace(" )", ")");
        }

        // Only the first five words take part in the comparison.
        if (countMatches(key, /\s/g) > 5) {
            key = key.split(" ", 5).join(" ");
        }
        return key;
    }

    /**
     * Builds the sort key of a citation from its "CITEREF..." id.
     * When the first author's name (the citation text up to the first
     * comma) occurs later in the key, it is moved to the front, for example
     * A._Sanjoy2011 --> Sanjoy_A._2011.
     */
    function sortKeyFromAnchor(anchorId, citeText) {
        var key = anchorId.replace("CITEREF", "");
        var firstAuthor = citeText.split(",")[0].replace('"', '');
        if (key.indexOf(firstAuthor) > 0) {
            key = firstAuthor + "_" + key.replace(firstAuthor, "");
        }
        return key;
    }

    /** Removes anything that looks like an HTML tag. */
    function stripHtml(text) {
        return text.replace(/<(?:.|\n)*?>/gm, '');
    }

    /**
     * Derives, from a sort key, the text substituted for an em-dash author
     * mask in the following entry: curly braces, em dashes and closing
     * parentheses are removed.
     */
    function authorFromSortKey(key) {
        return key.replace(/[{}]/g, '').replace(/—/g, "").replace(/\)/g, '').trim();
    }

    /**
     * Compares a list of keys with its locale-sorted order.
     * @param {string[]} keys keys in page order; not modified
     * @returns {{position: number, expected: string}[]} one entry per
     *     position whose key differs from the key expected there
     */
    function sortMismatches(keys) {
        var expected = keys.slice().sort(new Intl.Collator().compare);
        var mismatches = [];
        for (var p = 0; p < keys.length; p++) {
            if (keys[p] != expected[p]) {
                mismatches.push({ position: p, expected: expected[p] });
            }
        }
        return mismatches;
    }

    /**
     * Checks every reference list for duplicate and out-of-order entries.
     * @param {string[]} tocAnchors section anchors in page order
     */
    function checkReferenceOrder(tocAnchors) {
        var sections = referenceAnchorsBottomUp(tocAnchors);
        var alreadySorted = [];

        for (var r = 0; r < sections.length; r++) {
            // The list is expected to be the element that follows the
            // heading which contains the anchor.
            var cites = jQuery(sections[r]).parent().next().find('.citation');
            var keysInPageOrder = [];
            var citeIndices = [];
            var previousAuthor = '';

            for (var h = 0; h < cites.length; h++) {
                var cite = cites[h];
                if (alreadySorted.indexOf(cite) > -1) {
                    continue;
                }

                var anchorId = cite.getAttribute('id');
                // Only entries without an id, or with a CITEREF id, are sorted.
                if (anchorId && anchorId.indexOf('CITEREF') !== 0) {
                    continue;
                }

                // Full citations placed inside a footnote are not list entries.
                var grandparent = cite.parentNode && cite.parentNode.parentNode;
                var grandparentId = (grandparent && grandparent.getAttribute) ? grandparent.getAttribute('id') : null;
                if (grandparentId && grandparentId.indexOf('cite_note') > -1) {
                    continue;
                }

                if (!anchorId || anchorId.indexOf('CITEREF') < 0) {
                    appendNote(cite, " Caution: Missing ref= anchor?; ");
                }

                var key;
                if (anchorId == null) {
                    key = sortKeyFromText(cite.innerText, previousAuthor);
                    if (key === null) {
                        continue;
                    }
                } else {
                    key = sortKeyFromAnchor(anchorId, cite.innerText);
                }
                key = stripHtml(key);
                previousAuthor = authorFromSortKey(key);

                if (keysInPageOrder.indexOf(key) > -1) {
                    appendNote(cite, " Warning: duplicate author/date: " + key + "; ");
                }
                keysInPageOrder.push(key);
                citeIndices.push(h);
                alreadySorted.push(cite);
            }

            var mismatches = sortMismatches(keysInPageOrder);
            for (var m = 0; m < mismatches.length; m++) {
                appendNote(cites[citeIndices[mismatches[m].position]],
                    " Sort error, expected: " + mismatches[m].expected + " ; ");
            }
        }
    }

    // ---------------------------------------------------------------------
    // Entry point
    // ---------------------------------------------------------------------

    /** Runs all three checks on the current page, if it is eligible. */
    function run() {
        var pageName = currentPageName();
        if (pageName === null || !isCheckablePage(pageName)) {
            return;
        }
        var tocAnchors = collectTocAnchors();
        checkShortCitePages();
        checkCitationMetadata();
        checkReferenceOrder(tocAnchors);
    }

    // Outside a browser page (no jQuery/DOM) only the helpers are provided.
    if (typeof jQuery !== 'undefined' && typeof document !== 'undefined') {
        jQuery(document).ready(run);
    }

    return {
        isCheckablePage: isCheckablePage,
        pageRangeNotes: pageRangeNotes,
        citationNotes: citationNotes,
        tocTextToAnchor: tocTextToAnchor,
        referenceAnchorsBottomUp: referenceAnchorsBottomUp,
        stripLeadingArticle: stripLeadingArticle,
        sortKeyFromText: sortKeyFromText,
        sortKeyFromAnchor: sortKeyFromAnchor,
        authorFromSortKey: authorFromSortKey,
        sortMismatches: sortMismatches,
        run: run
    };
}());