// User:Andrybak/Scripts/Archiver.js

/* * * This script is a fork of https://en.wikipedia.org/w/index.php?title=User:Enterprisey/archiver.js&oldid=1113588553 * which was forked from https://en.wikipedia.org/w/index.php?title=User:%CE%A3/Testing_facility/Archiver.js&oldid=1003561411 */

/* * Documentation of CSS classes. * * .arky-span is the main custom class of the script. * Inside a .arky-span is an archive link, which triggers selection. * .arky-span tags also store data (not visible in the UI) associated with * corresponding sections: the index of the section and heading level * (i.e. ==2==, ===3===, etc) * Tags with .arky-span class are also called "archive spans". * * .arky-selected-section is put onto the whole semantic heading * of a section, selected by user for archiving. * During selection the class is used for styling (a light-blue background). * After clicking "archive ... selected threads" the class is used to * find all the archive spans, which live inside the semantic heading. */

// Human-readable name of this userscript; used as the title of notifications.
const USERSCRIPT_NAME = "Archiver";

/**
 * Shows a notification to the user via mw.notify, titled with the script name.
 *
 * @param notificationMessage the message handed over to mw.notify
 */
function notifyUser(notificationMessage) {
	const notificationOptions = { title: USERSCRIPT_NAME };
	mw.notify(notificationMessage, notificationOptions);
}

// Prefix put in front of every console message written by this script.
const LOG_PREFIX = "[" + USERSCRIPT_NAME + "]:";

/** Writes the given values to console.error, preceded by LOG_PREFIX. */
function error(...toLog) {
	const parts = [LOG_PREFIX, ...toLog];
	console.error(...parts);
}

/** Writes the given values to console.warn, preceded by LOG_PREFIX. */
function warn(...toLog) {
	const parts = [LOG_PREFIX, ...toLog];
	console.warn(...parts);
}

/** Writes the given values to console.info, preceded by LOG_PREFIX. */
function info(...toLog) {
	const parts = [LOG_PREFIX, ...toLog];
	console.info(...parts);
}

/** Writes the given values to console.debug, preceded by LOG_PREFIX. */
function debug(...toLog) {
	const parts = [LOG_PREFIX, ...toLog];
	console.debug(...parts);
}

/**
 * Builds the short advertisement that is appended to edit summaries.
 *
 * @returns {string} the text "using <script name>"
 */
function constructAd() {
	// TODO maybe also introduce versions + include version in the ad?
	return `using ${USERSCRIPT_NAME}`;
}

/**
 * Builds a complete edit summary: the main text followed by the
 * advertisement from constructAd() in parentheses.
 *
 * @param mainEditSummary description of the edit itself
 * @returns {string} "<mainEditSummary> (<ad>)"
 */
function constructEditSummary(mainEditSummary) {
	// constructAd must be *called*; interpolating the bare function
	// would paste its source code into the summary.
	return `${mainEditSummary} (${constructAd()})`;
}

$.when( mw.loader.using(['mediawiki.util','mediawiki.api']), $.ready).done( function {	/*	 * Reference documentation about keys and values in mw.config:	 * https://www.mediawiki.org/wiki/Manual:Interface/JavaScript#mw.config	 */	if (!mw.config.get('wgIsArticle')) { // This variable is badly named -- it is not related to a page being a main namespace "article".		info('Not a wiki page. Aborting.');		return;	}	if (mw.config.get('wgArticleId') === 0 || mw.config.get('wgRevisionId') === 0) {		info('Either the page does not exist yet or it is a diffonly=yes view. Aborting.');		return;	}	if (mw.config.get("wgNamespaceNumber") % 2 == 0 && mw.config.get("wgNamespaceNumber") != 4) {		// not a talk page and not project namespace		info('Not a discussion namespace. Aborting.');		return;	}	if (mw.config.get("wgNamespaceNumber") == -1) {		// is a special page		info('This is a "Special:" page. Aborting.');		return;	}	const parserOutput = document.querySelector('#mw-content-text .mw-parser-output');	if (!parserOutput || $(parserOutput).find(':header').length === 0) {		info('Nothing to archive here. Aborting.');		return;	}	if (mw.config.get('wgDiffNewId') != null || mw.config.get('wgDiffOldId') != null) {		info('Detected diff view. Aborting.');		return;	}

// Styling for sections picked for archiving: light-blue heading, bold [archive] link.
const selectedSectionCss = ".arky-selected-section { background-color:#D9E9FF } .arky-selected-section .arky-span a { font-weight:bold }";
mw.util.addCSS(selectedSectionCss);

// State shared by the functions below.
// Section index -> { start, end } offsets; filled in by extractValidSections.
let sectionCodepointOffsets = {};
// Wikitext of the current page; filled in by addArchiveLinks.
let wikiText = "";
// The timestamp when we originally got the page contents - we pass it to the "edit" API call for edit conflict detection
let revStamp;

// Link in the page actions menu that switches the archival mode on and off.
var portletLink = mw.util.addPortletLink("p-cactions", "#", "ⵙCA", "ca-oecaAndrybak", "Enter/exit the archival process", null, null);
// The big "archive ..." button; it is configured and attached to the page further below.
var archiveButton = $(document.createElement("button"));
// Result of findHighestArchiveSubpage(); stays null until the archival mode is entered for the first time.
let highestArchiveSubpagePromise = null;
$(portletLink).click(function (e) {
	// The link points at "#": do not let the browser jump to the top of the page.
	e.preventDefault();
	// Every switch of the mode starts from an empty selection.
	// Note that removeClass() expects a bare class name, not a selector with a leading dot.
	$(".arky-selected-section").removeClass('arky-selected-section');
	// The selection is gone, so bring the button back to its initial label and state.
	archiveButton.prop('disabled', false).text("archive all the selected threads");
	$(".arky-span").toggle();
	archiveButton.toggle();
	if (highestArchiveSubpagePromise == null) {
		/*
		 * Start searching for the archive subpage with highest number immediately.
		 * Then the click listener on `archiveButton` will wait for this `Promise`.
		 */
		highestArchiveSubpagePromise = findHighestArchiveSubpage();
	}
});

// The button sticks to the bottom of the viewport and starts out hidden;
// the portlet link toggles its visibility.
archiveButton.html("archive all the selected threads")
	.attr("id", 'arky-archive-button')
	.css("position", 'sticky')
	.css("bottom", 0)
	.css("width", '100%')
	.css("font-size", '200%');
$(document.body).append(archiveButton);
archiveButton.toggle();
archiveButton.click(function (e) {
	// Section indexes are stored as data on the archive spans of the selected headings.
	var selectedSections = $(".arky-selected-section .arky-span").map(function () {
		return $(this).data("section");
	}).toArray();
	if (selectedSections.length === 0) {
		return alert("No threads selected, aborting");
	}
	const timeoutId = setTimeout(() => {
		/*
		 * In case highestArchiveSubpagePromise is taking a long time,
		 * show to the user that stuff is happening.
		 */
		archiveButton.text("Loading...");
	}, 1000);
	highestArchiveSubpagePromise.then(result => {
		clearTimeout(timeoutId);
		info("Successful highestArchiveSubpagePromise:", result);
		doArchive(selectedSections, result);
	}, rejection => {
		// Also cancel the timer here, otherwise "Loading..." shows up after the search has already failed.
		clearTimeout(timeoutId);
		info("Failed highestArchiveSubpagePromise:", rejection);
		const currentPageName = mw.config.get("wgPageName");
		// The archive number is unknown: suggest a placeholder for the user to edit.
		doArchive(selectedSections, archiveSpacedSubpageName(currentPageName, "???"));
	});
}); // end of archiveButton click handler

addArchiveLinks();

/**
 * Returns the integer halfway between `lower` and `upper`, rounded down.
 */
function midPoint(lower, upper) {
	const halfDistance = (upper - lower) / 2;
	return Math.floor(lower + halfDistance);
}

/*
 * Based on https://en.wikipedia.org/wiki/Module:Exponential_search
 *
 * Probes `testFunc` (awaited on every call) starting at `i`.
 * While the probe passes and no upper bound is known, `i` is doubled.
 * Once a probe fails, the failing value becomes the upper bound and
 * the search continues with the midpoint between the bounds.
 * Returns the probed value `i` that passes while `i + 1` equals the upper bound.
 */
async function exponentialSearch(testFunc, i, lower, upper) {
	for (;;) {
		const passes = await testFunc(i);
		if (!passes) {
			// `i` is too big: it becomes the new upper bound
			upper = i;
			i = midPoint(lower, upper);
			continue;
		}
		if (i + 1 == upper) {
			return i;
		}
		// `i` is good: it becomes the new lower bound
		lower = i;
		i = upper ? midPoint(lower, upper) : i * 2;
	}
}

/**
 * Name of an archive subpage in the "<page>/Archive <number>" style.
 */
function archiveSpacedSubpageName(pageName, archiveNumber) {
	return `${pageName}/Archive ${archiveNumber}`;
}

/**
 * Name of an archive subpage in the "<page>/Archive/<number>" style.
 */
function archiveSlashedSubpageName(pageName, archiveNumber) {
	return `${pageName}/Archive/${archiveNumber}`;
}

/*
 * Based on https://en.wikipedia.org/wiki/Wikipedia_talk:User_scripts/Archive_7#nocreate-missing
 *
 * Sends an action=query request for `title` and resolves to `false`
 * when the first page entry of the response carries a "missing" key,
 * and to `true` otherwise.
 */
async function pageExists(title) {
	const queryParameters = {
		"action": "query",
		"format": "json",
		"titles": title
	};
	const { query } = await new mw.Api().get(queryParameters);
	const [firstPage] = Object.values(query.pages);
	return !("missing" in firstPage);
}

/*
 * Find the subpage of this page, which will be used as destination/target of archiving.
 * It is just "Archive 1" by default, but can be increased by exponentialSearch.
 *
 * Returns a Promise that resolves to the full title of the archive subpage.
 * If any of the lookups fails, the user is notified and the Promise
 * is rejected with the original error.
 */
async function findHighestArchiveSubpage() {
	info("findHighestArchiveSubpage: start");
	try {
		const currentPageName = mw.config.get("wgPageName");
		const currentYear = new Date().getUTCFullYear();
		/*
		 * Check if "current year" subpage is a good candidate for
		 * pages with https://en.wikipedia.org/wiki/Template:Archived_annually
		 * TODO: maybe implement checking if the template is transcluded.
		 */
		const lastYearExists = await pageExists(archiveSpacedSubpageName(currentPageName, currentYear - 1));
		if (lastYearExists && !(await pageExists(archiveSpacedSubpageName(currentPageName, currentYear + 1)))) {
			return archiveSpacedSubpageName(currentPageName, currentYear);
		}

		// Pick the naming style by looking for the first numbered archive.
		let subpageFunc;
		if (await pageExists(archiveSpacedSubpageName(currentPageName, 1))) {
			subpageFunc = archiveSpacedSubpageName;
		} else if (await pageExists(archiveSlashedSubpageName(currentPageName, 1))) {
			subpageFunc = archiveSlashedSubpageName;
		} else {
			notifyUser("Cannot find the first archive subpage");
			info('Assuming zero archive subpages.');
			return archiveSpacedSubpageName(currentPageName, 1);
		}

		const checkArchiveSubpageExists = (archiveNumber) =>
			pageExists(subpageFunc(currentPageName, archiveNumber));
		// see also https://en.wikipedia.org/wiki/Module:Highest_archive_number
		const highestNumber = await exponentialSearch(checkArchiveSubpageExists, 10, 1, null);
		return subpageFunc(currentPageName, highestNumber);
	} catch (e) {
		const msg = "Cannot find archive subpage with the highest number";
		error(msg, e);
		notifyUser(msg);
		// Rethrow, so that the caller's rejection handler can fall back to a placeholder.
		throw e;
	}
}

/*
 * Moves the selected sections from the current page to an archive page.
 *
 * `selectedSections` is an array of section indexes, i.e. keys of `sectionCodepointOffsets`.
 * `highestArchiveSubpage` is the page title offered to the user as the default target.
 *
 * The user is asked to confirm or change the target. Then the sections are removed
 * from the current page with one edit and appended to the target with a second edit.
 * The page is reloaded afterwards.
 */
function doArchive(selectedSections, highestArchiveSubpage) {
	// returns `s` without the substring starting at `start` and ending at `end`
	function cut(s, start, end) {
		return s.substring(0, start) + s.substring(end);
	}

	// Without the wikitext every offset would map to 0 and the first edit would blank the page.
	if (!wikiText) {
		return alert("The wikitext of this page has not been loaded yet, aborting. Please try again in a moment.");
	}

	const archivePageName = prompt("Archiving " + selectedSections.length + " threads: where should we move them to? The latest archive number seems to be:", highestArchiveSubpage);
	if (!archivePageName || archivePageName == mw.config.get("wgPageName")) {
		return alert("No archive target selected, aborting");
	}

	// codepointToUtf16Idx maps codepoint idx (i.e. MediaWiki index into page text) to utf-16 idx (i.e. JavaScript index into wikiText)
	const codepointToUtf16Idx = {};

	// Initialize "important" (= either a section start or end) values to 0
	selectedSections.forEach(function (n) {
		codepointToUtf16Idx[sectionCodepointOffsets[n].start] = 0;
		codepointToUtf16Idx[sectionCodepointOffsets[n].end] = 0;
	});
	codepointToUtf16Idx[Infinity] = Infinity; // Because sometimes we'll have Infinity as an "end" value

	// fill in our mapping from codepoints (MediaWiki indices) to utf-16 (i.e. JavaScript).
	// yes, this loops through every character in the wikitext. very unfortunate.
	let codepointPos = 0;
	for (let utf16Pos = 0; utf16Pos < wikiText.length; utf16Pos++, codepointPos++) {
		if (Object.prototype.hasOwnProperty.call(codepointToUtf16Idx, codepointPos)) {
			codepointToUtf16Idx[codepointPos] = utf16Pos;
		}
		if (wikiText.codePointAt(utf16Pos) > 0xFFFF) {
			// surrogate pair! utf16Pos goes up by 2, but codepointPos goes up by only 1.
			utf16Pos++; // skip the low surrogate
		}
	}

	// utf-16 ranges of the selected sections, in the order of selection
	const ranges = selectedSections.map(function (n) {
		return {
			start: codepointToUtf16Idx[sectionCodepointOffsets[n].start],
			end: codepointToUtf16Idx[sectionCodepointOffsets[n].end]
		};
	});

	const newTextForArchivePage = ranges
		.map((range) => wikiText.substring(range.start, range.end))
		.join("");

	// cut from the back of the page to the front, so that we don't invalidate the offsets of earlier sections
	let newWikiText = wikiText;
	[...ranges]
		.sort((a, b) => b.start - a.start)
		.forEach(function (range) {
			newWikiText = cut(newWikiText, range.start, range.end);
		});

	info("archive this:" + newTextForArchivePage);
	info("revised page:" + newWikiText);
	const pluralizedThreads = selectedSections.length + ' thread' + ((selectedSections.length === 1) ? '' : 's');
	const api = new mw.Api();
	api.postWithToken("csrf", {
		action: 'edit',
		title: mw.config.get("wgPageName"),
		text: newWikiText,
		summary: constructEditSummary(`Removing ${pluralizedThreads}, will be on ${archivePageName}`),
		basetimestamp: revStamp,
		starttimestamp: revStamp
	})
		.done(function (res1) {
			alert("Successfully removed threads from talk page");
			info(res1);
			// Only after the removal went through, append the threads to the archive.
			api.postWithToken("csrf", {
				action: 'edit',
				title: archivePageName,
				appendtext: "\n" + newTextForArchivePage,
				summary: constructEditSummary(`Adding ${pluralizedThreads} from ${mw.config.get("wgPageName")}`)
			})
				.done(() => alert("Successfully added threads to archive page"))
				.fail(() => alert("Failed to add threads to archive page. Manual inspection needed."))
				.always(function (res2) {
					info(res2);
					window.location.reload();
				});
		})
		.fail(function (res1) {
			alert("Failed to remove threads from talk page. Aborting archive process.");
			error(res1);
			window.location.reload();
		});
} // end of doArchive

/*
 * Filters the result of the API query.
 * Plus, importantly, populates the global variable `sectionCodepointOffsets`.
 *
 * `apiResultSections` is the list of section descriptions from the API;
 * the fields `index` and `byteoffset` of every section are used.
 * Returns an object, which maps section index to the section description.
 */
function extractValidSections(apiResultSections) {
	const validSections = {};

	// For sections transcluded from other pages, s.index will look
	// like T-1 instead of just 1. Remove those.
	for (const section of Array.from(apiResultSections || [])) {
		if (String(section.index) === String(Number.parseInt(section.index, 10))) {
			validSections[section.index] = section;
		}
	}

	// record the offsets in the global variable
	for (const key of Object.keys(validSections)) {
		const index = Number.parseInt(key, 10);
		const hasNextSection = Object.prototype.hasOwnProperty.call(validSections, index + 1);
		// What MediaWiki calls "byteoffset" is actually a codepoint offset!! Drat!!
		sectionCodepointOffsets[index] = {
			start: validSections[index].byteoffset,
			// a section without a following valid section runs until the end of the page
			end: hasNextSection ? validSections[index + 1].byteoffset : Infinity
		};
	}

	return validSections;
}

/*
 * The convoluted way of "header" vs "headerContainer" is needed, because
 * there are different HTML layouts for "headings" in different skins.
 * In Vector 2022, layout of ==Second level== versus ===Third level===
 * headings is different even for a _single_ skin.
 *
 * The HTML layout is either
 *     <div class="mw-heading"> <hN>...</hN> <span class="mw-editsection">...</span> </div>
 * or
 *     <hN> ... <span class="mw-editsection">...</span> </hN>
 *
 * "headerContainer" is always the outer of the tags, it always contains the <span class="mw-editsection"> tags.
 * "header" is always one of the <hN> tags (<h1>, <h2>, <h3>, etc).
 * Meaning that in some cases "header" and "headerContainer" is the same HTML element.
 *
 * arky-span, aka archiveSpans are put inside the <span class="mw-editsection">.
 *
 * For details, see:
 *  - https://www.mediawiki.org/w/index.php?title=Heading_HTML_changes&oldid=6538029
 *  - https://en.wikipedia.org/wiki/Wikipedia:Village_pump_(technical)/Archive_213#Tech_News_%E2%80%93_User%3AEnterprisey%2Farchiver.js
 */

// Returns a plain HTMLElement: the ".mw-editsection" element, which belongs
// to the given heading tag, or null when there is no such element.
function findEditSectionForHeader(header) {
	// in some layouts, the bracketed [edit] section link lives inside of the heading tag
	const maybeVectorEditSection = header.querySelector('.mw-editsection');
	if (maybeVectorEditSection) {
		return maybeVectorEditSection;
	}
	// in other layouts, the bracketed [edit] section link is a sibling of the
	// heading tag: both are wrapped in an element with class "mw-heading"
	const wrapper = header.parentElement;
	if (wrapper && wrapper.classList.contains('mw-heading')) {
		const maybeEditSection = wrapper.querySelector('.mw-editsection');
		if (maybeEditSection) {
			return maybeEditSection;
		}
	}
	return null;
}

// Returns a jQuery object with the header container of the given archive span:
// ancestors with class "mw-heading", if there are any, otherwise ancestor heading tags.
// If neither is found, notifies the user, logs the offending tags and returns null.
function findHeaderContainerForArchiveSpan(archiveSpan) {
	const jQueryArchiveSpan = $(archiveSpan);
	const maybeDivMwHeading = jQueryArchiveSpan.parents('.mw-heading');
	if (maybeDivMwHeading.length > 0) {
		return maybeDivMwHeading;
	}
	const maybeHeaderParent = jQueryArchiveSpan.parents(':header');
	if (maybeHeaderParent.length > 0) {
		return maybeHeaderParent;
	}
	notifyUser("findHeaderContainerForArchiveSpan: Cannot parse section headings in this skin. Aborting.");
	error("findHeaderContainerForArchiveSpan: Tags for bug report:", archiveSpan, archiveSpan.parentElement);
	return null;
}

/*
 * Gives the top-level element of the whole heading.
 * If the parent of `header` has the class "mw-heading", it is that parent.
 * Otherwise it is `header` itself.
 *
 * Returns a plain HTML element.
 */
function getHeaderContainer(header) {
	const wrapper = header.parentElement;
	return wrapper.classList.contains('mw-heading') ? wrapper : header;
}

/*	 * Create the bracketed [archive] links next to the [edit] section links. * These [archive] links are used by a user to select sections for archival. */	function addArchiveLinks { // grab page sections and wikitext so we can add the "archive" links to appropriate sections new mw.Api.get({action: 'parse', page: mw.config.get("wgPageName")}).done(function(parseApiResult) {			new mw.Api.get({action: 'query', pageids: mw.config.get("wgArticleId"), prop: ['revisions'], rvprop: ['content', 'timestamp']}).done(function(revisionsApiResult) { var rv; rv = revisionsApiResult.query.pages[mw.config.get("wgArticleId")].revisions[0]; wikiText = rv["*"]; revStamp = rv['timestamp']; });

const validSections = extractValidSections(parseApiResult.parse.sections);

/*			 * The search for all section headings starts with * finding all  tags, which aren't for the table of contents. * From the  tags, we find the "[edit] section links" and * "header containers" (see big comment above). */			const allHeaders = $("#mw-content-text .mw-parser-output").find(":header").filter(':not(#mw-toc-heading)'); if (allHeaders.length == 0) { warn('Nothing to archive here. The script should have aborted earlier. Aborting.'); return; }			allHeaders.each(function(i, header) {				var sectionNumber = undefined;				const headerLevel = header.tagName.slice(1) * 1; // wtf javascript				const editSection = findEditSectionForHeader(header);				if (!editSection) {					// we're either in an archived page ([edit] links are hidden with magic word )					return;				}				{					const editSectionLink = editSection.querySelector('a');					if (editSectionLink) {						// Note: href may not be set.						const sectionNumberMatch = editSectionLink.href && editSectionLink.href.match(/&section=(\d+)/);						if (sectionNumberMatch) {							sectionNumber = sectionNumberMatch[1];						}					}				}				// if the if statement fails, it might be something like not a real section 				if (validSections.hasOwnProperty(sectionNumber)) {					const archiveLink = $('')					.text('archive')					.click(function { const correspondingHeaderContainer = $(getHeaderContainer(header)); correspondingHeaderContainer.toggleClass('arky-selected-section');

// now, click all sub-sections of this section // i.e. mark all needed header containers with our CSS class .arky-selected-section const isThisSectionSelected = correspondingHeaderContainer.hasClass('arky-selected-section'); const thisHeaderLevel = archiveLink.parents('.arky-span').data('header-level');

// starting from the current section, loop through each section const allArchiveSpans = $('.arky-span'); const currSectionIdx = allArchiveSpans.index(archiveLink.parents('.arky-span')); for (var i = currSectionIdx + 1; i < allArchiveSpans.length; i++) { if ($(allArchiveSpans[i]).data('header-level') <= thisHeaderLevel) { // if this isn't a subsection, quit break; }							const closestHeaderContainer = findHeaderContainerForArchiveSpan(allArchiveSpans[i]); if (closestHeaderContainer.hasClass('arky-selected-section') != isThisSectionSelected) { // if this section needs toggling, toggle it								closestHeaderContainer.toggleClass('arky-selected-section'); }						}

// finally, update button const selectedSectionCount = $('.arky-selected-section').length; archiveButton .prop('disabled', selectedSectionCount === 0) .text('archive ' + selectedSectionCount + ' selected thread' +								 ((selectedSectionCount === 1) ? '' : 's')); });

const arkySpan = $(" ", { "class": "arky-span" }) .css({'display':'none'}) .data({'header-level': headerLevel, 'section': sectionNumber}) .append(						$(' ', { 'class': 'mw-editsection-bracket' }).text('['),						archiveLink,						$(' ', { 'class': 'mw-editsection-bracket' }).text(']')					);

$(editSection).append(" ", arkySpan); }			});		})		.fail( => warn('addArchiveLinks: Cannot download current page. Aborting.')); } }); //