// User:GeneralNotability/edit-filter-hit-analyzer.js

// @ts-check
// More information on how an edit filter was tripped

importStylesheet('w:en:User:GeneralNotability/edit-filter-hit-analyzer.css' );

/**
 * @typedef EditFilterLine
 * @type {Object}
 * @property {string} text Text of the line
 * @property {string} normedText Text with some modifications applied for parsing
 * @property {string[]} variables Variables found in the line
 * @property {number} indentation how far to indent the line
 */

/**
 * Map of variable name -> value. Filled by efa_main with the built-in
 * variables of the logged hit and by efa_parseRules with variables that the
 * filter itself assigns via `name := "value"`.
 */
const efa_knownVars = {};

// The script only runs on pages whose name matches this (a single log entry)
const efa_PAGE_NAME_RE = /Special:AbuseLog\/\d+/;
// Matches the href of a link to a filter page; group 2 is the numeric filter ID
const efa_FILTER_PAGE_RE = /\/wiki\/(Special:AbuseFilter\/(\d+))/;
// Vars in this list shouldn't have their full content displayed because they're usually really big
const efa_HIDDEN_VARS = [ 'old_wikitext', 'new_wikitext', 'edit_diff', 'all_links', 'added_lines', 'removed_lines', 'new_html' ];

// Parser regexes
// `name := "value"` - group 1 is the variable name, group 2 the quoted value
const efa_REGEX_ASSIGNMENT_RE = /(\w+)\s*:=\s*"(.*)"/;
// `<subject> rlike|irlike|regex <pattern>` - group 1 is the subject expression,
// group 2 the operator, group 3 either a bare identifier or a quoted literal
const efa_RLIKE_RE = /\b(.*)\s*(i?rlike|regex)\s*(\w+|".*")/;

// Future reference: if we want to do ccnorm ourselves, we can pull the conversion list
// from https://phab.wmfusercontent.org/file/data/jcoued3dziiwwwdr53lp/PHID-FILE-lkxia6juxnhqt263dbrj/equivset.json

/**
 * Replace an expression with the value of the known variable it mentions.
 *
 * The lookup is deliberately simplistic: any known variable whose name appears
 * as a substring of the expression counts as a match (which also covers
 * function calls wrapped around a variable). When several names match, the
 * longest one wins, and the match is always made against the original
 * expression so that a substituted value is never re-scanned for names.
 *
 * @param {string} expression Text taken from one side of an rlike statement
 *
 * @return {string} Value of the matched variable, or the expression unchanged
 */
function efa_expandVariable(expression) {
	let bestName = null;
	Object.keys(efa_knownVars).forEach((name) => {
		if (expression.includes(name) && (bestName === null || name.length > bestName.length)) {
			bestName = name;
		}
	});
	// String() rather than .toString() so null/undefined values don't throw
	return bestName === null ? expression : String(efa_knownVars[bestName]);
}

/**
 * Build the "Filter rule analysis" section on an abuse log entry page.
 *
 * Loads the built-in variables of the hit into efa_knownVars, fetches the
 * pattern of the filter that was tripped, and renders one entry per parsed
 * rule showing the variables it uses and, for regex matches, a regex101 link.
 *
 * @return {Promise<void>}
 */
async function efa_main() {
	// populate knownVars with built-in values
	Object.entries(mw.config.get('wgAbuseFilterVariables') || {}).forEach(([ key, value ]) => {
		efa_knownVars[key] = value;
	});

	// NOTE(review): the element tags passed to $() below were lost from the
	// copy of the script this was restored from; <h3>/<div>/<p>/<br>/<a> are
	// reconstructions - confirm against the stylesheet.
	const $actionParams = $('h3:contains("Action parameters")', document);
	$('<h3>').text('Filter rule analysis').insertBefore($actionParams);
	const $ruleAnchor = $('<div>').attr('id', 'efa-anchor').insertBefore($actionParams);

	// Find the link which goes to Special:AbuseFilter, then pull out the wikilink part of it
	const filterHref = $('a', document).filter(function () {
		return efa_FILTER_PAGE_RE.test(this.getAttribute('href'));
	}).attr('href');
	const filterIdMatch = filterHref ? filterHref.match(efa_FILTER_PAGE_RE) : null;
	if (!filterIdMatch) {
		// No link to the filter on this page, so there is nothing to analyze
		return;
	}
	const filterId = filterIdMatch[2];

	const filterPattern = await efa_getFilter(filterId);
	if (!filterPattern) {
		// Something went wrong (or we can't access the filter),
		// bail out
		return;
	}

	const filterRules = efa_parseRules(filterPattern);
	filterRules.forEach((rule) => {
		const $bullet = $('<div>').attr('style', 'margin-left:' + (10 * rule.indentation + 10) + 'px;');
		$('<p>').addClass('efa-rule').text(rule.text).appendTo($bullet);
		rule.variables.forEach((variable) => {
			const $efaData = $('<p>').addClass('efa-data');
			// Variable values come from the logged edit, so they must be
			// inserted as text - jQuery's append() would parse them as HTML
			if (efa_HIDDEN_VARS.includes(variable)) {
				$efaData.text(variable + ': (not shown)');
			} else {
				$efaData.text(variable + ': ' + efa_knownVars[variable]);
			}
			$efaData.appendTo($bullet);
		});

		const rlikeMatch = rule.normedText.match(efa_RLIKE_RE);
		if (rlikeMatch) {
			// If this is a regex, try to expand it and generate a link
			const matchType = rlikeMatch[2];
			let re = rlikeMatch[3];
			// Whether to apply substitution on the regex side (don't if the
			// regex is a quoted literal)
			let subRe = true;
			const reQuoteSearch = re.match(/.*?"(.*)"/);
			if (reQuoteSearch) {
				// Remove the quotes around a literal regex
				re = reQuoteSearch[1];
				// Don't attempt substitution since this is a literal
				subRe = false;
			}
			// Expand variables (or possibly function calls on a variable)
			// TODO: this is really simplistic (obviously) - strip function calls and get
			// an exact match
			const reText = efa_expandVariable(rlikeMatch[1]);
			if (subRe) {
				re = efa_expandVariable(re);
			}
			// abusefilter entries are PCRE and by default use the 'u' flag.
			// if irlike is being used, add the i flag as well.
			let flags = 'u';
			if (matchType === 'irlike') {
				flags += 'i';
			}
			const re101url = `https://regex101.com/?regex=${encodeURIComponent(re)}&testString=${encodeURIComponent(reText)}&flags=${flags}`;
			$bullet.append('<br>').append($('<a>').attr('href', re101url).text('(view at regex101)'));
		}
		$bullet.appendTo($ruleAnchor);
	});
}

/**
 * Turn a filter's pattern into a list of rules
 *
 * The pattern is split on the statement separators `&`, `;` and `|`; the
 * separators themselves are kept as their own entries. Each entry gets an
 * indentation level derived from paren nesting and the list of known
 * variables it mentions.
 *
 * Side effect: every `name := "value"` assignment found in the pattern is
 * recorded in efa_knownVars.
 *
 * @param {string} pattern Original text pattern
 *
 * @return {EditFilterLine[]} List of rules
 */
function efa_parseRules(pattern) {
	// Strip all newline characters and split by statement
	// The second part is taken from https://stackoverflow.com/questions/11502598/how-to-match-something-with-regex-that-is-not-between-two-special-characters
	// It matches all split characters (&, ;, |) as long as they are _not_ between quotes
	const filterLines = pattern
		.replace(/(\r|\n)/g, '')
		.split(/([&;|](?=(?:[^"]*"[^"]*")*[^"]*$))/g);

	/** @type {EditFilterLine[]} */
	const annotatedFilterLines = filterLines.map((line) => {
		// Trim, then replace every run of whitespace with a single space
		const cleanedUpLine = line.trim().replace(/\s+/g, ' ');
		return {
			text: cleanedUpLine,
			normedText: cleanedUpLine,
			variables: [],
			indentation: 0
		};
	});

	// Indentation pass: figure out how deep each statement is nested in parens,
	// then create a "normed" version which strips the extra paren(s)
	// While we're in there, save variable assignments
	let indent = 0;
	annotatedFilterLines.forEach((line) => {
		// split().length is (count + 1) on both sides, so the difference is
		// still the difference in paren counts
		const openParens = line.text.split(/\(/).length;
		const closeParens = line.text.split(/\)/).length;
		// Because of how we split the strings, a block of indented text will
		// always start with an extra open paren on the starting rule, and close
		// with an extra one on the ending rule (but we want both of those lines)
		// indented
		const deltaParens = openParens - closeParens;
		if (deltaParens > 0) {
			indent += deltaParens;
			line.indentation = indent;
			// Remove the extra paren (the first one) from the normed text
			line.normedText = line.text.replace('(', '');
		} else if (deltaParens < 0) {
			line.indentation = indent;
			indent += deltaParens; // Remember, deltaParens is negative here, so add it
			// Remove the extra paren (the last one) from the normed text
			line.normedText = line.text.replace(/\)(?=[^)]*$)/, '');
		} else {
			line.indentation = indent;
		}
		const varAssignment = line.normedText.match(efa_REGEX_ASSIGNMENT_RE);
		if (varAssignment) {
			efa_knownVars[varAssignment[1]] = varAssignment[2];
		}
	});

	// Annotate by going through and identifying variables used in the lines
	Object.keys(efa_knownVars).forEach((varName) => {
		// Escape the name so it is always matched literally
		const escapedName = varName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
		const varRe = new RegExp('\\b' + escapedName + '\\b');
		annotatedFilterLines.forEach((line) => {
			if (!varRe.test(line.text)) {
				return;
			}
			const assignmentMatch = line.text.match(efa_REGEX_ASSIGNMENT_RE);
			if (assignmentMatch && assignmentMatch[1] === varName) {
				// Don't list the variable on the line that assigns it
				return;
			}
			line.variables.push(varName);
		});
	});
	return annotatedFilterLines;
}

/**
 * Fetch the pattern (rule text) of an edit filter through the MediaWiki API.
 *
 * Never rejects: any API failure is logged to the console and reported as an
 * empty pattern, which callers treat as "filter not available".
 *
 * @param {string|number} filterId ID of the filter to look up
 *
 * @return {Promise<string>} The filter's pattern, or '' if the filter was not
 *  found, the response carried no pattern, or the request failed
 */
async function efa_getFilter(filterId) {
	try {
		const api = new mw.Api();
		// Start and end ID are the same so that at most this one filter is returned
		const response = await api.get({
			action: 'query',
			list: 'abusefilters',
			abfstartid: filterId,
			abfendid: filterId,
			abfprop: 'pattern'
		});
		const filters = response.query.abusefilters;
		if (filters.length < 1) {
			// No match?
			return '';
		}
		// NOTE(review): the entry may come back without a pattern (presumably
		// when the viewer may not see the filter) - normalise that to ''
		return filters[0].pattern || '';
	} catch (error) {
		console.error(error);
		return '';
	}
}

// On document load, check if this page is an edit filter hit - if so,
// load the EF stuff
$(function () {
	if (efa_PAGE_NAME_RE.test(mw.config.get('wgPageName'))) {
		// efa_main is async; report a failure instead of leaving the
		// rejection unhandled
		efa_main().catch((error) => {
			console.error(error);
		});
	}
});
//