User:Brooke Vibber/ParserPopups.js

//
/**
 * MediaWiki:Gadget-ParserPopups.js
 * Brion Vibber
 * 2011-04-26
 *
 * Initial steps on some experiments to flip between various parsing methods to
 * compare source, parse trees, and outcomes.
 *
 * Adds a fold-out section in the editor (using enhanced toolbar) to swap view of:
 * - Source (your regular editable text)
 * - MediaWiki parser (parsed page as full HTML)
 * - Preprocessor tree (tree view of XML preprocessor tree; shows limited pre-parsing breakdown)
 * - FakeParser (a very primitive parser class in this gadget)
 * - FakeParser's parse tree
 * - FakeParser's output and parse tree side-by-side.
 *
 * The parsed views update to match the current editor state when you bump over to them.
 * In side-by-side view, matching items are highlighted on the two sides, and clicking
 * will scroll the related side into view if needed.
 */
(function(mw, $) {

/**
 * Very primitive hashmap class that allows using objects as keys;
 * JSON flattening of the key object is used as a hash code, so only
 * suitable for objects that will be immutable for now.
 *
 * Actual final comparison is done using object identity, but the
 * bucket match is from the JSON, so don't mess around!
 *
 * Used to map parse tree nodes to output nodes for the inspector mode.
 */
function HashMap() {
    // Parallel bucket tables: keyBuckets[hash][i] pairs with valBuckets[hash][i].
    this.keyBuckets = {};
    this.valBuckets = {};
}

/**
 * Look up the value stored for a key object (matched by identity).
 *
 * @param {object} keyObj
 * @return {object} stored value, or null if no match found.
 */
HashMap.prototype.get = function(keyObj) {
    var key = this.hash(keyObj);
    var keys = this.keyBuckets[key];
    if (typeof keys !== 'undefined') {
        for (var i = 0; i < keys.length; i++) {
            if (keyObj === keys[i]) {
                return this.valBuckets[key][i];
            }
        }
    }
    return null;
};

/**
 * Store a value under a key object. Entries are appended to the bucket, so
 * putting the same key twice keeps the first value visible to get().
 *
 * @param {object} keyObj
 * @param {object} val
 */
HashMap.prototype.put = function(keyObj, val) {
    var key = this.hash(keyObj);
    if (typeof this.keyBuckets[key] === 'undefined') {
        this.keyBuckets[key] = [];
        this.valBuckets[key] = [];
    }
    this.keyBuckets[key].push(keyObj);
    this.valBuckets[key].push(val);
};

/**
 * Bucket key for an object: the first 40 characters of its JSON form.
 * This will do for us for now. :)
 *
 * @param {object} keyObj
 * @return {string}
 */
HashMap.prototype.hash = function(keyObj) {
    // JSON.stringify yields undefined for undefined/function keys; String()
    // keeps those from throwing and buckets them under 'undefined'.
    return String(JSON.stringify(keyObj)).slice(0, 40);
};

/**
 * Thin wrapper that drives MediaWiki's own parser through the web API.
 *
 * @param {object} context stored on the instance as `context`
 */
function MediaWikiParser(context) {
    var parser = this;
    parser.context = context;
}

/**
 * Run wiki text through the preprocessor to produce a preprocessor parse tree
 * (XML tree, not JSON).
 *
 * On success the callback receives the XML document element and a null error;
 * if the API response carries no parse tree XML it receives (null, message).
 *
 * @param {string} text
 * @param {function(tree, error)} callback
 */
MediaWikiParser.prototype.parseToTree = function(text, callback) {
    callApi({
        action: 'expandtemplates', // not really what we want, but it'll do
        title: wgPageName,
        text: text,
        generatexml: '1',
        format: 'json'
    }, function(data) {
        // Error responses have no `parsetree` member, so probe defensively.
        var xml = data && data.parsetree && data.parsetree['*'];
        if (typeof xml !== 'string') {
            callback(null, 'Failed to parse!');
            return;
        }
        var dom = new DOMParser().parseFromString(xml, 'text/xml');
        callback(dom.documentElement, null);
    });
};

/**
 * Expansion stage of the pipeline. Currently a no-op: the tree is handed
 * straight back to the callback with a null error.
 *
 * @param {object} tree
 * @param {function(tree, error)} callback
 */
MediaWikiParser.prototype.expandTree = function(tree, callback) {
    // no-op!
    callback(tree, null);
};

/**
 * Run a preprocessor XML parse tree through the final parser.
 * Since we can't actually ship the XML to MediaWiki, we'll reassemble it
 * and send the text. :P
 *
 * Currently we are not able to map preprocessor nodes to output DOM nodes,
 * so the inspector mode won't work (inspectorMap is accepted but unused).
 *
 * @param {Node} tree
 * @param {function(domnode, error)} callback
 * @param {HashMap} inspectorMap
 *
 * @fixme use context object for page title
 */
MediaWikiParser.prototype.treeToHtml = function(tree, callback, inspectorMap) {
    this.treeToSource(tree, function(src, err) {
        if (err) {
            // No DOM node exists on this path; report only the error.
            callback(null, err);
            return;
        }
        callApi({
            action: 'parse',
            title: wgPageName,
            text: src,
            prop: 'text',
            pst: 1,
            format: 'json'
        }, function(data) {
            // Error responses have no `parse` member, so probe defensively.
            var html = data && data.parse && data.parse.text && data.parse.text['*'];
            if (typeof html !== 'string') {
                callback(null, 'Failed to parse!');
                return;
            }
            // Wrap in a single container so the whole fragment is returned
            // as one node, not just its first top-level element.
            var parsed = $('<div>' + html + '</div>')[0];
            callback(parsed, null);
        });
    });
};

/**
 * Collapse a parse tree back to source, if possible.
 * Ideally should exactly match the original source;
 * at minimum the resulting source should parse into
 * a tree that's identical to the current one.
 *
 * Any exception raised while walking the tree is reported through the
 * callback's error argument (with a null text) rather than thrown.
 *
 * @param {Node} tree
 * @param {function(text, error)} callback
 */
MediaWikiParser.prototype.treeToSource = function(tree, callback) {
    var collapse;

    // Collapse every node of an array-like list and join the pieces.
    function collapseList(nodes, sep) {
        return Array.prototype.map.call(nodes, function(node) {
            return collapse(node);
        }).join(sep || '');
    }

    // Direct children of `node` whose nodeName is one of `names`, in document order.
    function childrenNamed(node, names) {
        return Array.prototype.filter.call(node.childNodes, function(child) {
            return names.indexOf(child.nodeName) !== -1;
        });
    }

    // Concatenate the collapsed *contents* of each node in an array.
    function collapseChildren(nodes) {
        return nodes.map(function(node) {
            return collapseList(node.childNodes);
        }).join('');
    }

    // Based loosely on PPFrame_DOM::expand in RECOVER_ORIG mode
    collapse = function(node) {
        if (typeof node === 'string') {
            return node;
        }
        if (node.nodeType === Node.TEXT_NODE) {
            return node.textContent;
        }
        switch (node.nodeName) {
        case 'root':
        case 'title':
        case 'part':
        case 'name':    // temp hack
        case 'value':   // temp hack
        case 'comment': // Recover the literal comment
        case 'ignore':
            return collapseList(node.childNodes);
        case 'template':
            // NOTE(review): delimiters reconstructed as a bracketed implode
            // ('{{' parts joined by '|' then '}}') -- confirm against PPFrame_DOM.
            return '{{' + collapseList(node.childNodes, '|') + '}}';
        case 'tplarg':
            return '{{{' + collapseList(node.childNodes, '|') + '}}}';
        case 'ext':
            var closers = childrenNamed(node, ['close']);
            return '<' + collapseChildren(childrenNamed(node, ['name', 'attr'])) +
                (closers.length ? '>' : '/>') +
                collapseChildren(childrenNamed(node, ['inner'])) +
                collapseChildren(closers);
        case 'h':
            return node.textContent;
        default:
            console.log('unrecognized node during expansion', node);
            return '';
        }
    };

    var src = null;
    var err = null;
    try {
        src = collapse(tree);
    } catch (e) {
        err = e;
    }
    callback(src, err);
};

/**
 * A very primitive wikitext parser implemented entirely in this gadget.
 *
 * @param {ParserContext} context stored on the instance as `context`
 */
function FakeParser(context) {
    // whee
    this.context = context;
}

/**
 * Parse wikitext into a simple JSON tree, one block per source line:
 * - an empty line becomes {type: 'break'}
 * - a line wrapped in 1-6 matching '=' becomes {type: 'h', level, text}
 * - anything else becomes {type: 'para', content: [...inline parts]}
 *
 * Inline parts are {type: 'text'}, {type: 'link', target[, text]} and
 * {type: 'ref', params, content}. The callback is invoked synchronously
 * with ({type: 'page', content: blocks}, null).
 *
 * @param {string} text
 * @param {function(tree, error)} callback
 */
FakeParser.prototype.parseToTree = function(text, callback) {
    // quick and crappy :D

    /**
     * Subparse of inline links within a paragraph etc.
     * @param {string} line
     * @return {object[]} list of content subblocks
     */
    var linksParse = function(line) {
        var bits = line.split('[[');
        var parts = [];
        if (bits[0] !== '') {
            parts.push({ type: 'text', text: bits[0] });
        }
        for (var i = 1; i < bits.length; i++) {
            var bit = bits[i];
            var bracketPos = bit.indexOf(']]');
            if (bracketPos === -1) {
                // not a link oh noooooo -- put the opener back as plain text
                parts.push({ type: 'text', text: '[[' + bit });
                continue;
            }
            var link = bit.slice(0, bracketPos);
            var tail = bit.slice(bracketPos + 2);
            var linkbits = link.split('|');
            if (linkbits.length === 1) {
                parts.push({ type: 'link', target: link });
            } else {
                parts.push({
                    type: 'link',
                    target: linkbits[0],
                    text: linkbits.slice(1).join('|') // @fixme multiples for images etc
                });
            }
            if (tail !== '') {
                parts.push({ type: 'text', text: tail });
            }
        }
        return parts;
    };

    /**
     * Subparse of all inline stuff within a paragraph etc.
     * @param {string} line
     * @return {object[]} list of content subblocks
     */
    var inlineParse = function(line) {
        var bits = line.split('<ref');
        // After the '<ref' opener: params, '>', body, '</ref>', trailing text.
        var re = /^([^>]*)>(.*)<\/ref\s*>(.*)/;
        // Self-closing form: params, '/>', trailing text.
        var re2 = /^([^>]*)\/>(.*)/;
        var parts = [];
        if (bits[0] !== '') {
            // text before...
            parts = parts.concat(linksParse(bits[0]));
        }
        bits.slice(1).forEach(function(bit) {
            var matches;
            var after;
            if ((matches = re.exec(bit)) !== null) {
                after = matches[3];
                parts.push({
                    type: 'ref',
                    params: matches[1],
                    content: (matches[2] === '') ? [] : linksParse(matches[2])
                });
            } else if ((matches = re2.exec(bit)) !== null) {
                after = matches[2];
                parts.push({
                    type: 'ref',
                    params: matches[1],
                    content: []
                });
            } else {
                // Not a well-formed ref; restore what split() removed.
                after = '<ref' + bit;
            }
            if (after !== '') {
                parts = parts.concat(linksParse(after));
            }
        });
        return parts;
    };

    var blocks = text.split('\n').map(function(line) {
        if (line === '') {
            return { type: 'break' };
        }
        var matches = /^(={1,6})(.*)\1$/.exec(line);
        if (matches) {
            return {
                type: 'h',
                level: matches[1].length,
                text: matches[2]
            };
        }
        return {
            type: 'para',
            content: inlineParse(line)
        };
    });

    callback({
        type: 'page',
        content: blocks
    }, null);
};

/**
 * Expansion stage of the pipeline. Currently a no-op: the tree is handed
 * straight back to the callback with a null error.
 *
 * @param {object} tree
 * @param {function(tree, error)} callback
 */
FakeParser.prototype.expandTree = function(tree, callback) {
    // no-op!
    callback(tree, null);
};

/**
 * Render a FakeParser tree node (and, recursively, its content) to a DOM node.
 *
 * Every element produced carries the 'parseNode' class and a 'parseNode'
 * data entry pointing back at its tree node; when an inspectorMap is given
 * the tree node -> DOM node pair is recorded in it as well.
 *
 * NOTE(review): the element tags used here are a reconstruction (div for the
 * page, p for paragraphs, a for links, hN for headings, span.ref for
 * references) -- confirm against the intended markup.
 *
 * @param {object} tree
 * @param {function(domnode, error)} callback
 * @param {HashMap} inspectorMap
 */
FakeParser.prototype.treeToHtml = function(tree, callback, inspectorMap) {
    var self = this;

    // Build an empty element already tagged for the inspector.
    var makeElement = function(tagName) {
        return $(document.createElement(tagName)).addClass('parseNode');
    };

    // Render each subtree and append whatever node it yields to `node`.
    var subParseArray = function(listOfTrees, node) {
        $.each(listOfTrees, function(i, subtree) {
            self.treeToHtml(subtree, function(subnode, err) {
                if (subnode) {
                    node.append(subnode);
                }
            }, inspectorMap);
        });
    };

    var node;
    switch (tree.type) {
    case 'page':
        // A sequence of block-level elements...
        var page = makeElement('div');
        subParseArray(tree.content, page);
        node = page[0];
        break;
    case 'para':
        // A single-line paragraph.
        var para = makeElement('p');
        subParseArray(tree.content, para);
        node = para[0];
        break;
    case 'break':
        // Just a stub in the parse tree.
        break;
    case 'text':
        // hack hack
        node = document.createTextNode(tree.text);
        break;
    case 'link':
        var link = makeElement('a');
        link.text(tree.text || tree.target);
        link.attr('href', '/wiki/' + tree.target); // hack
        node = link[0];
        break;
    case 'h':
        node = makeElement('h' + tree.level).text(tree.text)[0];
        break;
    case 'ref':
        var ref = makeElement('span').addClass('ref').text('[');
        subParseArray(tree.content, ref);
        ref.append(document.createTextNode(']'));
        node = ref[0];
        break;
    default:
        callback(null, 'Unrecognized parse tree node');
        return;
    }

    if (!node) {
        callback(null); // hmmmm
        return;
    }
    if (node.nodeType === 1) {
        $(node).data('parseNode', tree); // assign the node for the tree inspector
        if (inspectorMap) {
            inspectorMap.put(tree, node); // store for reverse lookup
        }
    }
    callback(node, null);
};

/**
 * Collapse a parse tree back to source, if possible.
 * Ideally should exactly match the original source;
 * at minimum the resulting source should parse into
 * a tree that's identical to the current one.
 *
 * The callback receives (text, null), or (null, null) when the node
 * collapses to an empty string, or (null, message) for an unknown node type.
 *
 * @param {object} tree
 * @param {function(text, error)} callback
 */
FakeParser.prototype.treeToSource = function(tree, callback) {
    var self = this;

    // Concatenate the source of each subtree; empty results are skipped.
    var subParseArray = function(listOfTrees) {
        var str = '';
        listOfTrees.forEach(function(subtree) {
            self.treeToSource(subtree, function(substr, err) {
                if (substr) {
                    str += substr;
                }
            });
        });
        return str;
    };

    var src;
    switch (tree.type) {
    case 'page':
        src = subParseArray(tree.content);
        break;
    case 'para':
        // A single-line paragraph.
        src = subParseArray(tree.content) + '\n';
        break;
    case 'break':
        src = '\n';
        break;
    case 'text':
        // In the real world, there might be escaping.
        src = tree.text;
        break;
    case 'link':
        src = '[[' + tree.target;
        if (tree.text) {
            src += '|' + tree.text;
        }
        src += ']]';
        break;
    case 'h':
        var stub = '';
        for (var i = 0; i < tree.level; i++) {
            stub += '=';
        }
        src = stub + tree.text + stub + '\n';
        break;
    case 'ref':
        // params holds everything between '<ref' and the tag's closing '>' or '/>'.
        src = '<ref' + (tree.params || '');
        if (tree.content.length) {
            src += '>';
            src += subParseArray(tree.content);
            src += '</ref>';
        } else {
            src += '/>';
        }
        break;
    default:
        callback(null, 'Unrecognized parse tree node');
        return;
    }

    callback(src || null, null); // empty output is reported as null -- hmmmm
};

// Callbacks waiting on an in-flight request, keyed by the JSON of its params
// (entry is null once the request has completed).
var apiCallers = {};
// Successful responses, keyed by the JSON of the request params.
var apiCache = {};

/**
 * POST a request to the wiki's api entry point, de-duplicating identical
 * requests: concurrent calls with the same params share one request, and
 * successful responses are cached and replayed to later callers.
 *
 * If the HTTP request itself fails, every waiting callback receives an
 * object of the form {error: {code: 'http', info: <status text>}} and
 * nothing is cached, so a later call retries.
 *
 * @param {object} params API query parameters
 * @param {function(data)} callback receives the decoded JSON response
 */
function callApi(params, callback) {
    var api = wgScriptPath + '/api' + wgScriptExtension;
    var key = JSON.stringify(params);

    if (apiCallers[key]) {
        // Same request already in flight; just join the queue.
        apiCallers[key].push(callback);
        return;
    }
    if (key in apiCache) {
        callback(apiCache[key]);
        return;
    }

    apiCallers[key] = [callback];

    // Release the queue before invoking callbacks so they may re-issue the call.
    var finish = function(data) {
        var callbacks = apiCallers[key];
        apiCallers[key] = null;
        for (var i = 0; i < callbacks.length; i++) {
            callbacks[i](data);
        }
    };

    $.ajax({
        url: api,
        data: params,
        type: 'POST',
        dataType: 'json',
        success: function(data) {
            apiCache[key] = data;
            finish(data);
        },
        error: function(xhr, textStatus) {
            finish({ error: { code: 'http', info: String(textStatus) } });
        }
    });
}

/**
 * Escape the characters that are special in HTML text and double-quoted
 * attribute values: & < > "
 *
 * @param {string} str value to escape (coerced to string)
 * @return {string}
 */
function htmlEscape(str) {
    // Global regexes: a string pattern would only replace the first occurrence.
    return String(str)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;');
}

/**
 * Render an XML tree into this thingy.
 *
 * Elements become list items showing their tag and attributes; an element
 * with children gets a nested list between its opening and closing tag.
 * Text nodes become list items holding the text. Other node types are ignored.
 *
 * NOTE(review): the list-item/span markup is a reconstruction. The opening
 * tag carries class 'mw-pp-node' and is followed by a sibling ul, which is
 * the shape the '.mw-pp-node' click handler looks for -- confirm the styling.
 *
 * @param {DOMNode} node
 * @param {jQuery} dest a list object!
 * @param {HashMap} inspectorMap passed down unchanged to child renders
 */
function renderXmlTree(node, dest, inspectorMap) {
    if (node.nodeType === Node.ELEMENT_NODE) {
        var base = '<b>' + htmlEscape(node.nodeName) + '</b>';
        var str = '&lt;' + base;
        $.each(node.attributes, function(i, attr) {
            // NOTE(review): the value is escaped twice, which displays it in
            // its XML-serialised form -- presumably deliberate; confirm.
            str += ' ' + htmlEscape(attr.nodeName) + '="' + htmlEscape(htmlEscape(attr.nodeValue)) + '"';
        });
        if (node.childNodes.length === 0) {
            str += ' /&gt;';
            dest.append('<li>' + str + '</li>');
        } else {
            str += '&gt;';
            var closer = '&lt;/' + base + '&gt;';
            var chunk = $('<li>' +
                          '<span class="mw-pp-node">' + str + '</span>' +
                          '<ul></ul>' +
                          '<span>' + closer + '</span>' +
                          '</li>');
            var sublist = chunk.find('ul');
            dest.append(chunk);
            $.each(node.childNodes, function(i, child) {
                renderXmlTree(child, sublist, inspectorMap);
            });
        }
    } else if (node.nodeType === Node.TEXT_NODE) {
        dest.append($('<li></li>').text(node.textContent));
    }
}

/**
 * Render a JSON tree into this thingy.
 *
 * Arrays and objects render as a bracket/brace pair around a nested list;
 * each object additionally gets class 'parseNode', a 'parseNode' data entry
 * pointing at the source object, and (when inspectorMap is given) an entry
 * mapping the object to its rendered element. Other values render as text.
 *
 * NOTE(review): the wrapping span markup is a reconstruction. The opening
 * bracket carries class 'mw-pp-node' with the ul as its sibling, which is
 * the shape the '.mw-pp-node' click handler looks for -- confirm the styling.
 *
 * @param {mixed} node
 * @param {jQuery} dest a list object!
 * @param {HashMap} inspectorMap
 */
function renderJsonTree(node, dest, inspectorMap) {
    var type = (typeof node);
    var chunk, sublist;
    if (node === null) {
        dest.append('null');
    } else if (type === 'object' && node instanceof Array) {
        chunk = $('<span>' +
                  '<span class="mw-pp-node">[</span>' +
                  '<ul></ul>' +
                  ']' +
                  '</span>');
        sublist = chunk.find('ul');
        $.each(node, function(i, val) {
            var item = $('<li></li>');
            renderJsonTree(val, item, inspectorMap);
            sublist.append(item);
        });
        dest.append(chunk);
    } else if (type === 'object') {
        chunk = $('<span class="parseNode">' +
                  '<span class="mw-pp-node">{</span>' +
                  '<ul></ul>' +
                  '}' +
                  '</span>');
        chunk.data('parseNode', node); // assign the node for the tree inspector
        if (inspectorMap) {
            // save reverse lookup for the inspector
            inspectorMap.put(node, chunk[0]);
        }
        sublist = chunk.find('ul'); // hack
        $.each(node, function(key, val) {
            var item = $('<li> ' + htmlEscape('' + key) + ' : </li>');
            renderJsonTree(val, item, inspectorMap);
            sublist.append(item);
        });
        dest.append(chunk);
    } else if (type === 'string') {
        dest.append(htmlEscape(JSON.stringify(node))); // easy way to escape :)
    } else {
        dest.append(htmlEscape('' + node));
    }
}

/**
 * Dispatch a tree to the matching renderer: DOM `Node` instances go to
 * renderXmlTree, everything else goes to renderJsonTree.
 *
 * @param {mixed} node
 * @param {jQuery} dest a list object!
 * @param {HashMap} inspectorMap
 */
function renderTree(node, dest, inspectorMap) {
    var isXml = node instanceof Node;
    var renderer = isXml ? renderXmlTree : renderJsonTree;
    renderer(node, dest, inspectorMap);
}

// Current resize hook for the popup box; null while the plain source view is shown.
var onResize = null;

// Forward window resizes to whichever popup is currently active.
$(window).resize(function() {
    if (onResize) {
        onResize();
    }
});

// Clicking a tree node's opening tag/bracket folds or unfolds the first list
// inside the node's parent. Delegated from the document because the tree
// nodes are created later.
$(document).delegate('.mw-pp-node', 'click', function() {
    var ul = $(this.parentNode).find('ul:first');
    if (ul.is(':hidden')) {
        ul.slideDown();
    } else {
        ul.slideUp();
    }
});

/**
 * Replace the visible editor textarea with a popup box of the same size.
 *
 * Any previous '#mw-parser-popup' is removed, a new one is inserted after
 * '#wpTextbox1', the textarea is hidden, and `onResize` is pointed at a
 * function that keeps the box sized to the textarea.
 *
 * NOTE(review): the outer wrapper's markup is a reconstruction; its id is
 * taken from the removal selector, the 'overflow: auto' style is an
 * assumption -- confirm.
 *
 * @param {string} inside HTML placed inside the inner 'editor' div
 * @return {jQuery} the outer popup element
 */
var makeMagicBox = function(inside) {
    $('#mw-parser-popup').remove();
    // line-height is needed to compensate for oddity in WikiEditor extension,
    // which zeroes the line-height on a parent container
    var box = $('#wpTextbox1');
    var target = $('<div id="mw-parser-popup" style="overflow: auto">' +
        '<div class="editor" style="line-height: 1.5em; top: 0px; left: 0px; right: 0px; bottom: 0px; border: 1px solid gray">' +
        inside +
        '</div>' +
        '</div>').insertAfter(box);
    box.css('display', 'none');

    onResize = function() {
        target.width(box.width())
            .height(box.height());
    };
    onResize();
    return target;
};

/**
 * Create two scrollable columns for an 'inspector' display.
 *
 * The returned element contains one descendant with class 'left' and one
 * with class 'right'; each is as tall as the editor textarea and scrolls
 * independently.
 *
 * NOTE(review): the markup is a reconstruction (a two-cell table). The
 * columns are positioned so that child .position() values are relative to
 * the scrolling column -- confirm the layout.
 *
 * @param {jQuery} dest -- jquery obj to receive the target
 * @return {jQuery}
 */
var makeInspectorColumns = function(dest) {
    var h = $('#wpTextbox1').height(); // hack
    var columnStyle = 'position: relative; overflow: auto; height: ' + h + 'px';
    var target = $(
        '<table style="width: 100%; table-layout: fixed"><tr>' +
        '<td style="width: 50%; vertical-align: top">' +
        '<div class="left" style="' + columnStyle + '"></div>' +
        '</td>' +
        '<td style="width: 50%; vertical-align: top">' +
        '<div class="right" style="' + columnStyle + '"></div>' +
        '</td>' +
        '</tr></table>').appendTo(dest);
    return target;
};

/**
 * Set up 'inspector' events to highlight elements with matching parseNode data properties
 * between the given two sections.
 *
 * Hovering a '.parseNode' element on one side highlights it and its
 * counterpart on the other side; clicking scrolls the counterpart into view
 * when it lies outside the other side's visible range.
 *
 * @param {jQuery} left
 * @param {jQuery} right
 * @param {HashMap} leftMap maps parse nodes to elements within `left`
 * @param {HashMap} rightMap maps parse nodes to elements within `right`
 */
var setupInspector = function(left, right, leftMap, rightMap) {
    // Wire events on side `a`; `inspectorMap` resolves parse nodes to elements on side `b`.
    var makeMagic = function(a, b, inspectorMap) {
        var match = function(aNode, callback) {
            var treeNode = $(aNode).data('parseNode');
            // Elements without parse data simply have no counterpart.
            var bNode = treeNode ? inspectorMap.get(treeNode) : null;
            callback(aNode, bNode);
        };
        a.delegate('.parseNode', 'mouseenter', function() {
            match(this, function(node, other) {
                $(node).addClass('parseNodeHighlight');
                $(other).addClass('parseNodeHighlight');
            });
        }).delegate('.parseNode', 'mouseleave', function() {
            match(this, function(node, other) {
                $(node).removeClass('parseNodeHighlight');
                $(other).removeClass('parseNodeHighlight');
            });
        }).delegate('.parseNode', 'click', function(event) {
            if (event.isDefaultPrevented()) {
                // A nested parseNode already handled this click.
                return;
            }
            match(this, function(node, other) {
                if (!other) {
                    return;
                }
                // try to scroll the other into view. how... feasible is this? :DD
                var visibleStart = b.scrollTop();
                var visibleEnd = visibleStart + b.height();
                var otherStart = visibleStart + $(other).position().top;
                var otherEnd = otherStart + $(other).height();
                if (otherStart > visibleEnd || otherEnd < visibleStart) {
                    b.scrollTop(otherStart);
                }
                event.preventDefault();
            });
        });
    };
    makeMagic(left, right, rightMap);
    makeMagic(right, left, leftMap);
};

/**
 * Register the four view modes for one parser class on `modes`:
 *   <className>            rendered HTML output
 *   <className>-tree       parse tree view
 *   <className>-roundtrip  source regenerated from the parse tree
 *   <className>-inspect    parse tree and HTML output side by side
 *
 * Each mode is {label, desc, render(src, dest)}. If a parser stage reports
 * an error, render() shows the error text in `dest` instead of output.
 *
 * @param {object} modes map of mode name to mode definition; modified in place
 * @param {function} parserClass constructor exposing parseToTree/treeToHtml/treeToSource
 * @param {string} className display name and key prefix for the modes
 */
var addParserModes = function(modes, parserClass, className) {
    var showError = function(dest, err) {
        dest.text('Error: ' + err);
    };

    modes[className] = {
        label: className,
        desc: 'Showing the page rendered with ' + className + '.',
        render: function(src, dest) {
            var parser = new parserClass();
            parser.parseToTree(src, function(tree, err) {
                if (err) {
                    showError(dest, err);
                    return;
                }
                parser.treeToHtml(tree, function(node, err) {
                    if (err) {
                        showError(dest, err);
                        return;
                    }
                    dest.append(node);
                });
            });
        }
    };
    modes[className + '-tree'] = {
        label: className + ' tree',
        desc: 'Showing the page broken down to parse tree with ' + className + '.',
        render: function(src, dest) {
            var parser = new parserClass();
            parser.parseToTree(src, function(tree, err) {
                if (err) {
                    showError(dest, err);
                    return;
                }
                var target = $('<ul><li></li></ul>').appendTo(dest);
                renderTree(tree, target.find('li'));
            });
        }
    };
    modes[className + '-roundtrip'] = {
        label: className + ' round-trip',
        desc: 'Showing the page as parsed, then returned to source via ' + className + '.',
        render: function(src, dest) {
            var parser = new parserClass();
            parser.parseToTree(src, function(tree, err) {
                if (err) {
                    showError(dest, err);
                    return;
                }
                parser.treeToSource(tree, function(src2, err) {
                    if (err) {
                        showError(dest, err);
                        return;
                    }
                    // NOTE(review): container element is a reconstruction -- confirm.
                    var target = $('<pre style="white-space: pre-wrap"></pre>').appendTo(dest);
                    target.text(src2);
                });
            });
        }
    };
    modes[className + '-inspect'] = {
        label: className + ' inspect',
        desc: 'Shows ' + className + '\'s HTML output and parse tree side-by-side.',
        render: function(src, dest) {
            var parser = new parserClass();
            var treeMap = new HashMap();
            var renderMap = new HashMap();
            parser.parseToTree(src, function(tree, err) {
                if (err) {
                    showError(dest, err);
                    return;
                }
                var target = makeInspectorColumns(dest);
                var left = target.find('.left');
                var right = target.find('.right');
                renderTree(tree, left, treeMap);
                parser.treeToHtml(tree, function(node, err) {
                    if (err) {
                        showError(right, err);
                        return;
                    }
                    right.append(node);
                    setupInspector(left, right, treeMap, renderMap);
                }, renderMap);
            });
        }
    };
};

// On DOM ready: if the enhanced editor toolbar is available, add a 'Parser'
// booklet section with one page per view mode.
$(function() {
    /* Start trying to add items... */
    var editor = $('#wpTextbox1');
    if (editor.length === 0 || typeof $.fn.wikiEditor !== 'function') {
        mw.log('No wiki editor');
        return;
    }

    var modes = {
        'source': {
            label: 'Source',
            desc: 'Showing the page\'s original wikitext source code, as you are used to editing it.',
            render: false // no popup: selecting this mode restores the textarea
        }
    };
    addParserModes(modes, MediaWikiParser, 'MediaWikiParser');
    addParserModes(modes, FakeParser, 'FakeParser');

    window.setTimeout(function() {
        // Great, now let's hook the booklet buttons... (explicit callbacks would be better)
        var hook = function(key, callback) {
            // delegated from the document since the buttons haven't been created yet...
            // 'mouseup' as a hack since the upstream click handler cancels other event handlers
            $(document).delegate(
                '#wikiEditor-ui-toolbar .sections .section-parser .index div[rel="' + key + '"]',
                'mouseup',
                callback
            );
        };

        var pages = {};
        $.each(modes, function(name, mode) {
            pages[name] = {
                'layout': 'table',
                'label': mode.label,
                'rows': [
                    {
                        'desc': { text: mode.desc }
                    }
                ]
            };
            hook(name, function() {
                if (mode.render) {
                    var target = makeMagicBox('');
                    var src = $('#wpTextbox1').val();
                    var dest = target.find('div');
                    mode.render(src, dest);
                } else {
                    // Back to plain source editing.
                    $('#mw-parser-popup').remove();
                    onResize = null;
                    $('#wpTextbox1').css('display', 'block');
                }
            });
        });

        editor.wikiEditor('addToToolbar', {
            'sections': {
                'parser': {
                    'label': 'Parser',
                    'type': 'booklet',
                    'pages': pages
                }
            }
        });
    }, 500);
});

})(mediaWiki, jQuery); //