/*
 * User:Arbruijn/wiki2html.js
 *
 * Copied from http://en.wikipedia.org/w/index.php?title=User:Pilaf/livepreview.js
 * Indented with the help of Mozilla's JS function pretty-printer,
 * plus small changes to handle ' in link URLs.
 */

// User options.
// Each `x = x || default` keeps a value that an earlier script already
// assigned and only fills in the default when nothing was set.
var wpUserName = wpUserName || 'Wikipedian';
var wpUserSignature = wpUserSignature || wpUserName;
var wpShowImages = wpShowImages || false;

// System options.
// Same override convention as the user options: a value assigned before this
// script runs wins over the default given here.
var wpLanguageCode = wpLanguageCode || 'en';
// Pipe-separated list of language prefixes; it is spliced into regular
// expressions that recognise interwiki links.
var wpInterwikiCodes = wpInterwikiCodes || 'ab|aa|af|ak|sq|als|am|ang|ar|an|arc|hy|roa-rup|as|ast|av|ay|az|bm|ba|eu|be|bn|bh|bi|bs|br|bg|my|ca|ch|ce|chr|chy|ny|zh|zh-tw|zh-cn|cho|cv|kw|co|cr|hr|cs|da|dv|nl|dz|en|eo|et|ee|fo|fj|fi|fr|fy|ff|gl|ka|de|got|el|kl|gn|gu|ht|ha|haw|he|hz|hi|ho|hu|is|io|ig|id|ia|ie|iu|ik|ga|it|ja|jv|kn|kr|csb|ks|kk|km|ki|rw|rn|tlh|kv|kg|ko|kj|ku|ky|lo|la|lv|li|ln|lt|jbo|nds|lg|lb|mk|mg|ms|ml|mt|gv|mi|minnan|mr|mh|zh-min-nan|mo|mn|mus|nah|na|nv|ne|se|no|nn|oc|or|om|pi|fa|pl|pt|pa|ps|qu|ro|rm|ru|sm|sg|sa|sc|gd|sr|sh|st|tn|sn|scn|simple|sd|si|sk|sl|so|st|es|su|sw|ss|sv|tl|ty|tg|ta|tt|te|th|bo|ti|tpi|to|tokipona|ts|tum|tr|tk|tw|uk|ur|ug|uz|ve|vi|vo|wa|cy|wo|xh|ii|yi|yo|za|zu';
var wpBaseArticlePath = wpBaseArticlePath || '/wiki/';
var wpMathBasePath = wpMathBasePath || '/math/';
var wpImageBasePath = wpImageBasePath || 'http://upload.wikimedia.org/wikipedia/' + wpLanguageCode + '/';
var wpImageFallbackPath = wpImageFallbackPath || 'http://upload.wikimedia.org/wikipedia/commons/';
var wpDefaultThumbWidth = wpDefaultThumbWidth || 180;
var wpSkinMagnifyClip = wpSkinMagnifyClip || '/skins/common/images/magnify-clip.png';
var wpUserNamespace = wpUserNamespace || 'User';
var wpImageNamespace = wpImageNamespace || 'Image';
var wpCategoryNamespace = wpCategoryNamespace || 'Category';

/**
 * Convert a wikitext string to HTML.
 *
 * @param {string} str Wikitext source; carriage returns are removed first.
 * @returns {string} The HTML accumulated by a WikiCode parser.
 */
function wiki2html(str) {
	var w = new WikiCode();
	w.lines = strip_cr(str).split(/\n/);
	// The parser must actually be invoked; a bare `w.parse;` is a no-op and
	// leaves `w.html` empty.
	w.parse();
	return w.html;
}

// Wikitext substituted for "~~~" / "~~~~": a piped link to the user's page,
// labelled with the signature text. The link rules applied after the tilde
// substitution turn it into an anchor.
// NOTE(review): the link markup around wpUserSignature was lost from the
// recovered source (only `"" + wpUserSignature + ""` survived); it is rebuilt
// here from wpUserNamespace / wpUserName, which are otherwise unused. Confirm
// against the upstream script.
var wpSignature = "[[" + wpUserNamespace + ":" + wpUserName + "|" + wpUserSignature + "]]";

// Matches a line that starts with an image link carrying a layout keyword;
// such lines are rendered as block-level images.
var wpBlockImage = new RegExp("^\\[\\[" + wpImageNamespace + ":.*?\\|.*?(?:frame|thumbnail|thumb|none|right|left|center)", "i");

/**
 * Block-level wikitext parser.
 *
 * Usage: assign an array of source lines to `lines`, call `parse()`, then
 * read the generated markup from `html`. Inline markup is delegated to
 * `_parse_inline_nowiki`, images to `_parse_image`.
 *
 * NOTE(review): the recovered source of this constructor had lost every empty
 * `()` pair, the HTML tags inside its string literals, the horizontal-rule
 * regex, and the head of `_parse_table`. The control flow below follows what
 * survived; the emitted tags are reconstructed from standard wikitext
 * semantics and should be checked against the upstream script.
 */
function WikiCode() {
	this.lines = [];
	this.html = "";

	/** Append `str` to the output and consume the current line. */
	this._endline = function (str) {
		this.html += str;
		this.lines.shift();
	};

	/** Consume every line in `lines`, appending the rendered HTML to `html`. */
	this.parse = function () {
		var p = false; // true while a paragraph is open
		var h_match, line;
		// A `while` (rather than do/while) keeps an empty line list from
		// dereferencing an undefined first line.
		while (this.lines.length) {
			line = this.lines[0];
			if ((h_match = line.match(/^(={1,6})(.*)\1(.*)$/))) {
				p = false;
				this._endline("<h" + h_match[1].length + ">" +
					_parse_inline_nowiki(h_match[2]) +
					"</h" + h_match[1].length + ">" + h_match[3]);
			} else if (line.match(/^[*#:;]/)) {
				p = false;
				this._parse_list();
			} else if (line.charAt(0) == " ") {
				p = false;
				this._parse_pre();
			} else if (line.substr(0, 2) == "{|") {
				p = false;
				this._parse_table();
			} else if (line.match(/^----+$/)) {
				p = false;
				this._endline("<hr />");
			} else if (line.match(wpBlockImage)) {
				p = false;
				this._parse_block_image();
			} else {
				if (line == "") {
					// Two consecutive blank lines open a paragraph with a
					// visible gap; a single blank line just ends the current one.
					if ((p = (this.lines.length > 1 && this.lines[1] == ""))) {
						this._endline("<p><br />");
					}
				} else {
					if (!p) {
						this.html += "<p>";
						p = true;
					}
					this.html += _parse_inline_nowiki(line) + " ";
				}
				this.lines.shift();
			}
		}
	};

	/** Render a run of list lines (prefixes made of `*`, `#`, `:` and `;`). */
	this._parse_list = function () {
		var prev = ""; // prefix of the previous list line
		var l_match, dt_match, marks, imatch, i;
		while (this.lines.length &&
				(l_match = this.lines[0].match(/^([*#:;]+)(.*)$/))) {
			this.lines.shift();
			marks = l_match[1];
			imatch = str_imatch(prev, marks);

			// Close the levels the new line does not continue, innermost first.
			for (i = prev.length - 1; i >= imatch; i--) {
				if (prev.charAt(i) == "*") {
					this.html += "</ul>";
				} else if (prev.charAt(i) == "#") {
					this.html += "</ol>";
				} else {
					// A definition list stays open while the new line still has
					// a `:` or `;` at this depth.
					switch (marks.charAt(i)) {
					case "":
					case "*":
					case "#":
						this.html += "</dl>";
					}
				}
			}

			// Open the levels that are new on this line.
			for (i = imatch; i < marks.length; i++) {
				if (marks.charAt(i) == "*") {
					this.html += "<ul>";
				} else if (marks.charAt(i) == "#") {
					this.html += "<ol>";
				} else {
					switch (prev.charAt(i)) {
					case "":
					case "*":
					case "#":
						this.html += "<dl>";
					}
				}
			}

			// Emit the item itself according to the innermost marker.
			switch (marks.charAt(marks.length - 1)) {
			case "*":
			case "#":
				this.html += "<li>" + _parse_inline_nowiki(l_match[2]);
				break;
			case ";":
				this.html += "<dt>";
				// "; term : definition" on one line: emit the term now and
				// push the definition back as a ":" line of its own.
				if ((dt_match = l_match[2].match(/(.*?) (:.*?)$/))) {
					this.html += _parse_inline_nowiki(dt_match[1]);
					this.lines.unshift(dt_match[2]);
				} else {
					this.html += _parse_inline_nowiki(l_match[2]);
				}
				break;
			case ":":
				this.html += "<dd>" + _parse_inline_nowiki(l_match[2]);
			}
			prev = marks;
		}

		// Close whatever is still open after the last list line.
		for (i = prev.length - 1; i >= 0; i--) {
			if (prev.charAt(i) == "*") {
				this.html += "</ul>";
			} else if (prev.charAt(i) == "#") {
				this.html += "</ol>";
			} else {
				this.html += "</dl>";
			}
		}
	};

	/** Render a `{| ... |}` table, starting at the `{|` line. */
	this._parse_table = function () {
		var table_match = this.lines[0].match(/^\{\|( .*)$/);
		var row_match;
		this._endline("<table" + (table_match ? table_match[1] : "") + ">");
		while (this.lines.length) {
			if (this.lines[0].charAt(0) == "|") {
				switch (this.lines[0].charAt(1)) {
				case "}":
					this._endline("</table>");
					return;
				case "-":
					// Anything after the dashes is treated as row attributes.
					row_match = this.lines[0].match(/^\|-+(.*)$/);
					this._endline("<tr" + row_match[1] + ">");
					break;
				default:
					this._parse_table_data();
				}
			} else if (this.lines[0].charAt(0) == "!") {
				this._parse_table_data();
			} else {
				this.lines.shift();
			}
		}
		// The input ended before "|}": close the table so the output stays balanced.
		this.html += "</table>";
	};

	/** Render one caption (`|+`), data cell (`|`) or header cell (`!`). */
	this._parse_table_data = function () {
		var td_match, td_line;
		td_match = this.lines.shift().match(/^(\|\+|\||!)((?:([^[|]*?)\|(?!\|))?(.*))$/);
		if (td_match[1] == "|+") {
			this.html += "<caption";
		} else {
			this.html += "<t" + ((td_match[1] == "|") ? "d" : "h");
		}
		if (typeof td_match[3] != "undefined") {
			// "attributes | content": the part before the single pipe is attributes.
			this.html += " " + td_match[3];
			td_line = td_match[4].split("||");
		} else {
			td_line = td_match[2].split("||");
		}
		this.html += ">";
		// "a || b || c": push the extra cells back as separate lines so each
		// is handled by its own call.
		while (td_line.length > 1) {
			this.lines.unshift(td_match[1] + td_line.pop());
		}
		this.html += _parse_inline_nowiki(td_line[0]);

		// Collect the lines that belong to this cell (up to the next cell or
		// row at the same nesting depth) and parse them as block content.
		var td = new WikiCode();
		var table_count = 0; // depth of tables nested inside this cell
		while (this.lines.length) {
			if (this.lines[0].charAt(0) == "|") {
				if (table_count == 0) {
					break;
				} else if (this.lines[0].charAt(1) == "}") {
					table_count--;
				}
			} else {
				if (this.lines[0].charAt(0) == "!" && table_count == 0) {
					break;
				} else if (this.lines[0].substr(0, 2) == "{|") {
					table_count++;
				}
			}
			td.lines.push(this.lines.shift());
		}
		if (td.lines.length) {
			td.parse();
		}
		this.html += td.html;
	};

	/** Render a run of lines that start with a space as preformatted text. */
	this._parse_pre = function () {
		this.html += "<pre>";
		do {
			this._endline(_parse_inline_nowiki(this.lines[0].substring(1, this.lines[0].length)) + "\n");
		} while (this.lines.length && this.lines[0].charAt(0) == " ");
		this.html += "</pre>";
	};

	/** Render a line consisting of a block-level image link. */
	this._parse_block_image = function () {
		this.html += _parse_image(this.lines.shift());
	};
}

/**
 * Render one image link (`[[Image:Name|option|...|caption]]`) as HTML.
 *
 * The first pipe-separated field is the file name and the last one is used as
 * the caption. The fields in between are options: `NNNpx`, `thumb` /
 * `thumbnail`, `frame`, `none` / `left` / `right`, `center`.
 *
 * @param {string} str The complete link, including the surrounding brackets.
 * @returns {string} HTML for the image.
 */
function _parse_image(str) {
	var attr = str.substring(wpImageNamespace.length + 3, str.length - 2).split(/\s*\|\s*/);
	var filename = attr[0];
	var caption = attr[attr.length - 1];
	var width, w_match;
	var thumb = false;
	var frame = false;
	var center = false;
	var align = "";
	var html = "";

	// Walk the option fields. The recovered code looped on `attr.shift;`
	// without calling it, so the loop never finished. Indexing from 1 also
	// keeps the file name itself from being read as an option.
	for (var i = 1; i < attr.length; i++) {
		if ((w_match = attr[i].match(/^(\d*)px$/))) {
			width = w_match[1];
			continue;
		}
		switch (attr[i]) {
		case "thumb":
		case "thumbnail":
			thumb = true;
			// falls through: a thumbnail is always framed
		case "frame":
			frame = true;
			break;
		case "none":
		case "right":
		case "left":
			center = false;
			align = attr[i];
			break;
		case "center":
			center = true;
			align = "none";
		}
	}

	// NOTE(review): several literals below are whitespace-only in the recovered
	// source and probably held markup that was stripped. Only the closing tags
	// needed to balance the <div>s that are visibly opened, and the wrapper
	// that gives `center` an effect, have been restored.
	if (frame) {
		if (align == "") {
			align = "right";
		}
		html += "<div class='thumb t" + align + "'>";
		if (thumb) {
			if (!width) {
				width = wpDefaultThumbWidth;
			}
			html += "<div style='width:" + (2 + parseInt(width, 10)) + "px;'>";
			html += _make_image(filename, caption, width);
			html += " <a href='" + wpBaseArticlePath + wpImageNamespace + ":" + filename + "' class='internal' title='Enlarge'><img src='" + wpSkinMagnifyClip + "' /></a> " + _parse_inline_nowiki(caption) + " ";
			html += "</div>";
		} else {
			html += " ";
			html += _make_image(filename, caption);
			html += " " + _parse_inline_nowiki(caption) + " ";
		}
		html += "</div>";
	} else if (align != "") {
		html += "<div class='float" + align + "'> " + _make_image(filename, caption, width) + " </div>";
	} else {
		return _make_image(filename, caption, width);
	}

	if (center) {
		return "<div class='center'>" + html + "</div>";
	}
	return html;
}

/**
 * Render inline wikitext while leaving `<nowiki>...</nowiki>` sections
 * uninterpreted.
 *
 * Text outside nowiki sections goes through `_parse_inline_wiki`; text inside
 * them is only HTML-escaped. Nested nowiki tags are tracked so that the
 * outermost closing tag ends the section. An unterminated section runs to the
 * end of the string.
 *
 * @param {string} str One line (or fragment) of wikitext.
 * @returns {string} The rendered HTML.
 */
function _parse_inline_nowiki(str) {
	// The recovered source searched for " " here while still skipping 8 and 9
	// characters, i.e. the lengths of these two tags.
	var OPEN_TAG = "<nowiki>";
	var CLOSE_TAG = "</nowiki>";
	var start, lastend = 0;
	var substart = 0, nestlev = 0, open, close, subloop;
	var html = "";

	while (-1 != (start = str.indexOf(OPEN_TAG, substart))) {
		html += _parse_inline_wiki(str.substring(lastend, start));
		start += OPEN_TAG.length;
		substart = start;
		subloop = true;
		do {
			open = str.indexOf(OPEN_TAG, substart);
			close = str.indexOf(CLOSE_TAG, substart);
			if (close <= open || open == -1) {
				if (close == -1) {
					// No closing tag left: everything up to the end is literal.
					return html + html_entities(str.substr(start));
				}
				substart = close + CLOSE_TAG.length;
				if (nestlev) {
					nestlev--;
				} else {
					lastend = substart;
					html += html_entities(str.substring(start, lastend - CLOSE_TAG.length));
					subloop = false;
				}
			} else {
				// Another opening tag comes first: one level deeper.
				substart = open + OPEN_TAG.length;
				nestlev++;
			}
		} while (subloop);
	}
	return html + _parse_inline_wiki(str.substr(lastend));
}

/**
 * Build the anchor (and, when images are enabled, the <img>) for one image.
 *
 * The upload path is derived from the MD5 of the normalised file name:
 * first hex digit, then the first two hex digits, then the name.
 *
 * @param {string} filename Image name as written in the wikitext.
 * @param {string} caption  Caption wikitext; used for `alt` and, with inline
 *                          markup stripped, for the link title.
 * @param {(string|number)=} width Optional width in pixels.
 * @returns {string} HTML for the linked image or its text placeholder.
 */
function _make_image(filename, caption, width) {
	// Upper-case the first letter and turn spaces into underscores.
	// `toUpperCase` must be called; without the call the function object
	// itself is concatenated into the name.
	var name = (filename.charAt(0).toUpperCase() + filename.substr(1)).replace(/ /g, "_");
	var md5 = hex_md5(name);
	var source = md5.charAt(0) + "/" + md5.substr(0, 2) + "/" + name;
	var img;

	if (wpShowImages) {
		// Emit no width attribute at all when none was requested, rather than
		// the text "undefined".
		var widthAttr = width ? "width='" + width + "px'" : "";
		img = "<img onerror='this.onerror=null;this.src=\"" + wpImageFallbackPath + source + "\";' src='" + wpImageBasePath + source + "' alt='" + caption + "' " + widthAttr + "/>";
	} else {
		// The <em> is closed here; the recovered source left it open.
		img = wpImageNamespace + ":" + name + " <em style='color:red;'>(images disabled)</em> ";
	}

	var title = _strip_inline_wiki(caption);
	return "<a class='image' title='" + title + "' href='" + wpBaseArticlePath + wpImageNamespace + ":" + name + "'>" + img + "</a>";
}

/**
 * Replace every inline image link in `str` with the HTML produced by
 * `_parse_image`.
 *
 * Links nested inside an image link (for example in its caption) are skipped
 * over so that the image link ends at its own closing brackets. If an image
 * link is never closed, the string is returned as it stands at that point.
 *
 * @param {string} str Wikitext fragment.
 * @returns {string} The fragment with image links rendered.
 */
function _parse_inline_images(str) {
	var imagePrefix = new RegExp("^" + wpImageNamespace + ":", "i");
	var substart = 0;
	var start, close, open, nestlev, html;

	// The recovered source searched for "" instead of "[[" and "]]", although
	// every offset in the function assumes two-character delimiters.
	while (-1 != (start = str.indexOf("[[", substart))) {
		if (!imagePrefix.test(str.substr(start + 2))) {
			// Not an image link: step past it and keep looking, so that images
			// following an ordinary link are still rendered.
			substart = start + 2;
			continue;
		}
		nestlev = 0;
		substart = start;
		for (;;) {
			substart += 2;
			close = str.indexOf("]]", substart);
			open = str.indexOf("[[", substart);
			if (close == -1) {
				return str;
			}
			if (open != -1 && open < close) {
				// A nested link opens before the next close.
				substart = open;
				nestlev++;
				continue;
			}
			substart = close;
			if (nestlev) {
				nestlev--;
				continue;
			}
			html = _parse_image(str.substring(start, close + 2));
			// Splice by position instead of String.replace, so "$" sequences
			// in the generated HTML are not treated as replacement patterns.
			str = str.substring(0, start) + html + str.substring(close + 2);
			substart = start + html.length;
			break;
		}
	}
	return str;
}

/**
 * Render inline wikitext: images, <math>, bold/italic quotes, signatures,
 * internal links, external links and bare URLs.
 *
 * NOTE(review): in the recovered source the bold/italic patterns had
 * degraded to `/(.*?)(.*?)/` with whitespace replacements, and the last two
 * replacements to `replace("", "")`. These are rebuilt here from standard
 * wikitext syntax (apostrophe runs; the __NOTOC__ / __NOEDITSECTION__
 * switches). Confirm the exact tags and switches against the upstream script.
 *
 * @param {string} str Wikitext fragment with no <nowiki> sections.
 * @returns {string} The rendered HTML.
 */
function _parse_inline_wiki(str) {
	var aux_match, math_md5;
	// One timestamp for every signature in the fragment. The recovered source
	// concatenated the `Date` function itself instead of calling it.
	var now = Date();

	str = _parse_inline_images(str);

	// `(?:)` is an empty group: the pattern matches a literal <math> tag.
	while ((aux_match = str.match(/<(?:)math>(.*?)<\/math>/i))) {
		math_md5 = hex_md5(aux_match[1]);
		str = str.replace(aux_match[0], "<img src='" + wpMathBasePath + math_md5 + ".png" + "' />");
	}

	return str
		// Bold / italic combinations, longest quote runs first.
		.replace(/'''''(.*?)''(.*?)'''/g, "<strong><em>$1</em>$2</strong>")
		.replace(/'''''(.*?)'''(.*?)''/g, "<em><strong>$1</strong>$2</em>")
		.replace(/'''(.*?)''(.*?)'''''/g, "<strong>$1<em>$2</em></strong>")
		.replace(/'''(.*?)'''/g, "<strong>$1</strong>")
		.replace(/''(.*?)''/g, "<em>$1</em>")
		// Signatures: five tildes = date, four = signature and date, three = signature.
		.replace(/~{5}(?!~)/g, now)
		.replace(/~{4}(?!~)/g, wpSignature + " " + now)
		.replace(/~{3}(?!~)/g, wpSignature)
		// [[:Category:X]] / [[:xx:X]] — a visible link to a category or interwiki page.
		.replace(RegExp("\\[\\[:((?:" + wpCategoryNamespace + "|" + wpInterwikiCodes + "):.*?)\\]\\]", "gi"),
			function (x, lnk) {
				return "<a href='" + wpBaseArticlePath + lnk.replace(/'/g, "&#39;") + "'>" + lnk + "</a>";
			})
		// [[Category:X]] / [[xx:X]] without the leading colon render nothing inline.
		.replace(RegExp("\\[\\[(?:" + wpCategoryNamespace + "|" + wpInterwikiCodes + "):.*?\\]\\]", "gi"), "")
		// [[Target]]suffix
		.replace(/\[\[([^|]*?)\]\](\w*)/g,
			function (x, lnk, suffix) {
				return "<a href='" + wpBaseArticlePath + lnk.replace(/'/g, "&#39;") + "'>" + lnk + suffix + "</a>";
			})
		// [[Target|label]]suffix
		.replace(/\[\[(.*?)\|([^\]]+?)\]\](\w*)/g,
			function (x, lnk, text, suffix) {
				return "<a href='" + wpBaseArticlePath + lnk.replace(/'/g, "&#39;") + "'>" + text + suffix + "</a>";
			})
		// [[Namespace:Target (qualifier)|]] — the label drops namespace and qualifier.
		.replace(/\[\[([^\]]*?:)?(.*?)( *\(.*?\))?\|\]\]/g,
			"<a href='" + wpBaseArticlePath + "$1$2$3'>$2</a>")
		// [scheme:address label]
		.replace(/\[(http|news|ftp|mailto|gopher|irc):(\/*)([^\]]*?) (.*?)\]/g,
			"<a href='$1:$2$3'>$4</a>")
		// [http://address] with no label
		.replace(/\[http:\/\/(.*?)\]/g, "<a href='http://$1'>[#]</a>")
		// [scheme:address] with no label, other schemes
		.replace(/\[(news|ftp|mailto|gopher|irc):(\/*)(.*?)\]/g,
			"<a href='$1:$2$3'>$1:$2$3</a>")
		// Bare URLs at the start of the text or after a space.
		.replace(/(^| )(http|news|ftp|mailto|gopher|irc):(\/*)([^ $]*)/g,
			"$1<a href='$2:$3$4'>$2:$3$4</a>")
		// Behaviour switches produce no output.
		.replace(/__NOTOC__|__NOEDITSECTION__/g, "");
}

/**
 * Reduce inline wikitext to plain text suitable for an attribute value:
 * piped links become their label, plain links their target, and apostrophe
 * runs used for bold/italic are removed.
 *
 * NOTE(review): the first and third patterns were corrupted in the recovered
 * source (`/\[\^\*\|(.*?)\]\]/` and `/(.*?)/`); they are rebuilt from the
 * shape that survived and from standard wikitext syntax.
 *
 * @param {string} str Wikitext fragment.
 * @returns {string} The fragment without link brackets and quote markup.
 */
function _strip_inline_wiki(str) {
	return str
		// [[target|label]] -> label
		.replace(/\[\[[^\]]*\|(.*?)\]\]/g, "$1")
		// [[target]] -> target
		.replace(/\[\[(.*?)\]\]/g, "$1")
		// ''italic'' / '''bold''' -> inner text
		.replace(/'{2,}(.*?)'{2,}/g, "$1");
}

/**
 * Return `a` when `a > b`, otherwise `b`.
 * Uses the plain `>` operator, so it also orders strings and yields `b`
 * whenever the comparison is false (including comparisons with NaN).
 */
function max(a, b) {
	return (a > b) ? a : b;
}

/**
 * Return `a` when `a < b`, otherwise `b`.
 * Uses the plain `<` operator, so it also orders strings and yields `b`
 * whenever the comparison is false (including comparisons with NaN).
 */
function min(a, b) {
	return (a < b) ? a : b;
}

/**
 * Length of the common prefix of two strings, i.e. the index of the first
 * position at which they differ (or the length of the shorter one).
 */
function str_imatch(str_a, str_b) {
	var limit = min(str_a.length, str_b.length);
	var pos = 0;
	while (pos < limit && str_a.charAt(pos) == str_b.charAt(pos)) {
		pos++;
	}
	return pos;
}

/**
 * Remove every carriage return from `str`, leaving line feeds in place.
 */
function strip_cr(str) {
	// Cutting the string at each "\r" and gluing the pieces back together
	// drops all carriage returns, whether they precede or follow a "\n".
	return str.split("\r").join("");
}

/**
 * Escape the characters `&`, `<` and `>` as HTML entities.
 */
function html_entities(str) {
	var entityFor = { "&": "&amp;", "<": "&lt;", ">": "&gt;" };
	// A single pass over the input: each special character is looked up once,
	// so ampersands produced by the escaping are never escaped again.
	return str.replace(/[&<>]/g, function (ch) {
		return entityFor[ch];
	});
}

// Bits taken from each input character when packing a string into words.
var chrsz = 8;
// Digit alphabet used when formatting a digest as hexadecimal text.
var hex_tab = "0123456789abcdef";

/**
 * MD5 digest of a string, as text produced by binl2hex.
 *
 * @param {string} s Input string.
 * @returns {string} The formatted digest.
 */
function hex_md5(s) {
	var words = str2binl(s);
	var bitLength = s.length * chrsz;
	var digest = core_md5(words, bitLength);
	return binl2hex(digest);
}

/**
 * MD5 compression over an array of 32-bit words.
 *
 * As called by hex_md5, `x` is the packed message and `len` its length in
 * bits. The padding bit and the length word are written into `x` itself, so
 * the caller's array is modified.
 *
 * @param {number[]} x   Message words; padded in place.
 * @param {number}   len Message length in bits.
 * @returns {number[]} Four signed 32-bit words of digest state.
 */
function core_md5(x, len) {
	// Per-round tables. Round r uses mixer MIXERS[r], cycles through
	// SHIFTS[r], and reads message word (WORD_STEP[r] * j + WORD_START[r]) % 16
	// on its j-th step.
	var MIXERS = [md5_ff, md5_gg, md5_hh, md5_ii];
	var SHIFTS = [
		[7, 12, 17, 22],
		[5, 9, 14, 20],
		[4, 11, 16, 23],
		[6, 10, 15, 21]
	];
	var WORD_STEP = [1, 5, 3, 7];
	var WORD_START = [0, 1, 5, 0];
	// Additive constants, one per step, in execution order.
	var ADDENDS = [
		-680876936, -389564586, 606105819, -1044525330,
		-176418897, 1200080426, -1473231341, -45705983,
		1770035416, -1958414417, -42063, -1990404162,
		1804603682, -40341101, -1502002290, 1236535329,

		-165796510, -1069501632, 643717713, -373897302,
		-701558691, 38016083, -660478335, -405537848,
		568446438, -1019803690, -187363961, 1163531501,
		-1444681467, -51403784, 1735328473, -1926607734,

		-378558, -2022574463, 1839030562, -35309556,
		-1530992060, 1272893353, -155497632, -1094730640,
		681279174, -358537222, -722521979, 76029189,
		-640364487, -421815835, 530742520, -995338651,

		-198630844, 1126891415, -1416354905, -57434055,
		1700485571, -1894986606, -1051523, -2054922799,
		1873313359, -30611744, -1560198380, 1309151649,
		-145523070, -1120210379, 718787259, -343485551
	];

	// Append the single padding bit and store the bit length in the length slot.
	x[len >> 5] |= 128 << ((len) % 32);
	x[(((len + 64) >>> 9) << 4) + 14] = len;

	var state = [1732584193, -271733879, -1732584194, 271733878];

	for (var base = 0; base < x.length; base += 16) {
		var saved = state.slice(0);

		for (var step = 0; step < 64; step++) {
			var round = step >> 4;
			var j = step & 15;
			var word = (WORD_STEP[round] * j + WORD_START[round]) % 16;
			var mixed = MIXERS[round](
				state[0], state[1], state[2], state[3],
				x[base + word], SHIFTS[round][step & 3], ADDENDS[step]
			);
			// Each step replaces the leading register and rotates the four
			// registers one place, so after four steps the order is restored.
			state = [state[3], mixed, state[1], state[2]];
		}

		state = [
			safe_add(state[0], saved[0]),
			safe_add(state[1], saved[1]),
			safe_add(state[2], saved[2]),
			safe_add(state[3], saved[3])
		];
	}
	return state;
}

/**
 * Step shared by the four MD5 round functions: add `q`, `x` and `t` into
 * `a`, rotate the sum left by `s` bits, then add `b`. All additions go
 * through safe_add.
 */
function md5_cmn(q, a, b, x, s, t) {
	var mixed = safe_add(a, q);
	var keyed = safe_add(x, t);
	var rotated = bit_rol(safe_add(mixed, keyed), s);
	return safe_add(rotated, b);
}

/**
 * MD5 round-1 step: for each bit, selects `c` where `b` is set and `d`
 * where it is clear, then applies md5_cmn.
 */
function md5_ff(a, b, c, d, x, s, t) {
	var selected = (b & c) | ((~b) & d);
	return md5_cmn(selected, a, b, x, s, t);
}

/**
 * MD5 round-2 step: for each bit, selects `b` where `d` is set and `c`
 * where it is clear, then applies md5_cmn.
 */
function md5_gg(a, b, c, d, x, s, t) {
	var selected = (b & d) | (c & (~d));
	return md5_cmn(selected, a, b, x, s, t);
}

/**
 * MD5 round-3 step: combines `b`, `c` and `d` with exclusive-or, then
 * applies md5_cmn.
 */
function md5_hh(a, b, c, d, x, s, t) {
	var parity = b ^ c ^ d;
	return md5_cmn(parity, a, b, x, s, t);
}

/**
 * MD5 round-4 step: computes `c XOR (b OR NOT d)`, then applies md5_cmn.
 */
function md5_ii(a, b, c, d, x, s, t) {
	var blended = c ^ (b | (~d));
	return md5_cmn(blended, a, b, x, s, t);
}

/**
 * Add two integers with wrap-around at 32 bits. The sum is built from
 * 16-bit halves, carrying from the low half into the high half.
 */
function safe_add(x, y) {
	var low = (x & 0xFFFF) + (y & 0xFFFF);
	var high = (x >> 16) + (y >> 16) + (low >> 16);
	return (high << 16) | (low & 0xFFFF);
}

/**
 * Rotate a 32-bit integer left by `cnt` bits: the bits shifted out at the
 * top re-enter at the bottom.
 */
function bit_rol(num, cnt) {
	var shiftedUp = num << cnt;
	var wrappedAround = num >>> (32 - cnt);
	return shiftedUp | wrappedAround;
}

/**
 * Pack a string into an array of 32-bit words, lowest-order byte first.
 * Only the low `chrsz` bits of each character code are kept.
 *
 * @param {string} str Input string.
 * @returns {number[]} A new array of words; empty for an empty string.
 */
function str2binl(str) {
	// A fresh array per call. The recovered source assigned the `Array`
	// constructor itself, which would store the words on the global
	// constructor and report the wrong length.
	var bin = [];
	var mask = (1 << chrsz) - 1;
	for (var i = 0; i < str.length * chrsz; i += chrsz) {
		bin[i >> 5] |= (str.charCodeAt(i / chrsz) & mask) << (i % 32);
	}
	return bin;
}

/**
 * Format an array of 32-bit words as hexadecimal text, two digits per byte,
 * taking the bytes of each word from lowest-order to highest-order.
 *
 * @param {number[]} binarray Words to format.
 * @returns {string} Eight hex digits per word.
 */
function binl2hex(binarray) {
	var out = "";
	for (var w = 0; w < binarray.length; w++) {
		for (var shift = 0; shift < 32; shift += 8) {
			var octet = (binarray[w] >> shift) & 255;
			out += hex_tab.charAt(octet >> 4) + hex_tab.charAt(octet & 15);
		}
	}
	return out;
}