User:Pilaf~enwiki/InstaView/Devel

/*

'''This version is not functional. Cacycle 01:48, 1 April 2007 (UTC)'''

InstaView - a MediaWiki to HTML converter in JavaScript. Version 0.6.2. Last update: 11:11, 24 July 2006 (UTC) by Shtriter Andrew (http://meta.wikimedia.org/wiki/User:Shtriter). Copyright (C) Pedro Fayolle 2005-2006 (http://en.wikipedia.org/wiki/User:Pilaf). Distributed under the BSD license.

0.6.4

 * MD5 code is moved to md5.js
 * Multiple changes to parse_inline_wiki(str) function:
 * Changed categories handling: categories are displayed as catlinks now, not just deleted...
 * Added title attribute for all internal links
 * Apostrophes (') in a tags changed to double quotes (")
 * Fixed 2 bugs with trails of internal links (watch the Known bugs).
 * Fixed misrendering of short external links ([http://...] -> [1]): the [#] placeholder in the label is now replaced with the link's sequence number.
 * Multiple changes to make_image(..) function
 * Fixed undefined width bug: if the width isn't defined, it is no longer added to the output (previously the word "undefined" was emitted).
 * Apostrophes (') in tags changed to double quotes (")
 * Added longdesc attribute
 * onError attribute moved from the beginning to the end of the img tag
 * Removed px suffix from the width attribute

0.6.3

 * Multiple changes to IV's output. There are (almost) no diffs between MW and IV for:
 * paragraphs
 * tables
 * tags
 * simple lists (not heavily tested yet)
 * Lots of unnecessary line breaks deleted.

0.6.2

 * Line breaks are now preserved (customisable; can be used to output end-of-line tokens)
 * Added closing tags - almost XHTML compatible
 * Fix misrendering of the following elements:
 * Full support for
 * Content between tags
 * Tables on the lines that start with space (if there's no sp_lines before - 2do)

0.6.1

 * Fixed problem caused by \r characters
 * Improved inline formatting parser

0.6

 * Changed name to InstaView
 * Some major code reorganizations and factored out some common functions
 * Handled conversion of relative links (i.e. /foo)
 * Fixed misrendering of adjacent definition list items (not fully fixed yet)
 * Fixed bug in table headings handling
 * Changed date format in signatures to reflect Mediawiki's output
 * Fixed handling of Image: ...
 * Updated MD5 function (hopefully it will work with UTF-8)
 * Fixed bug in handling of links inside images

To do:

 * 1) Add standard namespace handling in addition to the local namespaces (i.e. '..' + InstaView.conf.locale.image + '|Image')
 * The reason is that Image, Category and the other standard namespaces work in non-English wikis too.


 * 1) Urlencode the article name in href attribute
 * 2) Improve html-tags handling (i.e. don't parse block elements and tag)
 * 3) Improve transcluding (through AJAX)
 * 4) * Handle and tags depending on the URL (the page is transcluded or not)...
 * 5) * Substitute MagicWords (store the list in iw.conf?) and template's arguments
 * 6) Fix misrendering of the following elements:
 * 7) * Nested definition lists
 * 8) Validate the XHTML of output
 * 9) Support for coloured links (AJAX)
 * 10) Better support for (Ajax).
 * 11) Parser-based (as opposed to RegExp-based) inline wikicode handling (make it one-pass and bullet-proof)

Known bugs - for discussion

 * 1) Non-English words are ignored due to \w
 * Solved for Cyrillic: \w -> [\wа-яё]
 * Use \0n syntax for other alphabets?
 * 1) Uppercase characters should be ignored, but they aren't
 * Solved: \w -> [a-z]
 * Compare with MW's regexp

' + parse_inline_wiki(ll[0].substr(ll[0].indexOf('}')+1)));	return				case '-': endl(f('', $(/\|-*(.*)/)[1])); break				default: parse_table_data			} else if ($('!')) parse_table_data			//else sh;			// add new line token and shift the array of lines			/**/else endl('')		}	}	function parse_table_data	{		var td_line, match_i		// 1: "|+", '|' or '!'		// 2: Full string:		// all the chars before the "|" and "[" but not "||" if such pattern exists in the string ; and any way - the rest of the line		// (?: .. ) and (?! .. ) doesn't save the matches. So they aren't counted.		// The first matches for pattern agter the colon but not saves it.		// The second matches ig the pattern given after ! doesn't match the string.		// So:		// (?: .. )?(.*)$ - if the line before the '|' exists (see #3) return "substring"+"the_rest_of_the_line" ; otherwise return "the rest of the line" only		// (?!\|) - not a "|"		// 3: attribute - any minimal (maybe even 0-length) number of occurances of neither "[" nor "|" characters		// 4: The rest of the line - can be less that #2 - any chars till the end of the line 		var td_match = sh.match(/^\s*(\|\+|\||!)((?:([^[|]*?)\|(?!\|))?(.*))$/)		ltrim;		//alert('td_match =\n' + td_match);		if (td_match[1] == '|+') ps('')		//ps('>' + InstaView.br)		if (td_match[1] != '|+') {			// use || or !! as a cell separator depending on context			// NOTE: when split is passed a regexp make sure to use non-capturing brackets td_line = td_match[match_i].split((td_match[1] == '|')? 
'||': /(?:\|\||!!)/) ps(parse_inline_wiki(td_line.shift)) while (td_line.length) ll.unshift(td_match[1] + td_line.pop) } else ps(td_match[match_i]) var tc = 0, td = [] while (remain) { if ($('|')) { if (!tc) break // we're at the outer-most level (no nested tables), skip to td parse else if (_(1)=='}') tc-- }		else if (!tc && $('!')) break else if ($('{|')) tc++ td.push(sh); ltrim; }		if (td.length) ps(InstaView.convert(td)) //add closing or and new line token /**/ps('' + InstaView.br); }	function parse_sp_lines {		//close paragraph if it was opened /**/endP;//p=0 ps(' '); while (_(0)==' ' && remain) { /*if ( !remain || dont_parse ) break; this.inPre = true; */			endl(parse_inline_wiki(ll[0].substring(1))); }		if (this.inPre) ps(''); }	function parse_block_image {		//ps(parse_image(sh)) //add new line token /**/ps(parse_image(sh) + InstaView.br) }

function parse_image(str) {		// get what's in between "" var tag = str.substring(InstaView.conf.locale.image.length + 3, str.length - 2); var width; var attr = [], filename, caption = ''; var thumb=0, frame=0, center=0; var align=''; if (tag.match(/\|/)) { // manage nested links var nesting = 0; var last_attr; for (var i = tag.length-1; i > 0; i--) { if (tag.charAt(i) == '|' && !nesting) { last_attr = tag.substr(i+1); tag = tag.substring(0, i); break; } else switch (tag.substr(i-1, 2)) { case ']]': nesting++; i--; break; case '[[':						nesting--;						i--;				}			}			attr = tag.split(/\s*\|\s*/);			attr.push(last_attr);			filename = attr.shift;			var w_match;			for (attr.length; attr.shift)			if (w_match = attr[0].match(/^(\d*)px$/)) width = w_match[1]			else switch(attr[0]) {				case 'thumb':				case 'thumbnail':					thumb=true;				case 'frame':					frame=true;					break;				case 'none':				case 'right':				case 'left':					center=false;					align=attr[0];					break;				case 'center':					center=true;					align='none';					break;				default:					if (attr.length == 1) caption = attr[0];			}		} else filename = tag;		var o=;		if (frame) {			if (align==) align = 'right';			o += f('', align);			if (thumb) {				if (!width) width = InstaView.conf.wiki.default_thumb_width;				o += f('?', 2+width*1, make_image(filename, caption, width)) +					f('  ? ',						InstaView.conf.paths.articles + InstaView.conf.locale.image + ':' + filename,						InstaView.conf.paths.magnify_icon,						parse_inline_wiki(caption)					)			} else {				o += ' ' + make_image(filename, caption) + f(' ? ', parse_inline_wiki(caption))			}			o += '  ';		} else if (align != ) {			o += f(' ?  ', align, make_image(filename, caption, width));		} else {			return make_image(filename, caption, width);		}		//alert(width);		return center? f(' ? 
', o): o;	}	function make_image(filename, caption, width)	{	 	// uppercase first letter in file name		filename = filename.charAt(0).toUpperCase + filename.substr(1);		// replace spaces with underscores		filename = filename.replace(/ /g, '_');		caption = strip_inline_wiki(caption);		var md5 = hex_md5(filename);		var source = md5.charAt(0) + '/' + md5.substr(0,2) + '/' + filename;		width = (width) ? 'width="' + width + '"' : ;		var img = f('', InstaView.conf.paths.images + source, (caption!=)? 'alt="' + caption + '"' : , InstaView.conf.paths.articles + InstaView.conf.locale.image + ':' + filename, width, InstaView.conf.paths.images_fallback + source);		return f('?', InstaView.conf.paths.articles + InstaView.conf.locale.image + ':' + filename, (caption!=)? 'title="' + caption + '"' : , img);	}	function parse_inline_images(str)	{		var start, substart=0, nestlev=0;		var loop, close, open, wiki, html;		while (-1 != (start=str.indexOf(, substart))) {			if(str.substr(start+2).match(RegExp('^' + InstaView.conf.locale.image + ':','i'))) {				loop=true;				substart=start;				do {					substart+=2;					close=str.indexOf(,substart);					open=str.indexOf('[[',substart);					if (close<=open||open==-1) {						if (close==-1) return str;						substart=close;						if (nestlev) {							nestlev--;						} else {							wiki=str.substring(start,close+2);							html=parse_image(wiki);							str=str.replace(wiki,html);							substart=start+html.length;							loop=false;						}					} else {						substart=open;						nestlev++;					}				} while (loop)			} else break;		}		return str;	}	// the output of this function doesn't respect the FILO structure of HTML	// but since most browsers can handle it I'll save myself the hassle	function parse_inline_formatting(str)	{		var em,st,i,li,o=;		while ((i=str.indexOf("",li))+1) {			o += str.substring(li,i);			li=i+2;			if (str.charAt(i+2)=="'") {				li++;				st=!st;				//o+=st?' ':' ';				//MW uses  and 				/**/o+=st?:;

} else { em=!em; //o+=em?' ':' '; /**/o+=em?':'; }		}		return o+str.substr(li); }	function parse_inline_wiki(str) {		var aux_match; str = parse_inline_images(str); str = parse_inline_formatting(str); // math while (aux_match = str.match(/<(?:)math>(.*?)<\/math>/i)) { var math_md5 = hex_md5(aux_match[1]); str = str.replace(aux_match[0], f('', InstaView.conf.paths.math+math_md5)); }		// Build a Mediawiki-formatted date string var date = new Date; var minutes = date.getUTCMinutes; if (minutes < 10) minutes = '0' + minutes; var date = f("?:?, ? ? ? (UTC)", date.getUTCHours, minutes, date.getUTCDate, InstaView.conf.locale.months[date.getUTCMonth], date.getUTCFullYear); //alert('Строка' + urlencode('Строка') ); // text formatting return str. // signatures replace(/~{5}(?!~)/g, date). replace(/~{4}(?!~)/g, InstaView.conf.user.name+' '+date). replace(/~{3}(?!~)/g, InstaView.conf.user.name). /*			//**********************************			// and replace(/\{\{\{(.*?)(?:\|(.*?))?\}\}\}/g, this.replaceArguments). // 			replace(/\{\{([^\]]*?:)?(.*?)(?:\|(.*?))?\}\}/g, this.replaceTemplates). //**********************************				// 2Do: Urlencode the article name in href attribute // Category:..., Image:..., etc... replace(RegExp('\\[\\[:((?:'+InstaView.conf.locale.category+'|'+InstaView.conf.locale.image+'|'+InstaView.conf.wiki.interwiki+'):.*?)\\]\\]','gi'), '<a href="'+InstaView.conf.paths.articles+'$1" title="$1">$1</a>'). replace(RegExp('\\[\\[('+InstaView.conf.locale.category+'|'+InstaView.conf.wiki.interwiki+'):(.*?)\\]\\]','gi'), ' <a href="'+InstaView.conf.paths.articles+'$1:$2" title="$1:$2">$2</a> '). // /Relative links replace(/\[\[(\/[^|]*?)\]\]/g, f('<a href="?$1" title="$1">$1</a>', location)). // Relative links replace(/\[\[(\/.*?)\|(.+?)\]\]/g, f('<a href="?$1" title="$1">$2</a>', location)). 
// Bug: Non-english words are ignored due to \w // 	Solved for cyrilic: \w -> [\wа-яё] // Bug: Upcase chars should be ignored, they dont //	Solved: \w - > [a-z] // Common linkswith_trail replace(/\[\[([^|]*?)\]\]([a-zа-яё]*)/g, f('<a href="?$1" title="$1">$1$2</a>', InstaView.conf.paths.articles)). // Linkswith_trail replace(/\[\[(.*?)\|([^\]]+?)\]\]([a-zа-яё]*)/g, f('<a href="?$1" title="$1">$2$3</a>', InstaView.conf.paths.articles)). // Stripped:Namespace replace(/\[\[([^\]]*?:)?(.*?)( *\(.*?\))?\|\]\]/g, f('<a href="?$1$2$3" title="$1$2$3">$2</a>', InstaView.conf.paths.articles)). // External links replace(/\[(https?|news|ftp|mailto|gopher|irc):(\/*)([^\]]*?) (.*?)\]/g, '<a href="$1:$2$3">$4</a>'). replace(/\[http:\/\/(.*?)\]/g, '<a href="http://$1">[#]</a>'). replace(/\[(news|ftp|mailto|gopher|irc):(\/*)(.*?)\]/g, '<a href="$1:$2$3">$1:$2$3</a>'). replace(/(^| )(https?|news|ftp|mailto|gopher|irc):(\/*)([^ $]*)/g, '$1<a href="$2:$3$4">$2:$3$4</a>'). replace(,). replace(,); }	function strip_inline_wiki(str) {		return str .replace(/\[\^\*\|(.*?)\]\]/g,'$1') .replace(/\[\[(.*?)\]\]/g,'$1') .replace(/(.*?)/g,'$1'); }	// begin parsing do { parse_nowiki; parse_pre; //dont_parse; if (!remain) break; //parse headings if ($(/^(={1,6})(.*)\1(.*)$/)) { //close paragraph if it was opened /**/endP;//p=0 endl(f('<h?>?</h?>?', $r[1].length, parse_inline_wiki($r[2]), $r[1].length, $r[3])) //alert('headings processed:\n' + o)		//parse lists } else if ($(/^[*#:;]/)) { //close paragraph if it was opened /**/endP;//p=0 /**/ps(InstaView.br); parse_list //alert('lists processed:\n' + o)		//parse tables } else if ($(/^(?:\s*)\{\|/)) { //alert('ll[0] = "' + ll[0] + '"') //close paragraph if it was opened /**/endP;//p=0 parse_table //alert('tables processed:\n' + o)		//parse lines that start with space } else if ( _(0) == ' ') { parse_sp_lines //alert('spaced lines processed:\n' + o)		//do the } else if ($(/^+$/)) { //close paragraph if it was opened /**/endP;//p=0 endl(' ') 
//alert('horizontal lines processed:\n' + o)		//parse images } else if ($(InstaView.BLOCK_IMAGE)) { //close paragraph if it was opened /**/endP;//p=0 parse_block_image //alert('images processed:\n' + o)		} else { //escape < /pre > tag cause there's no opening tag and it should be treaten as text ll[0] = ll[0].replace( '</'+'pre>', '&lt;/pre&gt;') // handle paragraphs if (trim(ll[0]) == '') { //blank line //if (p = (remain>1 && ll[1]==(''))) endl(' ') //if para was opened - close it				//if 2 empty strings - add hard line break if ( endP ) if ( remain>1 ){ ps(' '); p=1; /**/	if (trim(ll[1])==('') ) /**/		{sh; ps(' '); p=1} } else break; } else { if(!p) { ps(' ') p=1 }				if (remain<2) { //alert('The last line:' + ll[0]); ps(parse_inline_wiki(ll[0])); endP; break; } else ps(parse_inline_wiki(ll[0])); //add new line token //ps(parse_inline_wiki(ll[0]) + InstaView.br) //alert('Add inblockElem - in order to disable the creation of new paragraph' + 				//'for such elements as:\n tables, headings, lists and so on.' );

}			//alert('paragraphs processed:\n' + o)			//sh; //add new line token and shift the array of lines /**/endl(''); }	} while (remain) //add closing /**/o = o.replace(/(<\/t[dh]>\s*)(<tr (.*)>|<\/table>)/gim, '$1 '+ InstaView.br +'$2') /**/.replace(/ /gim, ' ') //escape closing < / nowiki > tags /**/.replace(/<\/nowiki>/i, '&lt;/nowiki&gt;'); CatLinks = o.match(/ (.*?)<\/span>/gim); if (CatLinks) {		o += ' '; for (i in CatLinks) {			alert(CatLinks[i]); o += CatLinks[i].replace(' style="display:none"', ''); if ( i != CatLinks.length-1 ) { alert(i); alert(CatLinks.length-1); o += ' | ';} }		o += ' '; }	/*// - maybe faster, but don't work :(((.	ShortExtLinks = o.match('[#]');	for (i in ShortExtLinks)		{		alert(ShortExtLinks[i] + ' ' + i);		o.replace(/\[#\]/m, '[' + i + ']');		}	*/	i=0;	while ( /\[#\]/m.test(o) ) o=o.replace(/\[#\]/m, '[' + ++i + ']');	//alert('"'+o+'"');	return o }