// User:EpochFail/Diff.js

/**
 * Diff
 *
 * Namespace object holding the word-based text diff routines below.
 * Declared with `var` so it is still a global in a classic script but no
 * longer depends on an implicit global assignment (which throws in strict
 * mode and in ES modules).
 */
var Diff = {};

/**
 * Split Text
 *
 * Splits text into words: runs of word characters, wiki markup tokens
 * ([[ ]] {{ }} {| |} |- ''' '' and runs of =), runs of newlines, runs of
 * spaces, HTML entities, and any other single character.
 *
 * @param {string} text - text to split; null/undefined is treated as ''.
 * @returns {string[]} the words in order of appearance ([] for empty text).
 */
Diff.splitText = function (text) {
	var source = String(text == null ? '' : text);

	// convert strange spaces
	var normalized = source.replace(/[\t\u000b\u00a0\u2028\u2029]+/g, ' ');

	// NOTE(review): the quote alternatives are restored as ''' and '' —
	// presumably the wiki bold/italic tokens that were lost when the page was
	// rendered. The previous pattern contained an empty alternative, which
	// matches without consuming input and made the exec() loop spin forever.
	// The final alternative is [\s\S] rather than "." so that line
	// terminators such as \r are kept as words instead of silently dropped.
	var pattern = /[\w]+|\[\[|\]\]|\{\{|\}\}|\n+| +|&\w+;|'''|''|=+|\{\||\|\}|\|\-|[\s\S]/g;

	// match() with a global pattern returns every match, or null for none
	return normalized.match(pattern) || [];
};

/**
 * Diff text
 *
 * Performs a diff over an old and new set of text by splitting both into
 * words and diffing the word lists.
 *
 * @param {string} oldText - the old version of the text.
 * @param {string} newText - the new version of the text.
 * @returns {Object} the result of Diff.diffWords for the two word lists.
 */
Diff.diffText = function (oldText, newText) {
	return Diff.diffWords(Diff.splitText(oldText), Diff.splitText(newText));
};

/**
 * Diff Words
 *
 * Performs a diff over an old and new word list recursively.
 *
 * @param {string[]} oldWords - words of the old version.
 * @param {string[]} newWords - words of the new version.
 * @param {number} [newStart=0] - first index of newWords to consider.
 * @param {number} [newEnd=newWords.length] - index after the last new word.
 * @param {number} [oldStart=0] - first index of oldWords to consider.
 * @param {number} [oldEnd=oldWords.length] - index after the last old word.
 * @param {number} [recursionLevel=0] - current recursion depth.
 * @returns {{oldWords: string[], newWords: string[], newToOld: number[], oldToNew: number[]}}
 *   the two word lists plus sparse link arrays: newToOld[i] is the index of
 *   the old word matched to new word i, oldToNew[j] the reverse; unmatched
 *   positions are left unset.
 */
Diff.diffWords = function (oldWords, newWords, newStart, newEnd, oldStart, oldEnd, recursionLevel) {
	var text = {
		oldWords: oldWords,
		newWords: newWords,
		newToOld: [],
		oldToNew: []
	};

	// set defaults (only for omitted arguments, so an explicit 0 is honoured)
	Diff._diffRegion(
		text,
		newStart == null ? 0 : newStart,
		newEnd == null ? newWords.length : newEnd,
		oldStart == null ? 0 : oldStart,
		oldEnd == null ? oldWords.length : oldEnd,
		recursionLevel == null ? 0 : recursionLevel
	);

	return text;
};

/**
 * Diff Region (internal helper of Diff.diffWords)
 *
 * Links identical words between text.newWords[newStart..newEnd) and
 * text.oldWords[oldStart..oldEnd), writing the links into text.newToOld and
 * text.oldToNew. Calls itself on regions that are still unresolved so that
 * words which are unique only inside such a region can be linked as well.
 *
 * @param {Object} text - shared state: oldWords, newWords, newToOld, oldToNew.
 * @param {number} newStart - first new index of the region.
 * @param {number} newEnd - index after the last new word of the region.
 * @param {number} oldStart - first old index of the region.
 * @param {number} oldEnd - index after the last old word of the region.
 * @param {number} recursionLevel - current recursion depth.
 */
Diff._diffRegion = function (text, newStart, newEnd, oldStart, oldEnd, recursionLevel) {
	// limit recursion depth
	if (recursionLevel > 10) {
		return;
	}

	var newWords = text.newWords;
	var oldWords = text.oldWords;
	var newToOld = text.newToOld;
	var oldToNew = text.oldToNew;

	// prototype-less table so that words such as "constructor" or "toString"
	// cannot collide with inherited properties
	var symbol = Object.create(null);
	var word, entry, key, i, j;
	var iStart, iEnd, iLength, jStart, jEnd, jLength;

	// pass 1: parse new text into symbol table
	for (i = newStart; i < newEnd; i++) {
		word = newWords[i];

		// add new entry to symbol table
		if (symbol[word] == null) {
			symbol[word] = { newCtr: 0, oldCtr: 0, toNew: null, toOld: null };
		}

		// increment symbol table word counter for new text
		symbol[word].newCtr++;

		// add last word number in new text
		symbol[word].toNew = i;
	}

	// pass 2: parse old text into symbol table
	for (j = oldStart; j < oldEnd; j++) {
		word = oldWords[j];

		// add new entry to symbol table
		if (symbol[word] == null) {
			symbol[word] = { newCtr: 0, oldCtr: 0, toNew: null, toOld: null };
		}

		// increment symbol table word counter for old text
		symbol[word].oldCtr++;

		// add last word number in old text
		symbol[word].toOld = j;
	}

	// pass 3: connect unique words
	for (key in symbol) {
		entry = symbol[key];

		// find words in the symbol table that occur only once in both versions
		if (entry.newCtr === 1 && entry.oldCtr === 1) {

			// do not use spaces as unique markers
			if (!/\s/.test(newWords[entry.toNew])) {

				// connect from new to old and from old to new
				newToOld[entry.toNew] = entry.toOld;
				oldToNew[entry.toOld] = entry.toNew;
			}
		}
	}

	// pass 4: connect adjacent identical words downwards
	for (i = newStart; i < newEnd - 1; i++) {

		// find already connected pairs
		if (newToOld[i] != null) {
			j = newToOld[i];

			// check if the following words are not yet connected
			if (newToOld[i + 1] == null && oldToNew[j + 1] == null) {

				// if the following words are the same connect them
				if (newWords[i + 1] === oldWords[j + 1]) {
					newToOld[i + 1] = j + 1;
					oldToNew[j + 1] = i + 1;
				}
			}
		}
	}

	// pass 5: connect adjacent identical words upwards
	for (i = newEnd - 1; i > newStart; i--) {

		// find already connected pairs
		if (newToOld[i] != null) {
			j = newToOld[i];

			// check if the preceding words are not yet connected
			if (newToOld[i - 1] == null && oldToNew[j - 1] == null) {

				// if the preceding words are the same connect them
				if (newWords[i - 1] === oldWords[j - 1]) {
					newToOld[i - 1] = j - 1;
					oldToNew[j - 1] = i - 1;
				}
			}
		}
	}

	// Recursion is on unless a global wDiffRecursiveDiff flag exists and is
	// falsy; typeof keeps the lookup safe when no such global is defined.
	if (typeof wDiffRecursiveDiff !== 'undefined' && !wDiffRecursiveDiff) {
		return;
	}

	// recursively diff still unresolved regions downwards
	i = newStart;
	j = oldStart;
	while (i < newEnd) {
		if (newToOld[i - 1] != null) {
			j = newToOld[i - 1] + 1;
		}

		// check for the start of an unresolved sequence
		if (newToOld[i] == null && oldToNew[j] == null) {

			// determine the ends of the sequences
			iStart = i;
			iEnd = i;
			while (iEnd < newEnd && newToOld[iEnd] == null) {
				iEnd++;
			}
			iLength = iEnd - iStart;

			jStart = j;
			jEnd = j;
			while (jEnd < oldEnd && oldToNew[jEnd] == null) {
				jEnd++;
			}
			jLength = jEnd - jStart;

			// recursively diff the unresolved sequence, but never the region
			// we are already working on (that would recurse without progress)
			if (iLength > 0 && jLength > 0 && (iLength > 1 || jLength > 1)) {
				if (iStart !== newStart || iEnd !== newEnd || jStart !== oldStart || jEnd !== oldEnd) {
					Diff._diffRegion(text, iStart, iEnd, jStart, jEnd, recursionLevel + 1);
				}
			}
			i = iEnd;
		}
		else {
			i++;
		}
	}

	// recursively diff still unresolved regions upwards
	i = newEnd - 1;
	j = oldEnd - 1;
	while (i >= newStart) {
		if (newToOld[i + 1] != null) {
			j = newToOld[i + 1] - 1;
		}

		// check for the start of an unresolved sequence
		if (newToOld[i] == null && oldToNew[j] == null) {

			// determine the ends of the sequences; the scans stop AT the
			// region start instead of stepping one index below it
			iEnd = i + 1;
			iStart = i;
			while (iStart > newStart && newToOld[iStart - 1] == null) {
				iStart--;
			}
			iLength = iEnd - iStart;

			// the old run is empty when j already lies below the region
			jEnd = j + 1;
			jStart = jEnd;
			if (j >= oldStart) {
				jStart = j;
				while (jStart > oldStart && oldToNew[jStart - 1] == null) {
					jStart--;
				}
			}
			jLength = jEnd - jStart;

			// recursively diff the unresolved sequence
			if (iLength > 0 && jLength > 0 && (iLength > 1 || jLength > 1)) {
				if (iStart !== newStart || iEnd !== newEnd || jStart !== oldStart || jEnd !== oldEnd) {
					Diff._diffRegion(text, iStart, iEnd, jStart, jEnd, recursionLevel + 1);
				}
			}
			i = iStart - 1;
		}
		else {
			i--;
		}
	}
};