All Downloads are FREE. Search and download functionalities are using the official Maven repository.

report.js.wikEd.diff.js Maven / Gradle / Ivy

Go to download

Compares XML XSD schemas providing simple and straight differences report

There is a newer version: 1.1.0
Show newest version
// 

// ==UserScript==
// @name        wikEd diff
// @version     1.2.4
// @date        October 23, 2014
// @description improved word-based diff library with block move detection
// @homepage    https://en.wikipedia.org/wiki/User:Cacycle/diff
// @source      https://en.wikipedia.org/wiki/User:Cacycle/diff.js
// @author      Cacycle (https://en.wikipedia.org/wiki/User:Cacycle)
// @license     released into the public domain
// ==/UserScript==

/**
 * wikEd diff: inline-style difference engine with block move support
 *
 * Improved JavaScript diff library that returns html/css-formatted new text version with
 * highlighted deletions, insertions, and block moves. It is compatible with all browsers and is
 * not dependent on external libraries.
 *
 * WikEdDiff.php and the JavaScript library wikEd diff are synced one-to-one ports. Changes and
 * fixes are to be applied to both versions.
 *
 * JavaScript library (mirror): https://en.wikipedia.org/wiki/User:Cacycle/diff
 * JavaScript online tool: http://cacycle.altervista.org/wikEd-diff-tool.html
 * MediaWiki extension: https://www.mediawiki.org/wiki/Extension:wikEdDiff
 *
 * This difference engine applies a word-based algorithm that uses unique words as anchor points
 * to identify matching text and moved blocks (Paul Heckel: A technique for isolating differences
 * between files. Communications of the ACM 21(4):264 (1978)).
 *
 * Additional features:
 *
 * - Visual inline style, changes are shown in a single output text
 * - Block move detection and highlighting
 * - Resolution down to characters level
 * - Unicode and multilingual support
 * - Stepwise split (paragraphs, lines, sentences, words, characters)
 * - Recursive diff
 * - Optimized code for resolving unmatched sequences
 * - Minimization of length of moved blocks
 * - Alignment of ambiguous unmatched sequences to next line break or word border
 * - Clipping of unchanged irrelevant parts from the output (optional)
 * - Fully customizable
 * - Text split optimized for MediaWiki source texts
 * - Well commented and documented code
 *
 * Datastructures (abbreviations from publication):
 *
 * class WikEdDiffText:  diff text object (new or old version)
 *   .text                 text of version
 *   .words[]              word count table
 *   .first                index of first token in tokens list
 *   .last                 index of last token in tokens list
 *
 *   .tokens[]:          token list for new or old string (doubly-linked list) (N and O)
 *     .prev               previous list item
 *     .next               next list item
 *     .token              token string
 *     .link               index of corresponding token in new or old text (OA and NA)
 *     .number             list enumeration number
 *     .unique             token is unique word in text
 *
 * class WikEdDiff:      diff object
 *   .config[]:            configuration settings, see top of code for customization options
 *      .regExp[]:            all regular expressions
 *          .split             regular expressions used for splitting text into tokens
 *      .htmlCode            HTML code fragments used for creating the output
 *      .msg                 output messages
 *   .newText              new text
 *   .oldText              old text
 *   .maxWords             word count of longest linked block
 *   .html                 diff html
 *   .error                flag: result has not passed unit tests
 *   .bordersDown[]        linked region borders downwards, [new index, old index]
 *   .bordersUp[]          linked region borders upwards, [new index, old index]
 *   .symbols:             symbols table for whole text at all refinement levels
 *     .token[]              hash table of parsed tokens for passes 1 - 3, points to symbol[i]
 *     .symbol[]:            array of objects that hold token counters and pointers:
 *       .newCount             new text token counter (NC)
 *       .oldCount             old text token counter (OC)
 *       .newToken             token index in text.newText.tokens
 *       .oldToken             token index in text.oldText.tokens
 *     .linked               flag: at least one unique token pair has been linked
 *
 *   .blocks[]:            array, block data (consecutive text tokens) in new text order
 *     .oldBlock             number of block in old text order
 *     .newBlock             number of block in new text order
 *     .oldNumber            old text token number of first token
 *     .newNumber            new text token number of first token
 *     .oldStart             old text token index of first token
 *     .count                number of tokens
 *     .unique               contains unique linked token
 *     .words                word count
 *     .chars                char length
 *     .type                 '=', '-', '+', '|' (same, deletion, insertion, mark)
 *     .section              section number
 *     .group                group number of block
 *     .fixed                belongs to a fixed (not moved) group
 *     .moved                moved block group number corresponding with mark block
 *     .text                 text of block tokens
 *
 *   .sections[]:          array, block sections with no block move crosses outside a section
 *     .blockStart           first block in section
 *     .blockEnd             last block in section

 *   .groups[]:            array, section blocks that are consecutive in old text order
 *     .oldNumber            first block oldNumber
 *     .blockStart           first block index
 *     .blockEnd             last block index
 *     .unique               contains unique linked token
 *     .maxWords             word count of longest linked block
 *     .words                word count
 *     .chars                char count
 *     .fixed                not moved from original position
 *     .movedFrom            group position this group has been moved from
 *     .color                color number of moved group
 *
 *   .fragments[]:         diff fragment list ready for markup, abstraction layer for customization
 *     .text                 block or mark text
 *     .color                moved block or mark color number
 *     .type                 '=', '-', '+'   same, deletion, insertion
 *                           '<', '>'        mark left, mark right
 *                           '(<', '(>', ')' block start and end
 *                           '~', ' ~', '~ ' omission indicators
 *                           '[', ']', ','   fragment start and end, fragment separator
 *                           '{', '}'        container start and end
 *
 */

// JSHint options
/* jshint -W004, -W100, newcap: true, browser: true, jquery: true, sub: true, bitwise: true,
	curly: true, evil: true, forin: true, freeze: true, globalstrict: true, immed: true,
	latedef: true, loopfunc: true, quotmark: single, strict: true, undef: true */
/* global console */

// Turn on ECMAScript 5 strict mode
'use strict';

/** Define global objects. */
var wikEdDiffConfig;
var WED;


/**
 * wikEd diff main class.
 *
 * @class WikEdDiff
 */
var WikEdDiff = function () {

	/** @var array config Configuration and customization settings. */
	this.config = {

		/** Core diff settings (with default values). */

		/**
		 * @var bool config.fullDiff
		 *   Show complete un-clipped diff text (false)
		 */
		'fullDiff': true,

		/**
		 * @var bool config.showBlockMoves
		 *   Enable block move layout with highlighted blocks and marks at the original positions (true)
		 */
		'showBlockMoves': true,

		/**
		 * @var bool config.charDiff
		 *   Enable character-refined diff (true)
		 */
		'charDiff': true,

		/**
		 * @var bool config.repeatedDiff
		 *   Enable repeated diff to resolve problematic sequences (true)
		 */
		'repeatedDiff': true,

		/**
		 * @var bool config.recursiveDiff
		 *   Enable recursive diff to resolve problematic sequences (true)
		 */
		'recursiveDiff': true,

		/**
		 * @var int config.recursionMax
		 *   Maximum recursion depth (10)
		 */
		'recursionMax': 10,

		/**
		 * @var bool config.unlinkBlocks
		 *   Reject blocks if they are too short and their words are not unique,
		 *   prevents fragmentated diffs for very different versions (true)
		 */
		'unlinkBlocks': true,

		/**
		 * @var int config.unlinkMax
		 *   Maximum number of rejection cycles (5)
		 */
		'unlinkMax': 5,

		/**
		 * @var int config.blockMinLength
		 *   Reject blocks if shorter than this number of real words (3)
		 */
		'blockMinLength': 3,

		/**
		 * @var bool config.coloredBlocks
		 *   Display blocks in differing colors (rainbow color scheme) (false)
		 */
		'coloredBlocks': false,

		/**
		 * @var bool config.coloredBlocks
		 *   Do not use UniCode block move marks (legacy browsers) (false)
		 */
		'noUnicodeSymbols': false,

		/**
		 * @var bool config.stripTrailingNewline
		 *   Strip trailing newline off of texts (true in .js, false in .php)
		 */
		'stripTrailingNewline': true,

		/**
		 * @var bool config.debug
		 *   Show debug infos and stats (block, group, and fragment data) in debug console (false)
		 */
		'debug': false,

		/**
		 * @var bool config.timer
		 *   Show timing results in debug console (false)
		 */
		'timer': false,

		/**
		 * @var bool config.unitTesting
		 *   Run unit tests to prove correct working, display results in debug console (false)
		 */
		'unitTesting': false,

		/** RegExp character classes. */

		// UniCode letter support for regexps
		// From http://xregexp.com/addons/unicode/unicode-base.js v1.0.0
		'regExpLetters':
			'a-zA-Z0-9' + (
				'00AA00B500BA00C0-00D600D8-00F600F8-02C102C6-02D102E0-02E402EC02EE0370-037403760377037A-' +
				'037D03860388-038A038C038E-03A103A3-03F503F7-0481048A-05270531-055605590561-058705D0-05EA' +
				'05F0-05F20620-064A066E066F0671-06D306D506E506E606EE06EF06FA-06FC06FF07100712-072F074D-' +
				'07A507B107CA-07EA07F407F507FA0800-0815081A082408280840-085808A008A2-08AC0904-0939093D' +
				'09500958-09610971-09770979-097F0985-098C098F09900993-09A809AA-09B009B209B6-09B909BD09CE' +
				'09DC09DD09DF-09E109F009F10A05-0A0A0A0F0A100A13-0A280A2A-0A300A320A330A350A360A380A39' +
				'0A59-0A5C0A5E0A72-0A740A85-0A8D0A8F-0A910A93-0AA80AAA-0AB00AB20AB30AB5-0AB90ABD0AD00AE0' +
				'0AE10B05-0B0C0B0F0B100B13-0B280B2A-0B300B320B330B35-0B390B3D0B5C0B5D0B5F-0B610B710B83' +
				'0B85-0B8A0B8E-0B900B92-0B950B990B9A0B9C0B9E0B9F0BA30BA40BA8-0BAA0BAE-0BB90BD00C05-0C0C' +
				'0C0E-0C100C12-0C280C2A-0C330C35-0C390C3D0C580C590C600C610C85-0C8C0C8E-0C900C92-0CA80CAA-' +
				'0CB30CB5-0CB90CBD0CDE0CE00CE10CF10CF20D05-0D0C0D0E-0D100D12-0D3A0D3D0D4E0D600D610D7A-' +
				'0D7F0D85-0D960D9A-0DB10DB3-0DBB0DBD0DC0-0DC60E01-0E300E320E330E40-0E460E810E820E840E87' +
				'0E880E8A0E8D0E94-0E970E99-0E9F0EA1-0EA30EA50EA70EAA0EAB0EAD-0EB00EB20EB30EBD0EC0-0EC4' +
				'0EC60EDC-0EDF0F000F40-0F470F49-0F6C0F88-0F8C1000-102A103F1050-1055105A-105D106110651066' +
				'106E-10701075-1081108E10A0-10C510C710CD10D0-10FA10FC-1248124A-124D1250-12561258125A-125D' +
				'1260-1288128A-128D1290-12B012B2-12B512B8-12BE12C012C2-12C512C8-12D612D8-13101312-1315' +
				'1318-135A1380-138F13A0-13F41401-166C166F-167F1681-169A16A0-16EA1700-170C170E-17111720-' +
				'17311740-17511760-176C176E-17701780-17B317D717DC1820-18771880-18A818AA18B0-18F51900-191C' +
				'1950-196D1970-19741980-19AB19C1-19C71A00-1A161A20-1A541AA71B05-1B331B45-1B4B1B83-1BA0' +
				'1BAE1BAF1BBA-1BE51C00-1C231C4D-1C4F1C5A-1C7D1CE9-1CEC1CEE-1CF11CF51CF61D00-1DBF1E00-1F15' +
				'1F18-1F1D1F20-1F451F48-1F4D1F50-1F571F591F5B1F5D1F5F-1F7D1F80-1FB41FB6-1FBC1FBE1FC2-1FC4' +
				'1FC6-1FCC1FD0-1FD31FD6-1FDB1FE0-1FEC1FF2-1FF41FF6-1FFC2071207F2090-209C21022107210A-2113' +
				'21152119-211D212421262128212A-212D212F-2139213C-213F2145-2149214E218321842C00-2C2E2C30-' +
				'2C5E2C60-2CE42CEB-2CEE2CF22CF32D00-2D252D272D2D2D30-2D672D6F2D80-2D962DA0-2DA62DA8-2DAE' +
				'2DB0-2DB62DB8-2DBE2DC0-2DC62DC8-2DCE2DD0-2DD62DD8-2DDE2E2F300530063031-3035303B303C3041-' +
				'3096309D-309F30A1-30FA30FC-30FF3105-312D3131-318E31A0-31BA31F0-31FF3400-4DB54E00-9FCC' +
				'A000-A48CA4D0-A4FDA500-A60CA610-A61FA62AA62BA640-A66EA67F-A697A6A0-A6E5A717-A71FA722-' +
				'A788A78B-A78EA790-A793A7A0-A7AAA7F8-A801A803-A805A807-A80AA80C-A822A840-A873A882-A8B3' +
				'A8F2-A8F7A8FBA90A-A925A930-A946A960-A97CA984-A9B2A9CFAA00-AA28AA40-AA42AA44-AA4BAA60-' +
				'AA76AA7AAA80-AAAFAAB1AAB5AAB6AAB9-AABDAAC0AAC2AADB-AADDAAE0-AAEAAAF2-AAF4AB01-AB06AB09-' +
				'AB0EAB11-AB16AB20-AB26AB28-AB2EABC0-ABE2AC00-D7A3D7B0-D7C6D7CB-D7FBF900-FA6DFA70-FAD9' +
				'FB00-FB06FB13-FB17FB1DFB1F-FB28FB2A-FB36FB38-FB3CFB3EFB40FB41FB43FB44FB46-FBB1FBD3-FD3D' +
				'FD50-FD8FFD92-FDC7FDF0-FDFBFE70-FE74FE76-FEFCFF21-FF3AFF41-FF5AFF66-FFBEFFC2-FFC7FFCA-' +
				'FFCFFFD2-FFD7FFDA-FFDC'
			).replace( /(\w{4})/g, '\\u$1' ),

		// New line characters without and with \n and \r
		'regExpNewLines': '\\u0085\\u2028',
		'regExpNewLinesAll': '\\n\\r\\u0085\\u2028',

		// Breaking white space characters without \n, \r, and \f
		'regExpBlanks': ' \\t\\x0b\\u2000-\\u200b\\u202f\\u205f\\u3000',

		// Full stops without '.'
		'regExpFullStops':
			'\\u0589\\u06D4\\u0701\\u0702\\u0964\\u0DF4\\u1362\\u166E\\u1803\\u1809' +
			'\\u2CF9\\u2CFE\\u2E3C\\u3002\\uA4FF\\uA60E\\uA6F3\\uFE52\\uFF0E\\uFF61',

		// New paragraph characters without \n and \r
		'regExpNewParagraph': '\\f\\u2029',

		// Exclamation marks without '!'
		'regExpExclamationMarks':
			'\\u01C3\\u01C3\\u01C3\\u055C\\u055C\\u07F9\\u1944\\u1944' +
			'\\u203C\\u203C\\u2048\\u2048\\uFE15\\uFE57\\uFF01',

		// Question marks without '?'
		'regExpQuestionMarks':
			'\\u037E\\u055E\\u061F\\u1367\\u1945\\u2047\\u2049' +
			'\\u2CFA\\u2CFB\\u2E2E\\uA60F\\uA6F7\\uFE56\\uFF1F',

		/** Clip settings. */

		// Find clip position: characters from right
		'clipHeadingLeft':      1500,
		'clipParagraphLeftMax': 1500,
		'clipParagraphLeftMin':  500,
		'clipLineLeftMax':      1000,
		'clipLineLeftMin':       500,
		'clipBlankLeftMax':     1000,
		'clipBlankLeftMin':      500,
		'clipCharsLeft':         500,

		// Find clip position: characters from right
		'clipHeadingRight':      1500,
		'clipParagraphRightMax': 1500,
		'clipParagraphRightMin':  500,
		'clipLineRightMax':      1000,
		'clipLineRightMin':       500,
		'clipBlankRightMax':     1000,
		'clipBlankRightMin':      500,
		'clipCharsRight':         500,

		// Maximum number of lines to search for clip position
		'clipLinesRightMax': 10,
		'clipLinesLeftMax': 10,

		// Skip clipping if ranges are too close
		'clipSkipLines': 5,
		'clipSkipChars': 1000,

		// Css stylesheet
		'cssMarkLeft': '◀',
		'cssMarkRight': '▶',
		'stylesheet':

			// Insert
			'.wikEdDiffInsert {' +
			'font-weight: bold; background-color: #bbddff; ' +
			'color: #222; border-radius: 0.25em; padding: 0.2em 1px; ' +
			'} ' +
			'.wikEdDiffInsertBlank { background-color: #66bbff; } ' +
			'.wikEdDiffFragment:hover .wikEdDiffInsertBlank { background-color: #bbddff; } ' +

			// Delete
			'.wikEdDiffDelete {' +
			'font-weight: bold; background-color: #ffe49c; ' +
			'color: #222; border-radius: 0.25em; padding: 0.2em 1px; ' +
			'} ' +
			'.wikEdDiffDeleteBlank { background-color: #ffd064; } ' +
			'.wikEdDiffFragment:hover .wikEdDiffDeleteBlank { background-color: #ffe49c; } ' +

			// Block
			'.wikEdDiffBlock {' +
			'font-weight: bold; background-color: #e8e8e8; ' +
			'border-radius: 0.25em; padding: 0.2em 1px; margin: 0 1px; ' +
			'} ' +
			'.wikEdDiffBlock { } ' +
			'.wikEdDiffBlock0 { background-color: #ffff80; } ' +
			'.wikEdDiffBlock1 { background-color: #d0ff80; } ' +
			'.wikEdDiffBlock2 { background-color: #ffd8f0; } ' +
			'.wikEdDiffBlock3 { background-color: #c0ffff; } ' +
			'.wikEdDiffBlock4 { background-color: #fff888; } ' +
			'.wikEdDiffBlock5 { background-color: #bbccff; } ' +
			'.wikEdDiffBlock6 { background-color: #e8c8ff; } ' +
			'.wikEdDiffBlock7 { background-color: #ffbbbb; } ' +
			'.wikEdDiffBlock8 { background-color: #a0e8a0; } ' +
			'.wikEdDiffBlockHighlight {' +
			'background-color: #777; color: #fff; ' +
			'border: solid #777; border-width: 1px 0; ' +
			'} ' +

			// Mark
			'.wikEdDiffMarkLeft, .wikEdDiffMarkRight {' +
			'font-weight: bold; background-color: #ffe49c; ' +
			'color: #666; border-radius: 0.25em; padding: 0.2em; margin: 0 1px; ' +
			'} ' +
			'.wikEdDiffMarkLeft:before { content: "{cssMarkLeft}"; } ' +
			'.wikEdDiffMarkRight:before { content: "{cssMarkRight}"; } ' +
			'.wikEdDiffMarkLeft.wikEdDiffNoUnicode:before { content: "<"; } ' +
			'.wikEdDiffMarkRight.wikEdDiffNoUnicode:before { content: ">"; } ' +
			'.wikEdDiffMark { background-color: #e8e8e8; color: #666; } ' +
			'.wikEdDiffMark0 { background-color: #ffff60; } ' +
			'.wikEdDiffMark1 { background-color: #c8f880; } ' +
			'.wikEdDiffMark2 { background-color: #ffd0f0; } ' +
			'.wikEdDiffMark3 { background-color: #a0ffff; } ' +
			'.wikEdDiffMark4 { background-color: #fff860; } ' +
			'.wikEdDiffMark5 { background-color: #b0c0ff; } ' +
			'.wikEdDiffMark6 { background-color: #e0c0ff; } ' +
			'.wikEdDiffMark7 { background-color: #ffa8a8; } ' +
			'.wikEdDiffMark8 { background-color: #98e898; } ' +
			'.wikEdDiffMarkHighlight { background-color: #777; color: #fff; } ' +

			// Wrappers
			'.wikEdDiffContainer { } ' +
			'.wikEdDiffFragment {' +
			'white-space: pre-wrap; background: #fff; border: #bbb solid; ' +
			'border-width: 1px 1px 1px 0.5em; border-radius: 0.5em; font-family: sans-serif; ' +
			'font-size: 88%; line-height: 1.6; box-shadow: 2px 2px 2px #ddd; padding: 1em; margin: 0; ' +
			'} ' +
			'.wikEdDiffNoChange { background: #f0f0f0; border: 1px #bbb solid; border-radius: 0.5em; ' +
			'line-height: 1.6; box-shadow: 2px 2px 2px #ddd; padding: 0.5em; margin: 1em 0; ' +
			'text-align: center; ' +
			'} ' +
			'.wikEdDiffSeparator { margin-bottom: 1em; } ' +
			'.wikEdDiffOmittedChars { } ' +

			// Newline
			'.wikEdDiffNewline:before { content: "¶"; color: transparent; } ' +
			'.wikEdDiffBlock:hover .wikEdDiffNewline:before { color: #aaa; } ' +
			'.wikEdDiffBlockHighlight .wikEdDiffNewline:before { color: transparent; } ' +
			'.wikEdDiffBlockHighlight:hover .wikEdDiffNewline:before { color: #ccc; } ' +
			'.wikEdDiffBlockHighlight:hover .wikEdDiffInsert .wikEdDiffNewline:before, ' +
			'.wikEdDiffInsert:hover .wikEdDiffNewline:before' +
			'{ color: #999; } ' +
			'.wikEdDiffBlockHighlight:hover .wikEdDiffDelete .wikEdDiffNewline:before, ' +
			'.wikEdDiffDelete:hover .wikEdDiffNewline:before' +
			'{ color: #aaa; } ' +

			// Tab
			'.wikEdDiffTab { position: relative; } ' +
			'.wikEdDiffTabSymbol { position: absolute; top: -0.2em; } ' +
			'.wikEdDiffTabSymbol:before { content: "→"; font-size: smaller; color: #ccc; } ' +
			'.wikEdDiffBlock .wikEdDiffTabSymbol:before { color: #aaa; } ' +
			'.wikEdDiffBlockHighlight .wikEdDiffTabSymbol:before { color: #aaa; } ' +
			'.wikEdDiffInsert .wikEdDiffTabSymbol:before { color: #aaa; } ' +
			'.wikEdDiffDelete .wikEdDiffTabSymbol:before { color: #bbb; } ' +

			// Space
			'.wikEdDiffSpace { position: relative; } ' +
			'.wikEdDiffSpaceSymbol { position: absolute; top: -0.2em; left: -0.05em; } ' +
			'.wikEdDiffSpaceSymbol:before { content: "·"; color: transparent; } ' +
			'.wikEdDiffBlock:hover .wikEdDiffSpaceSymbol:before { color: #999; } ' +
			'.wikEdDiffBlockHighlight .wikEdDiffSpaceSymbol:before { color: transparent; } ' +
			'.wikEdDiffBlockHighlight:hover .wikEdDiffSpaceSymbol:before { color: #ddd; } ' +
			'.wikEdDiffBlockHighlight:hover .wikEdDiffInsert .wikEdDiffSpaceSymbol:before,' +
			'.wikEdDiffInsert:hover .wikEdDiffSpaceSymbol:before ' +
			'{ color: #888; } ' +
			'.wikEdDiffBlockHighlight:hover .wikEdDiffDelete .wikEdDiffSpaceSymbol:before,' +
			'.wikEdDiffDelete:hover .wikEdDiffSpaceSymbol:before ' +
			'{ color: #999; } ' +

			// Error
			'.wikEdDiffError .wikEdDiffFragment,' +
			'.wikEdDiffError .wikEdDiffNoChange' +
			'{ background: #faa; }'
	};

	/** Add regular expressions to configuration settings. */

	this.config.regExp = {

		// RegExps for splitting text
		'split': {

			// Split into paragraphs, after double newlines
			'paragraph': new RegExp(
				'(\\r\\n|\\n|\\r){2,}|[' +
				this.config.regExpNewParagraph +
				']',
				'g'
			),

			// Split into lines
			'line': new RegExp(
				'\\r\\n|\\n|\\r|[' +
				this.config.regExpNewLinesAll +
				']',
				'g'
			),

			// Split into sentences /[^ ].*?[.!?:;]+(?= |$)/
			'sentence': new RegExp(
				'[^' +
				this.config.regExpBlanks +
				'].*?[.!?:;' +
				this.config.regExpFullStops +
				this.config.regExpExclamationMarks +
				this.config.regExpQuestionMarks +
				']+(?=[' +
				this.config.regExpBlanks +
				']|$)',
				'g'
			),

			// Split into inline chunks
			'chunk': new RegExp(
				'\\[\\[[^\\[\\]\\n]+\\]\\]|' +       // [[wiki link]]
				'\\{\\{[^\\{\\}\\n]+\\}\\}|' +       // {{template}}
				'\\[[^\\[\\]\\n]+\\]|' +             // [ext. link]
				'<\\/?[^<>\\[\\]\\{\\}\\n]+>|' +     // 
				'\\[\\[[^\\[\\]\\|\\n]+\\]\\]\\||' + // [[wiki link|
				'\\{\\{[^\\{\\}\\|\\n]+\\||' +       // {{template|
				'\\b((https?:|)\\/\\/)[^\\x00-\\x20\\s"\\[\\]\\x7f]+', // link
				'g'
			),

			// Split into words, multi-char markup, and chars
			// regExpLetters speed-up: \\w+
			'word': new RegExp(
				'(\\w+|[_' +
				this.config.regExpLetters +
				'])+([\'’][_' +
				this.config.regExpLetters +
				']*)*|\\[\\[|\\]\\]|\\{\\{|\\}\\}|&\\w+;|\'\'\'|\'\'|==+|\\{\\||\\|\\}|\\|-|.',
				'g'
			),

			// Split into chars
			'character': /./g
		},

		// RegExp to detect blank tokens
		'blankOnlyToken': new RegExp(
			'[^' +
			this.config.regExpBlanks +
			this.config.regExpNewLinesAll +
			this.config.regExpNewParagraph +
			']'
		),

		// RegExps for sliding gaps: newlines and space/word breaks
		'slideStop': new RegExp(
			'[' +
			this.config.regExpNewLinesAll +
			this.config.regExpNewParagraph +
			']$'
		),
		'slideBorder': new RegExp(
			'[' +
			this.config.regExpBlanks +
			']$'
		),

		// RegExps for counting words
		'countWords': new RegExp(
			'(\\w+|[_' +
			this.config.regExpLetters +
			'])+([\'’][_' +
			this.config.regExpLetters +
			']*)*',
			'g'
		),
		'countChunks': new RegExp(
			'\\[\\[[^\\[\\]\\n]+\\]\\]|' +       // [[wiki link]]
			'\\{\\{[^\\{\\}\\n]+\\}\\}|' +       // {{template}}
			'\\[[^\\[\\]\\n]+\\]|' +             // [ext. link]
			'<\\/?[^<>\\[\\]\\{\\}\\n]+>|' +     // 
			'\\[\\[[^\\[\\]\\|\\n]+\\]\\]\\||' + // [[wiki link|
			'\\{\\{[^\\{\\}\\|\\n]+\\||' +       // {{template|
			'\\b((https?:|)\\/\\/)[^\\x00-\\x20\\s"\\[\\]\\x7f]+', // link
			'g'
		),

		// RegExp detecting blank-only and single-char blocks
		'blankBlock': /^([^\t\S]+|[^\t])$/,

		// RegExps for clipping
		'clipLine': new RegExp(
			'[' + this.config.regExpNewLinesAll +
			this.config.regExpNewParagraph +
			']+',
			'g'
		),
		'clipHeading': new RegExp(
			'( ^|\\n)(==+.+?==+|\\{\\||\\|\\}).*?(?=\\n|$)', 'g' ),
		'clipParagraph': new RegExp(
			'( (\\r\\n|\\n|\\r){2,}|[' +
			this.config.regExpNewParagraph +
			'])+',
			'g'
		),
		'clipBlank': new RegExp(
			'[' +
			this.config.regExpBlanks + ']+',
			'g'
		),
		'clipTrimNewLinesLeft': new RegExp(
			'[' +
			this.config.regExpNewLinesAll +
			this.config.regExpNewParagraph +
			']+$',
			'g'
		),
		'clipTrimNewLinesRight': new RegExp(
			'^[' +
			this.config.regExpNewLinesAll +
			this.config.regExpNewParagraph +
			']+',
			'g'
		),
		'clipTrimBlanksLeft': new RegExp(
			'[' +
			this.config.regExpBlanks +
			this.config.regExpNewLinesAll +
			this.config.regExpNewParagraph +
			']+$',
			'g'
		),
		'clipTrimBlanksRight': new RegExp(
			'^[' +
			this.config.regExpBlanks +
			this.config.regExpNewLinesAll +
			this.config.regExpNewParagraph +
			']+',
			'g'
		)
	};

	/** Add messages to configuration settings. */

	this.config.msg = {
    'wiked-diff-empty': '(No difference)',
    'wiked-diff-same':  '=',
    'wiked-diff-ins':   '+',
    'wiked-diff-del':   '-',
    'wiked-diff-block-left':  '◀',
    'wiked-diff-block-right': '▶',
    'wiked-diff-block-left-nounicode':  '<',
    'wiked-diff-block-right-nounicode': '>',
		'wiked-diff-error': 'Error: diff not consistent with versions!'
	};

	/**
	 * Add output html fragments to configuration settings.
	 * Dynamic replacements:
	 *   {number}: class/color/block/mark/id number
	 *   {title}: title attribute (popup)
	 *   {nounicode}: noUnicodeSymbols fallback
	 */
	this.config.htmlCode = {
		'noChangeStart':
			'
', 'noChangeEnd': '
', 'containerStart': '
', 'containerEnd': '
', 'fragmentStart': '
',
		'fragmentEnd': '
', 'separator': '
', 'insertStart': '', 'insertStartBlank': '', 'insertEnd': '', 'deleteStart': '', 'deleteStartBlank': '', 'deleteEnd': '', 'blockStart': '', 'blockColoredStart': '', 'blockEnd': '', 'markLeft': '', 'markLeftColored': '', 'markRight': '', 'markRightColored': '', 'newline': '\n', 'tab': '\t', 'space': ' ', 'omittedChars': '', 'errorStart': '
', 'errorEnd': '
' }; /* * Add JavaScript event handler function to configuration settings * Highlights corresponding block and mark elements on hover and jumps between them on click * Code for use in non-jQuery environments and legacy browsers (at least IE 8 compatible) * * @option Event|undefined event Browser event if available * @option element Node DOM node * @option type string Event type */ this.config.blockHandler = function ( event, element, type ) { // IE compatibility if ( event === undefined && window.event !== undefined ) { event = window.event; } // Get mark/block elements var number = element.id.replace( /\D/g, '' ); var block = document.getElementById( 'wikEdDiffBlock' + number ); var mark = document.getElementById( 'wikEdDiffMark' + number ); if ( block === null || mark === null ) { return; } // Highlight corresponding mark/block pairs if ( type === 'mouseover' ) { element.onmouseover = null; element.onmouseout = function ( event ) { window.wikEdDiffBlockHandler( event, element, 'mouseout' ); }; element.onclick = function ( event ) { window.wikEdDiffBlockHandler( event, element, 'click' ); }; block.className += ' wikEdDiffBlockHighlight'; mark.className += ' wikEdDiffMarkHighlight'; } // Remove mark/block highlighting if ( type === 'mouseout' || type === 'click' ) { element.onmouseout = null; element.onmouseover = function ( event ) { window.wikEdDiffBlockHandler( event, element, 'mouseover' ); }; // Reset, allow outside container (e.g. legend) if ( type !== 'click' ) { block.className = block.className.replace( / wikEdDiffBlockHighlight/g, '' ); mark.className = mark.className.replace( / wikEdDiffMarkHighlight/g, '' ); // GetElementsByClassName var container = document.getElementById( 'wikEdDiffContainer' ); if ( container !== null ) { var spans = container.getElementsByTagName( 'span' ); var spansLength = spans.length; for ( var i = 0; i < spansLength; i ++ ) { if ( spans[i] !== block && spans[i] !== mark ) { if ( spans[i].className.indexOf( ' wikEdDiffBlockHighlight' ) !== -1 ) { spans[i].className = spans[i].className.replace( / wikEdDiffBlockHighlight/g, '' ); } else if ( spans[i].className.indexOf( ' wikEdDiffMarkHighlight') !== -1 ) { spans[i].className = spans[i].className.replace( / wikEdDiffMarkHighlight/g, '' ); } } } } } } // Scroll to corresponding mark/block element if ( type === 'click' ) { // Get corresponding element var corrElement; if ( element === block ) { corrElement = mark; } else { corrElement = block; } // Get element height (getOffsetTop) var corrElementPos = 0; var node = corrElement; do { corrElementPos += node.offsetTop; } while ( ( node = node.offsetParent ) !== null ); // Get scroll height var top; if ( window.pageYOffset !== undefined ) { top = window.pageYOffset; } else { top = document.documentElement.scrollTop; } // Get cursor pos var cursor; if ( event.pageY !== undefined ) { cursor = event.pageY; } else if ( event.clientY !== undefined ) { cursor = event.clientY + top; } // Get line height var line = 12; if ( window.getComputedStyle !== undefined ) { line = parseInt( window.getComputedStyle( corrElement ).getPropertyValue( 'line-height' ) ); } // Scroll element under mouse cursor window.scroll( 0, corrElementPos + top - cursor + line / 2 ); } return; }; /** Internal data structures. */ /** @var WikEdDiffText newText New text version object with text and token list */ this.newText = null; /** @var WikEdDiffText oldText Old text version object with text and token list */ this.oldText = null; /** @var object symbols Symbols table for whole text at all refinement levels */ this.symbols = { token: [], hashTable: {}, linked: false }; /** @var array bordersDown Matched region borders downwards */ this.bordersDown = []; /** @var array bordersUp Matched region borders upwards */ this.bordersUp = []; /** @var array blocks Block data (consecutive text tokens) in new text order */ this.blocks = []; /** @var int maxWords Maximal detected word count of all linked blocks */ this.maxWords = 0; /** @var array groups Section blocks that are consecutive in old text order */ this.groups = []; /** @var array sections Block sections with no block move crosses outside a section */ this.sections = []; /** @var object timer Debug timer array: string 'label' => float milliseconds. */ this.timer = {}; /** @var array recursionTimer Count time spent in recursion level in milliseconds. */ this.recursionTimer = []; /** Output data. */ /** @var bool error Unit tests have detected a diff error */ this.error = false; /** @var array fragments Diff fragment list for markup, abstraction layer for customization */ this.fragments = []; /** @var string html Html code of diff */ this.html = ''; /** * Constructor, initialize settings, load js and css. * * @param[in] object wikEdDiffConfig Custom customization settings * @param[out] object config Settings */ this.init = function () { // Import customizations from wikEdDiffConfig{} if ( typeof wikEdDiffConfig === 'object' ) { this.deepCopy( wikEdDiffConfig, this.config ); } // Add CSS stylescheet this.addStyleSheet( this.config.stylesheet ); // Load block handler script if ( this.config.showBlockMoves === true ) { // Add block handler to head if running under Greasemonkey if ( typeof GM_info === 'object' ) { var script = 'var wikEdDiffBlockHandler = ' + this.config.blockHandler.toString() + ';'; this.addScript( script ); } else { window.wikEdDiffBlockHandler = this.config.blockHandler; } } return; }; /** * Main diff method. * * @param string oldString Old text version * @param string newString New text version * @param[out] array fragment * Diff fragment list ready for markup, abstraction layer for customized diffs * @param[out] string html Html code of diff * @return string Html code of diff */ this.diff = function ( oldString, newString ) { // Start total timer if ( this.config.timer === true ) { this.time( 'total' ); } // Start diff timer if ( this.config.timer === true ) { this.time( 'diff' ); } // Reset error flag this.error = false; // Strip trailing newline (.js only) if ( this.config.stripTrailingNewline === true ) { if ( newString.substr( -1 ) === '\n' && oldString.substr( -1 === '\n' ) ) { newString = newString.substr( 0, newString.length - 1 ); oldString = oldString.substr( 0, oldString.length - 1 ); } } // Load version strings into WikEdDiffText objects this.newText = new WikEdDiff.WikEdDiffText( newString, this ); this.oldText = new WikEdDiff.WikEdDiffText( oldString, this ); // Trap trivial changes: no change if ( this.newText.text === this.oldText.text ) { this.html = this.config.htmlCode.containerStart + this.config.htmlCode.noChangeStart + this.htmlEscape( this.config.msg['wiked-diff-empty'] ) + this.config.htmlCode.noChangeEnd + this.config.htmlCode.containerEnd; return this.html; } // Trap trivial changes: old text deleted if ( this.oldText.text === '' || ( this.oldText.text === '\n' && ( this.newText.text.charAt( this.newText.text.length - 1 ) === '\n' ) ) ) { this.html = this.config.htmlCode.containerStart + this.config.htmlCode.fragmentStart + this.config.htmlCode.insertStart + this.htmlEscape( this.newText.text ) + this.config.htmlCode.insertEnd + this.config.htmlCode.fragmentEnd + this.config.htmlCode.containerEnd; return this.html; } // Trap trivial changes: new text deleted if ( this.newText.text === '' || ( this.newText.text === '\n' && ( this.oldText.text.charAt( this.oldText.text.length - 1 ) === '\n' ) ) ) { this.html = this.config.htmlCode.containerStart + this.config.htmlCode.fragmentStart + this.config.htmlCode.deleteStart + this.htmlEscape( this.oldText.text ) + this.config.htmlCode.deleteEnd + this.config.htmlCode.fragmentEnd + this.config.htmlCode.containerEnd; return this.html; } // Split new and old text into paragraps if ( this.config.timer === true ) { this.time( 'paragraph split' ); } this.newText.splitText( 'paragraph' ); this.oldText.splitText( 'paragraph' ); if ( this.config.timer === true ) { this.timeEnd( 'paragraph split' ); } // Calculate diff this.calculateDiff( 'line' ); // Refine different paragraphs into lines if ( this.config.timer === true ) { this.time( 'line split' ); } this.newText.splitRefine( 'line' ); this.oldText.splitRefine( 'line' ); if ( this.config.timer === true ) { this.timeEnd( 'line split' ); } // Calculate refined diff this.calculateDiff( 'line' ); // Refine different lines into sentences if ( this.config.timer === true ) { this.time( 'sentence split' ); } this.newText.splitRefine( 'sentence' ); this.oldText.splitRefine( 'sentence' ); if ( this.config.timer === true ) { this.timeEnd( 'sentence split' ); } // Calculate refined diff this.calculateDiff( 'sentence' ); // Refine different sentences into chunks if ( this.config.timer === true ) { this.time( 'chunk split' ); } this.newText.splitRefine( 'chunk' ); this.oldText.splitRefine( 'chunk' ); if ( this.config.timer === true ) { this.timeEnd( 'chunk split' ); } // Calculate refined diff this.calculateDiff( 'chunk' ); // Refine different chunks into words if ( this.config.timer === true ) { this.time( 'word split' ); } this.newText.splitRefine( 'word' ); this.oldText.splitRefine( 'word' ); if ( this.config.timer === true ) { this.timeEnd( 'word split' ); } // Calculate refined diff information with recursion for unresolved gaps this.calculateDiff( 'word', true ); // Slide gaps if ( this.config.timer === true ) { this.time( 'word slide' ); } this.slideGaps( this.newText, this.oldText ); this.slideGaps( this.oldText, this.newText ); if ( this.config.timer === true ) { this.timeEnd( 'word slide' ); } // Split tokens into chars if ( this.config.charDiff === true ) { // Split tokens into chars in selected unresolved gaps if ( this.config.timer === true ) { this.time( 'character split' ); } this.splitRefineChars(); if ( this.config.timer === true ) { this.timeEnd( 'character split' ); } // Calculate refined diff information with recursion for unresolved gaps this.calculateDiff( 'character', true ); // Slide gaps if ( this.config.timer === true ) { this.time( 'character slide' ); } this.slideGaps( this.newText, this.oldText ); this.slideGaps( this.oldText, this.newText ); if ( this.config.timer === true ) { this.timeEnd( 'character slide' ); } } // Free memory this.symbols = undefined; this.bordersDown = undefined; this.bordersUp = undefined; this.newText.words = undefined; this.oldText.words = undefined; // Enumerate token lists this.newText.enumerateTokens(); this.oldText.enumerateTokens(); // Detect moved blocks if ( this.config.timer === true ) { this.time( 'blocks' ); } this.detectBlocks(); if ( this.config.timer === true ) { this.timeEnd( 'blocks' ); } // Free memory this.newText.tokens = undefined; this.oldText.tokens = undefined; // Assemble blocks into fragment table this.getDiffFragments(); // Free memory this.blocks = undefined; this.groups = undefined; this.sections = undefined; // Stop diff timer if ( this.config.timer === true ) { this.timeEnd( 'diff' ); } // Unit tests if ( this.config.unitTesting === true ) { // Test diff to test consistency between input and output if ( this.config.timer === true ) { this.time( 'unit tests' ); } this.unitTests(); if ( this.config.timer === true ) { this.timeEnd( 'unit tests' ); } } // Clipping if ( this.config.fullDiff === false ) { // Clipping unchanged sections from unmoved block text if ( this.config.timer === true ) { this.time( 'clip' ); } this.clipDiffFragments(); if ( this.config.timer === true ) { this.timeEnd( 'clip' ); } } // Create html formatted diff code from diff fragments if ( this.config.timer === true ) { this.time( 'html' ); } this.getDiffHtml(); if ( this.config.timer === true ) { this.timeEnd( 'html' ); } // No change if ( this.html === '' ) { this.html = this.config.htmlCode.containerStart + this.config.htmlCode.noChangeStart + this.htmlEscape( this.config.msg['wiked-diff-empty'] ) + this.config.htmlCode.noChangeEnd + this.config.htmlCode.containerEnd; } // Add error indicator if ( this.error === true ) { this.html = this.config.htmlCode.errorStart + this.html + this.config.htmlCode.errorEnd; } // Stop total timer if ( this.config.timer === true ) { this.timeEnd( 'total' ); } return this.html; }; /** * Split tokens into chars in the following unresolved regions (gaps): * - One token became connected or separated by space or dash (or any token) * - Same number of tokens in gap and strong similarity of all tokens: * - Addition or deletion of flanking strings in tokens * - Addition or deletion of internal string in tokens * - Same length and at least 50 % identity * - Same start or end, same text longer than different text * Identical tokens including space separators will be linked, * resulting in word-wise char-level diffs * * @param[in/out] WikEdDiffText newText, oldText Text object tokens list */ this.splitRefineChars = function () { /** Find corresponding gaps. */ // Cycle through new text tokens list var gaps = []; var gap = null; var i = this.newText.first; var j = this.oldText.first; while ( i !== null ) { // Get token links var newLink = this.newText.tokens[i].link; var oldLink = null; if ( j !== null ) { oldLink = this.oldText.tokens[j].link; } // Start of gap in new and old if ( gap === null && newLink === null && oldLink === null ) { gap = gaps.length; gaps.push( { newFirst: i, newLast: i, newTokens: 1, oldFirst: j, oldLast: j, oldTokens: null, charSplit: null } ); } // Count chars and tokens in gap else if ( gap !== null && newLink === null ) { gaps[gap].newLast = i; gaps[gap].newTokens ++; } // Gap ended else if ( gap !== null && newLink !== null ) { gap = null; } // Next list elements if ( newLink !== null ) { j = this.oldText.tokens[newLink].next; } i = this.newText.tokens[i].next; } // Cycle through gaps and add old text gap data var gapsLength = gaps.length; for ( var gap = 0; gap < gapsLength; gap ++ ) { // Cycle through old text tokens list var j = gaps[gap].oldFirst; while ( j !== null && this.oldText.tokens[j] !== null && this.oldText.tokens[j].link === null ) { // Count old chars and tokens in gap gaps[gap].oldLast = j; gaps[gap].oldTokens ++; j = this.oldText.tokens[j].next; } } /** Select gaps of identical token number and strong similarity of all tokens. */ var gapsLength = gaps.length; for ( var gap = 0; gap < gapsLength; gap ++ ) { var charSplit = true; // Not same gap length if ( gaps[gap].newTokens !== gaps[gap].oldTokens ) { // One word became separated by space, dash, or any string if ( gaps[gap].newTokens === 1 && gaps[gap].oldTokens === 3 ) { var token = this.newText.tokens[ gaps[gap].newFirst ].token; var tokenFirst = this.oldText.tokens[ gaps[gap].oldFirst ].token; var tokenLast = this.oldText.tokens[ gaps[gap].oldLast ].token; if ( token.indexOf( tokenFirst ) !== 0 || token.indexOf( tokenLast ) !== token.length - tokenLast.length ) { continue; } } else if ( gaps[gap].oldTokens === 1 && gaps[gap].newTokens === 3 ) { var token = this.oldText.tokens[ gaps[gap].oldFirst ].token; var tokenFirst = this.newText.tokens[ gaps[gap].newFirst ].token; var tokenLast = this.newText.tokens[ gaps[gap].newLast ].token; if ( token.indexOf( tokenFirst ) !== 0 || token.indexOf( tokenLast ) !== token.length - tokenLast.length ) { continue; } } else { continue; } gaps[gap].charSplit = true; } // Cycle through new text tokens list and set charSplit else { var i = gaps[gap].newFirst; var j = gaps[gap].oldFirst; while ( i !== null ) { var newToken = this.newText.tokens[i].token; var oldToken = this.oldText.tokens[j].token; // Get shorter and longer token var shorterToken; var longerToken; if ( newToken.length < oldToken.length ) { shorterToken = newToken; longerToken = oldToken; } else { shorterToken = oldToken; longerToken = newToken; } // Not same token length if ( newToken.length !== oldToken.length ) { // Test for addition or deletion of internal string in tokens // Find number of identical chars from left var left = 0; while ( left < shorterToken.length ) { if ( newToken.charAt( left ) !== oldToken.charAt( left ) ) { break; } left ++; } // Find number of identical chars from right var right = 0; while ( right < shorterToken.length ) { if ( newToken.charAt( newToken.length - 1 - right ) !== oldToken.charAt( oldToken.length - 1 - right ) ) { break; } right ++; } // No simple insertion or deletion of internal string if ( left + right !== shorterToken.length ) { // Not addition or deletion of flanking strings in tokens // Smaller token not part of larger token if ( longerToken.indexOf( shorterToken ) === -1 ) { // Same text at start or end shorter than different text if ( left < shorterToken.length / 2 && (right < shorterToken.length / 2) ) { // Do not split into chars in this gap charSplit = false; break; } } } } // Same token length else if ( newToken !== oldToken ) { // Tokens less than 50 % identical var ident = 0; var tokenLength = shorterToken.length; for ( var pos = 0; pos < tokenLength; pos ++ ) { if ( shorterToken.charAt( pos ) === longerToken.charAt( pos ) ) { ident ++; } } if ( ident / shorterToken.length < 0.49 ) { // Do not split into chars this gap charSplit = false; break; } } // Next list elements if ( i === gaps[gap].newLast ) { break; } i = this.newText.tokens[i].next; j = this.oldText.tokens[j].next; } gaps[gap].charSplit = charSplit; } } /** Refine words into chars in selected gaps. */ var gapsLength = gaps.length; for ( var gap = 0; gap < gapsLength; gap ++ ) { if ( gaps[gap].charSplit === true ) { // Cycle through new text tokens list, link spaces, and split into chars var i = gaps[gap].newFirst; var j = gaps[gap].oldFirst; var newGapLength = i - gaps[gap].newLast; var oldGapLength = j - gaps[gap].oldLast; while ( i !== null || j !== null ) { // Link identical tokens (spaces) to keep char refinement to words if ( newGapLength === oldGapLength && this.newText.tokens[i].token === this.oldText.tokens[j].token ) { this.newText.tokens[i].link = j; this.oldText.tokens[j].link = i; } // Refine words into chars else { if ( i !== null ) { this.newText.splitText( 'character', i ); } if ( j !== null ) { this.oldText.splitText( 'character', j ); } } // Next list elements if ( i === gaps[gap].newLast ) { i = null; } if ( j === gaps[gap].oldLast ) { j = null; } if ( i !== null ) { i = this.newText.tokens[i].next; } if ( j !== null ) { j = this.oldText.tokens[j].next; } } } } return; }; /** * Move gaps with ambiguous identical fronts to last newline border or otherwise last word border. * * @param[in/out] wikEdDiffText text, textLinked These two are newText and oldText */ this.slideGaps = function ( text, textLinked ) { var regExpSlideBorder = this.config.regExp.slideBorder; var regExpSlideStop = this.config.regExp.slideStop; // Cycle through tokens list var i = text.first; var gapStart = null; while ( i !== null ) { // Remember gap start if ( gapStart === null && text.tokens[i].link === null ) { gapStart = i; } // Find gap end else if ( gapStart !== null && text.tokens[i].link !== null ) { var gapFront = gapStart; var gapBack = text.tokens[i].prev; // Slide down as deep as possible var front = gapFront; var back = text.tokens[gapBack].next; if ( front !== null && back !== null && text.tokens[front].link === null && text.tokens[back].link !== null && text.tokens[front].token === text.tokens[back].token ) { text.tokens[front].link = text.tokens[back].link; textLinked.tokens[ text.tokens[front].link ].link = front; text.tokens[back].link = null; gapFront = text.tokens[gapFront].next; gapBack = text.tokens[gapBack].next; front = text.tokens[front].next; back = text.tokens[back].next; } // Test slide up, remember last line break or word border var front = text.tokens[gapFront].prev; var back = gapBack; var gapFrontBlankTest = regExpSlideBorder.test( text.tokens[gapFront].token ); var frontStop = front; if ( text.tokens[back].link === null ) { while ( front !== null && back !== null && text.tokens[front].link !== null && text.tokens[front].token === text.tokens[back].token ) { if ( front !== null ) { // Stop at line break if ( regExpSlideStop.test( text.tokens[front].token ) === true ) { frontStop = front; break; } // Stop at first word border (blank/word or word/blank) if ( regExpSlideBorder.test( text.tokens[front].token ) !== gapFrontBlankTest ) { frontStop = front; } } front = text.tokens[front].prev; back = text.tokens[back].prev; } } // Actually slide up to stop var front = text.tokens[gapFront].prev; var back = gapBack; while ( front !== null && back !== null && front !== frontStop && text.tokens[front].link !== null && text.tokens[back].link === null && text.tokens[front].token === text.tokens[back].token ) { text.tokens[back].link = text.tokens[front].link; textLinked.tokens[ text.tokens[back].link ].link = back; text.tokens[front].link = null; front = text.tokens[front].prev; back = text.tokens[back].prev; } gapStart = null; } i = text.tokens[i].next; } return; }; /** * Calculate diff information, can be called repeatedly during refining. * Links corresponding tokens from old and new text. * Steps: * Pass 1: parse new text into symbol table * Pass 2: parse old text into symbol table * Pass 3: connect unique matching tokens * Pass 4: connect adjacent identical tokens downwards * Pass 5: connect adjacent identical tokens upwards * Repeat with empty symbol table (against crossed-over gaps) * Recursively diff still unresolved regions downwards with empty symbol table * Recursively diff still unresolved regions upwards with empty symbol table * * @param array symbols Symbol table object * @param string level Split level: 'paragraph', 'line', 'sentence', 'chunk', 'word', 'character' * * Optionally for recursive or repeated calls: * @param bool repeating Currently repeating with empty symbol table * @param bool recurse Enable recursion * @param int newStart, newEnd, oldStart, oldEnd Text object tokens indices * @param int recursionLevel Recursion level * @param[in/out] WikEdDiffText newText, oldText Text object, tokens list link property */ this.calculateDiff = function ( level, recurse, repeating, newStart, oldStart, up, recursionLevel ) { // Set defaults if ( repeating === undefined ) { repeating = false; } if ( recurse === undefined ) { recurse = false; } if ( newStart === undefined ) { newStart = this.newText.first; } if ( oldStart === undefined ) { oldStart = this.oldText.first; } if ( up === undefined ) { up = false; } if ( recursionLevel === undefined ) { recursionLevel = 0; } // Start timers if ( this.config.timer === true && repeating === false && recursionLevel === 0 ) { this.time( level ); } if ( this.config.timer === true && repeating === false ) { this.time( level + recursionLevel ); } // Get object symbols table and linked region borders var symbols; var bordersDown; var bordersUp; if ( recursionLevel === 0 && repeating === false ) { symbols = this.symbols; bordersDown = this.bordersDown; bordersUp = this.bordersUp; } // Create empty local symbols table and linked region borders arrays else { symbols = { token: [], hashTable: {}, linked: false }; bordersDown = []; bordersUp = []; } // Updated versions of linked region borders var bordersUpNext = []; var bordersDownNext = []; /** * Pass 1: parse new text into symbol table. */ // Cycle through new text tokens list var i = newStart; while ( i !== null ) { if ( this.newText.tokens[i].link === null ) { // Add new entry to symbol table var token = this.newText.tokens[i].token; if ( Object.prototype.hasOwnProperty.call( symbols.hashTable, token ) === false ) { symbols.hashTable[token] = symbols.token.length; symbols.token.push( { newCount: 1, oldCount: 0, newToken: i, oldToken: null } ); } // Or update existing entry else { // Increment token counter for new text var hashToArray = symbols.hashTable[token]; symbols.token[hashToArray].newCount ++; } } // Stop after gap if recursing else if ( recursionLevel > 0 ) { break; } // Get next token if ( up === false ) { i = this.newText.tokens[i].next; } else { i = this.newText.tokens[i].prev; } } /** * Pass 2: parse old text into symbol table. */ // Cycle through old text tokens list var j = oldStart; while ( j !== null ) { if ( this.oldText.tokens[j].link === null ) { // Add new entry to symbol table var token = this.oldText.tokens[j].token; if ( Object.prototype.hasOwnProperty.call( symbols.hashTable, token ) === false ) { symbols.hashTable[token] = symbols.token.length; symbols.token.push( { newCount: 0, oldCount: 1, newToken: null, oldToken: j } ); } // Or update existing entry else { // Increment token counter for old text var hashToArray = symbols.hashTable[token]; symbols.token[hashToArray].oldCount ++; // Add token number for old text symbols.token[hashToArray].oldToken = j; } } // Stop after gap if recursing else if ( recursionLevel > 0 ) { break; } // Get next token if ( up === false ) { j = this.oldText.tokens[j].next; } else { j = this.oldText.tokens[j].prev; } } /** * Pass 3: connect unique tokens. */ // Cycle through symbol array var symbolsLength = symbols.token.length; for ( var i = 0; i < symbolsLength; i ++ ) { // Find tokens in the symbol table that occur only once in both versions if ( symbols.token[i].newCount === 1 && symbols.token[i].oldCount === 1 ) { var newToken = symbols.token[i].newToken; var oldToken = symbols.token[i].oldToken; var newTokenObj = this.newText.tokens[newToken]; var oldTokenObj = this.oldText.tokens[oldToken]; // Connect from new to old and from old to new if ( newTokenObj.link === null ) { // Do not use spaces as unique markers if ( this.config.regExp.blankOnlyToken.test( newTokenObj.token ) === true ) { // Link new and old tokens newTokenObj.link = oldToken; oldTokenObj.link = newToken; symbols.linked = true; // Save linked region borders bordersDown.push( [newToken, oldToken] ); bordersUp.push( [newToken, oldToken] ); // Check if token contains unique word if ( recursionLevel === 0 ) { var unique = false; if ( level === 'character' ) { unique = true; } else { var token = newTokenObj.token; var words = ( token.match( this.config.regExp.countWords ) || [] ).concat( ( token.match( this.config.regExp.countChunks ) || [] ) ); // Unique if longer than min block length var wordsLength = words.length; if ( wordsLength >= this.config.blockMinLength ) { unique = true; } // Unique if it contains at least one unique word else { for ( var i = 0;i < wordsLength; i ++ ) { var word = words[i]; if ( this.oldText.words[word] === 1 && this.newText.words[word] === 1 && Object.prototype.hasOwnProperty.call( this.oldText.words, word ) === true && Object.prototype.hasOwnProperty.call( this.newText.words, word ) === true ) { unique = true; break; } } } } // Set unique if ( unique === true ) { newTokenObj.unique = true; oldTokenObj.unique = true; } } } } } } // Continue passes only if unique tokens have been linked previously if ( symbols.linked === true ) { /** * Pass 4: connect adjacent identical tokens downwards. */ // Cycle through list of linked new text tokens var bordersLength = bordersDown.length; for ( var match = 0; match < bordersLength; match ++ ) { var i = bordersDown[match][0]; var j = bordersDown[match][1]; // Next down var iMatch = i; var jMatch = j; i = this.newText.tokens[i].next; j = this.oldText.tokens[j].next; // Cycle through new text list gap region downwards while ( i !== null && j !== null && this.newText.tokens[i].link === null && this.oldText.tokens[j].link === null ) { // Connect if same token if ( this.newText.tokens[i].token === this.oldText.tokens[j].token ) { this.newText.tokens[i].link = j; this.oldText.tokens[j].link = i; } // Not a match yet, maybe in next refinement level else { bordersDownNext.push( [iMatch, jMatch] ); break; } // Next token down iMatch = i; jMatch = j; i = this.newText.tokens[i].next; j = this.oldText.tokens[j].next; } } /** * Pass 5: connect adjacent identical tokens upwards. */ // Cycle through list of connected new text tokens var bordersLength = bordersUp.length; for ( var match = 0; match < bordersLength; match ++ ) { var i = bordersUp[match][0]; var j = bordersUp[match][1]; // Next up var iMatch = i; var jMatch = j; i = this.newText.tokens[i].prev; j = this.oldText.tokens[j].prev; // Cycle through new text gap region upwards while ( i !== null && j !== null && this.newText.tokens[i].link === null && this.oldText.tokens[j].link === null ) { // Connect if same token if ( this.newText.tokens[i].token === this.oldText.tokens[j].token ) { this.newText.tokens[i].link = j; this.oldText.tokens[j].link = i; } // Not a match yet, maybe in next refinement level else { bordersUpNext.push( [iMatch, jMatch] ); break; } // Next token up iMatch = i; jMatch = j; i = this.newText.tokens[i].prev; j = this.oldText.tokens[j].prev; } } /** * Connect adjacent identical tokens downwards from text start. * Treat boundary as connected, stop after first connected token. */ // Only for full text diff if ( recursionLevel === 0 && repeating === false ) { // From start var i = this.newText.first; var j = this.oldText.first; var iMatch = null; var jMatch = null; // Cycle through old text tokens down // Connect identical tokens, stop after first connected token while ( i !== null && j !== null && this.newText.tokens[i].link === null && this.oldText.tokens[j].link === null && this.newText.tokens[i].token === this.oldText.tokens[j].token ) { this.newText.tokens[i].link = j; this.oldText.tokens[j].link = i; iMatch = i; jMatch = j; i = this.newText.tokens[i].next; j = this.oldText.tokens[j].next; } if ( iMatch !== null ) { bordersDownNext.push( [iMatch, jMatch] ); } // From end i = this.newText.last; j = this.oldText.last; iMatch = null; jMatch = null; // Cycle through old text tokens up // Connect identical tokens, stop after first connected token while ( i !== null && j !== null && this.newText.tokens[i].link === null && this.oldText.tokens[j].link === null && this.newText.tokens[i].token === this.oldText.tokens[j].token ) { this.newText.tokens[i].link = j; this.oldText.tokens[j].link = i; iMatch = i; jMatch = j; i = this.newText.tokens[i].prev; j = this.oldText.tokens[j].prev; } if ( iMatch !== null ) { bordersUpNext.push( [iMatch, jMatch] ); } } // Save updated linked region borders to object if ( recursionLevel === 0 && repeating === false ) { this.bordersDown = bordersDownNext; this.bordersUp = bordersUpNext; } // Merge local updated linked region borders into object else { this.bordersDown = this.bordersDown.concat( bordersDownNext ); this.bordersUp = this.bordersUp.concat( bordersUpNext ); } /** * Repeat once with empty symbol table to link hidden unresolved common tokens in cross-overs. * ("and" in "and this a and b that" -> "and this a and b that") */ if ( repeating === false && this.config.repeatedDiff === true ) { var repeat = true; this.calculateDiff( level, recurse, repeat, newStart, oldStart, up, recursionLevel ); } /** * Refine by recursively diffing not linked regions with new symbol table. * At word and character level only. * Helps against gaps caused by addition of common tokens around sequences of common tokens. */ if ( recurse === true && this.config['recursiveDiff'] === true && recursionLevel < this.config.recursionMax ) { /** * Recursively diff gap downwards. */ // Cycle through list of linked region borders var bordersLength = bordersDownNext.length; for ( match = 0; match < bordersLength; match ++ ) { var i = bordersDownNext[match][0]; var j = bordersDownNext[match][1]; // Next token down i = this.newText.tokens[i].next; j = this.oldText.tokens[j].next; // Start recursion at first gap token pair if ( i !== null && j !== null && this.newText.tokens[i].link === null && this.oldText.tokens[j].link === null ) { var repeat = false; var dirUp = false; this.calculateDiff( level, recurse, repeat, i, j, dirUp, recursionLevel + 1 ); } } /** * Recursively diff gap upwards. */ // Cycle through list of linked region borders var bordersLength = bordersUpNext.length; for ( match = 0; match < bordersLength; match ++ ) { var i = bordersUpNext[match][0]; var j = bordersUpNext[match][1]; // Next token up i = this.newText.tokens[i].prev; j = this.oldText.tokens[j].prev; // Start recursion at first gap token pair if ( i !== null && j !== null && this.newText.tokens[i].link === null && this.oldText.tokens[j].link === null ) { var repeat = false; var dirUp = true; this.calculateDiff( level, recurse, repeat, i, j, dirUp, recursionLevel + 1 ); } } } } // Stop timers if ( this.config.timer === true && repeating === false ) { if ( this.recursionTimer[recursionLevel] === undefined ) { this.recursionTimer[recursionLevel] = 0; } this.recursionTimer[recursionLevel] += this.timeEnd( level + recursionLevel, true ); } if ( this.config.timer === true && repeating === false && recursionLevel === 0 ) { this.timeRecursionEnd( level ); this.timeEnd( level ); } return; }; /** * Main method for processing raw diff data, extracting deleted, inserted, and moved blocks. * * Scheme of blocks, sections, and groups (old block numbers): * Old: 1 2 3D4 5E6 7 8 9 10 11 * | ‾/-/_ X | >|< | * New: 1 I 3D4 2 E6 5 N 7 10 9 8 11 * Section: 0 0 0 1 1 2 2 2 * Group: 0 10 111 2 33 4 11 5 6 7 8 9 * Fixed: . +++ - ++ - . . - - + * Type: = . =-= = -= = . = = = = = * * @param[out] array groups Groups table object * @param[out] array blocks Blocks table object * @param[in/out] WikEdDiffText newText, oldText Text object tokens list */ this.detectBlocks = function () { // Debug log if ( this.config.debug === true ) { this.oldText.debugText( 'Old text' ); this.newText.debugText( 'New text' ); } // Collect identical corresponding ('=') blocks from old text and sort by new text this.getSameBlocks(); // Collect independent block sections with no block move crosses outside a section this.getSections(); // Find groups of continuous old text blocks this.getGroups(); // Set longest sequence of increasing groups in sections as fixed (not moved) this.setFixed(); // Convert groups to insertions/deletions if maximum block length is too short // Only for more complex texts that actually have blocks of minimum block length var unlinkCount = 0; if ( this.config.unlinkBlocks === true && this.config.blockMinLength > 0 && this.maxWords >= this.config.blockMinLength ) { if ( this.config.timer === true ) { this.time( 'total unlinking' ); } // Repeat as long as unlinking is possible var unlinked = true; while ( unlinked === true && unlinkCount < this.config.unlinkMax ) { // Convert '=' to '+'/'-' pairs unlinked = this.unlinkBlocks(); // Start over after conversion if ( unlinked === true ) { unlinkCount ++; this.slideGaps( this.newText, this.oldText ); this.slideGaps( this.oldText, this.newText ); // Repeat block detection from start this.maxWords = 0; this.getSameBlocks(); this.getSections(); this.getGroups(); this.setFixed(); } } if ( this.config.timer === true ) { this.timeEnd( 'total unlinking' ); } } // Collect deletion ('-') blocks from old text this.getDelBlocks(); // Position '-' blocks into new text order this.positionDelBlocks(); // Collect insertion ('+') blocks from new text this.getInsBlocks(); // Set group numbers of '+' blocks this.setInsGroups(); // Mark original positions of moved groups this.insertMarks(); // Debug log if ( this.config.timer === true || this.config.debug === true ) { console.log( 'Unlink count: ', unlinkCount ); } if ( this.config.debug === true ) { this.debugGroups( 'Groups' ); this.debugBlocks( 'Blocks' ); } return; }; /** * Collect identical corresponding matching ('=') blocks from old text and sort by new text. * * @param[in] WikEdDiffText newText, oldText Text objects * @param[in/out] array blocks Blocks table object */ this.getSameBlocks = function () { if ( this.config.timer === true ) { this.time( 'getSameBlocks' ); } var blocks = this.blocks; // Clear blocks array blocks.splice( 0 ); // Cycle through old text to find connected (linked, matched) blocks var j = this.oldText.first; var i = null; while ( j !== null ) { // Skip '-' blocks while ( j !== null && this.oldText.tokens[j].link === null ) { j = this.oldText.tokens[j].next; } // Get '=' block if ( j !== null ) { i = this.oldText.tokens[j].link; var iStart = i; var jStart = j; // Detect matching blocks ('=') var count = 0; var unique = false; var text = ''; while ( i !== null && j !== null && this.oldText.tokens[j].link === i ) { text += this.oldText.tokens[j].token; count ++; if ( this.newText.tokens[i].unique === true ) { unique = true; } i = this.newText.tokens[i].next; j = this.oldText.tokens[j].next; } // Save old text '=' block blocks.push( { oldBlock: blocks.length, newBlock: null, oldNumber: this.oldText.tokens[jStart].number, newNumber: this.newText.tokens[iStart].number, oldStart: jStart, count: count, unique: unique, words: this.wordCount( text ), chars: text.length, type: '=', section: null, group: null, fixed: null, moved: null, text: text } ); } } // Sort blocks by new text token number blocks.sort( function( a, b ) { return a.newNumber - b.newNumber; } ); // Number blocks in new text order var blocksLength = blocks.length; for ( var block = 0; block < blocksLength; block ++ ) { blocks[block].newBlock = block; } if ( this.config.timer === true ) { this.timeEnd( 'getSameBlocks' ); } return; }; /** * Collect independent block sections with no block move crosses * outside a section for per-section determination of non-moving fixed groups. * * @param[out] array sections Sections table object * @param[in/out] array blocks Blocks table object, section property */ this.getSections = function () { if ( this.config.timer === true ) { this.time( 'getSections' ); } var blocks = this.blocks; var sections = this.sections; // Clear sections array sections.splice( 0 ); // Cycle through blocks var blocksLength = blocks.length; for ( var block = 0; block < blocksLength; block ++ ) { var sectionStart = block; var sectionEnd = block; var oldMax = blocks[sectionStart].oldNumber; var sectionOldMax = oldMax; // Check right for ( var j = sectionStart + 1; j < blocksLength; j ++ ) { // Check for crossing over to the left if ( blocks[j].oldNumber > oldMax ) { oldMax = blocks[j].oldNumber; } else if ( blocks[j].oldNumber < sectionOldMax ) { sectionEnd = j; sectionOldMax = oldMax; } } // Save crossing sections if ( sectionEnd > sectionStart ) { // Save section to block for ( var i = sectionStart; i <= sectionEnd; i ++ ) { blocks[i].section = sections.length; } // Save section sections.push( { blockStart: sectionStart, blockEnd: sectionEnd } ); block = sectionEnd; } } if ( this.config.timer === true ) { this.timeEnd( 'getSections' ); } return; }; /** * Find groups of continuous old text blocks. * * @param[out] array groups Groups table object * @param[in/out] array blocks Blocks table object, group property */ this.getGroups = function () { if ( this.config.timer === true ) { this.time( 'getGroups' ); } var blocks = this.blocks; var groups = this.groups; // Clear groups array groups.splice( 0 ); // Cycle through blocks var blocksLength = blocks.length; for ( var block = 0; block < blocksLength; block ++ ) { var groupStart = block; var groupEnd = block; var oldBlock = blocks[groupStart].oldBlock; // Get word and char count of block var words = this.wordCount( blocks[block].text ); var maxWords = words; var unique = blocks[block].unique; var chars = blocks[block].chars; // Check right for ( var i = groupEnd + 1; i < blocksLength; i ++ ) { // Check for crossing over to the left if ( blocks[i].oldBlock !== oldBlock + 1 ) { break; } oldBlock = blocks[i].oldBlock; // Get word and char count of block if ( blocks[i].words > maxWords ) { maxWords = blocks[i].words; } if ( blocks[i].unique === true ) { unique = true; } words += blocks[i].words; chars += blocks[i].chars; groupEnd = i; } // Save crossing group if ( groupEnd >= groupStart ) { // Set groups outside sections as fixed var fixed = false; if ( blocks[groupStart].section === null ) { fixed = true; } // Save group to block for ( var i = groupStart; i <= groupEnd; i ++ ) { blocks[i].group = groups.length; blocks[i].fixed = fixed; } // Save group groups.push( { oldNumber: blocks[groupStart].oldNumber, blockStart: groupStart, blockEnd: groupEnd, unique: unique, maxWords: maxWords, words: words, chars: chars, fixed: fixed, movedFrom: null, color: null } ); block = groupEnd; // Set global word count of longest linked block if ( maxWords > this.maxWords ) { this.maxWords = maxWords; } } } if ( this.config.timer === true ) { this.timeEnd( 'getGroups' ); } return; }; /** * Set longest sequence of increasing groups in sections as fixed (not moved). * * @param[in] array sections Sections table object * @param[in/out] array groups Groups table object, fixed property * @param[in/out] array blocks Blocks table object, fixed property */ this.setFixed = function () { if ( this.config.timer === true ) { this.time( 'setFixed' ); } var blocks = this.blocks; var groups = this.groups; var sections = this.sections; // Cycle through sections var sectionsLength = sections.length; for ( var section = 0; section < sectionsLength; section ++ ) { var blockStart = sections[section].blockStart; var blockEnd = sections[section].blockEnd; var groupStart = blocks[blockStart].group; var groupEnd = blocks[blockEnd].group; // Recusively find path of groups in increasing old group order with longest char length var cache = []; var maxChars = 0; var maxPath = null; // Start at each group of section for ( var i = groupStart; i <= groupEnd; i ++ ) { var pathObj = this.findMaxPath( i, groupEnd, cache ); if ( pathObj.chars > maxChars ) { maxPath = pathObj.path; maxChars = pathObj.chars; } } // Mark fixed groups var maxPathLength = maxPath.length; for ( var i = 0; i < maxPathLength; i ++ ) { var group = maxPath[i]; groups[group].fixed = true; // Mark fixed blocks for ( var block = groups[group].blockStart; block <= groups[group].blockEnd; block ++ ) { blocks[block].fixed = true; } } } if ( this.config.timer === true ) { this.timeEnd( 'setFixed' ); } return; }; /** * Recusively find path of groups in increasing old group order with longest char length. * * @param int start Path start group * @param int groupEnd Path last group * @param array cache Cache object, contains returnObj for start * @return array returnObj Contains path and char length */ this.findMaxPath = function ( start, groupEnd, cache ) { var groups = this.groups; // Find longest sub-path var maxChars = 0; var oldNumber = groups[start].oldNumber; var returnObj = { path: [], chars: 0}; for ( var i = start + 1; i <= groupEnd; i ++ ) { // Only in increasing old group order if ( groups[i].oldNumber < oldNumber ) { continue; } // Get longest sub-path from cache (deep copy) var pathObj; if ( cache[i] !== undefined ) { pathObj = { path: cache[i].path.slice(), chars: cache[i].chars }; } // Get longest sub-path by recursion else { pathObj = this.findMaxPath( i, groupEnd, cache ); } // Select longest sub-path if ( pathObj.chars > maxChars ) { maxChars = pathObj.chars; returnObj = pathObj; } } // Add current start to path returnObj.path.unshift( start ); returnObj.chars += groups[start].chars; // Save path to cache (deep copy) if ( cache[start] === undefined ) { cache[start] = { path: returnObj.path.slice(), chars: returnObj.chars }; } return returnObj; }; /** * Convert matching '=' blocks in groups into insertion/deletion ('+'/'-') pairs * if too short and too common. * Prevents fragmentated diffs for very different versions. * * @param[in] array blocks Blocks table object * @param[in/out] WikEdDiffText newText, oldText Text object, linked property * @param[in/out] array groups Groups table object * @return bool True if text tokens were unlinked */ this.unlinkBlocks = function () { var blocks = this.blocks; var groups = this.groups; // Cycle through groups var unlinked = false; var groupsLength = groups.length; for ( var group = 0; group < groupsLength; group ++ ) { var blockStart = groups[group].blockStart; var blockEnd = groups[group].blockEnd; // Unlink whole group if no block is at least blockMinLength words long and unique if ( groups[group].maxWords < this.config.blockMinLength && groups[group].unique === false ) { for ( var block = blockStart; block <= blockEnd; block ++ ) { if ( blocks[block].type === '=' ) { this.unlinkSingleBlock( blocks[block] ); unlinked = true; } } } // Otherwise unlink block flanks else { // Unlink blocks from start for ( var block = blockStart; block <= blockEnd; block ++ ) { if ( blocks[block].type === '=' ) { // Stop unlinking if more than one word or a unique word if ( blocks[block].words > 1 || blocks[block].unique === true ) { break; } this.unlinkSingleBlock( blocks[block] ); unlinked = true; blockStart = block; } } // Unlink blocks from end for ( var block = blockEnd; block > blockStart; block -- ) { if ( blocks[block].type === '=' ) { // Stop unlinking if more than one word or a unique word if ( blocks[block].words > 1 || ( blocks[block].words === 1 && blocks[block].unique === true ) ) { break; } this.unlinkSingleBlock( blocks[block] ); unlinked = true; } } } } return unlinked; }; /** * Unlink text tokens of single block, convert them into into insertion/deletion ('+'/'-') pairs. * * @param[in] array blocks Blocks table object * @param[out] WikEdDiffText newText, oldText Text objects, link property */ this.unlinkSingleBlock = function ( block ) { // Cycle through old text var j = block.oldStart; for ( var count = 0; count < block.count; count ++ ) { // Unlink tokens this.newText.tokens[ this.oldText.tokens[j].link ].link = null; this.oldText.tokens[j].link = null; j = this.oldText.tokens[j].next; } return; }; /** * Collect deletion ('-') blocks from old text. * * @param[in] WikEdDiffText oldText Old Text object * @param[out] array blocks Blocks table object */ this.getDelBlocks = function () { if ( this.config.timer === true ) { this.time( 'getDelBlocks' ); } var blocks = this.blocks; // Cycle through old text to find connected (linked, matched) blocks var j = this.oldText.first; var i = null; while ( j !== null ) { // Collect '-' blocks var oldStart = j; var count = 0; var text = ''; while ( j !== null && this.oldText.tokens[j].link === null ) { count ++; text += this.oldText.tokens[j].token; j = this.oldText.tokens[j].next; } // Save old text '-' block if ( count !== 0 ) { blocks.push( { oldBlock: null, newBlock: null, oldNumber: this.oldText.tokens[oldStart].number, newNumber: null, oldStart: oldStart, count: count, unique: false, words: null, chars: text.length, type: '-', section: null, group: null, fixed: null, moved: null, text: text } ); } // Skip '=' blocks if ( j !== null ) { i = this.oldText.tokens[j].link; while ( i !== null && j !== null && this.oldText.tokens[j].link === i ) { i = this.newText.tokens[i].next; j = this.oldText.tokens[j].next; } } } if ( this.config.timer === true ) { this.timeEnd( 'getDelBlocks' ); } return; }; /** * Position deletion '-' blocks into new text order. * Deletion blocks move with fixed reference: * Old: 1 D 2 1 D 2 * / \ / \ \ * New: 1 D 2 1 D 2 * Fixed: * * * newNumber: 1 1 2 2 * * Marks '|' and deletions '-' get newNumber of reference block * and are sorted around it by old text number. * * @param[in/out] array blocks Blocks table, newNumber, section, group, and fixed properties * */ this.positionDelBlocks = function () { if ( this.config.timer === true ) { this.time( 'positionDelBlocks' ); } var blocks = this.blocks; var groups = this.groups; // Sort shallow copy of blocks by oldNumber var blocksOld = blocks.slice(); blocksOld.sort( function( a, b ) { return a.oldNumber - b.oldNumber; } ); // Cycle through blocks in old text order var blocksOldLength = blocksOld.length; for ( var block = 0; block < blocksOldLength; block ++ ) { var delBlock = blocksOld[block]; // '-' block only if ( delBlock.type !== '-' ) { continue; } // Find fixed '=' reference block from original block position to position '-' block // Similar to position marks '|' code // Get old text prev block var prevBlockNumber = null; var prevBlock = null; if ( block > 0 ) { prevBlockNumber = blocksOld[block - 1].newBlock; prevBlock = blocks[prevBlockNumber]; } // Get old text next block var nextBlockNumber = null; var nextBlock = null; if ( block < blocksOld.length - 1 ) { nextBlockNumber = blocksOld[block + 1].newBlock; nextBlock = blocks[nextBlockNumber]; } // Move after prev block if fixed var refBlock = null; if ( prevBlock !== null && prevBlock.type === '=' && prevBlock.fixed === true ) { refBlock = prevBlock; } // Move before next block if fixed else if ( nextBlock !== null && nextBlock.type === '=' && nextBlock.fixed === true ) { refBlock = nextBlock; } // Move after prev block if not start of group else if ( prevBlock !== null && prevBlock.type === '=' && prevBlockNumber !== groups[ prevBlock.group ].blockEnd ) { refBlock = prevBlock; } // Move before next block if not start of group else if ( nextBlock !== null && nextBlock.type === '=' && nextBlockNumber !== groups[ nextBlock.group ].blockStart ) { refBlock = nextBlock; } // Move after closest previous fixed block else { for ( var fixed = block; fixed >= 0; fixed -- ) { if ( blocksOld[fixed].type === '=' && blocksOld[fixed].fixed === true ) { refBlock = blocksOld[fixed]; break; } } } // Move before first block if ( refBlock === null ) { delBlock.newNumber = -1; } // Update '-' block data else { delBlock.newNumber = refBlock.newNumber; delBlock.section = refBlock.section; delBlock.group = refBlock.group; delBlock.fixed = refBlock.fixed; } } // Sort '-' blocks in and update groups this.sortBlocks(); if ( this.config.timer === true ) { this.timeEnd( 'positionDelBlocks' ); } return; }; /** * Collect insertion ('+') blocks from new text. * * @param[in] WikEdDiffText newText New Text object * @param[out] array blocks Blocks table object */ this.getInsBlocks = function () { if ( this.config.timer === true ) { this.time( 'getInsBlocks' ); } var blocks = this.blocks; // Cycle through new text to find insertion blocks var i = this.newText.first; while ( i !== null ) { // Jump over linked (matched) block while ( i !== null && this.newText.tokens[i].link !== null ) { i = this.newText.tokens[i].next; } // Detect insertion blocks ('+') if ( i !== null ) { var iStart = i; var count = 0; var text = ''; while ( i !== null && this.newText.tokens[i].link === null ) { count ++; text += this.newText.tokens[i].token; i = this.newText.tokens[i].next; } // Save new text '+' block blocks.push( { oldBlock: null, newBlock: null, oldNumber: null, newNumber: this.newText.tokens[iStart].number, oldStart: null, count: count, unique: false, words: null, chars: text.length, type: '+', section: null, group: null, fixed: null, moved: null, text: text } ); } } // Sort '+' blocks in and update groups this.sortBlocks(); if ( this.config.timer === true ) { this.timeEnd( 'getInsBlocks' ); } return; }; /** * Sort blocks by new text token number and update groups. * * @param[in/out] array groups Groups table object * @param[in/out] array blocks Blocks table object */ this.sortBlocks = function () { var blocks = this.blocks; var groups = this.groups; // Sort by newNumber, then by old number blocks.sort( function( a, b ) { var comp = a.newNumber - b.newNumber; if ( comp === 0 ) { comp = a.oldNumber - b.oldNumber; } return comp; } ); // Cycle through blocks and update groups with new block numbers var group = null; var blocksLength = blocks.length; for ( var block = 0; block < blocksLength; block ++ ) { var blockGroup = blocks[block].group; if ( blockGroup !== null ) { if ( blockGroup !== group ) { group = blocks[block].group; groups[group].blockStart = block; groups[group].oldNumber = blocks[block].oldNumber; } groups[blockGroup].blockEnd = block; } } return; }; /** * Set group numbers of insertion '+' blocks. * * @param[in/out] array groups Groups table object * @param[in/out] array blocks Blocks table object, fixed and group properties */ this.setInsGroups = function () { if ( this.config.timer === true ) { this.time( 'setInsGroups' ); } var blocks = this.blocks; var groups = this.groups; // Set group numbers of '+' blocks inside existing groups var groupsLength = groups.length; for ( var group = 0; group < groupsLength; group ++ ) { var fixed = groups[group].fixed; for ( var block = groups[group].blockStart; block <= groups[group].blockEnd; block ++ ) { if ( blocks[block].group === null ) { blocks[block].group = group; blocks[block].fixed = fixed; } } } // Add remaining '+' blocks to new groups // Cycle through blocks var blocksLength = blocks.length; for ( var block = 0; block < blocksLength; block ++ ) { // Skip existing groups if ( blocks[block].group === null ) { blocks[block].group = groups.length; // Save new single-block group groups.push( { oldNumber: blocks[block].oldNumber, blockStart: block, blockEnd: block, unique: blocks[block].unique, maxWords: blocks[block].words, words: blocks[block].words, chars: blocks[block].chars, fixed: blocks[block].fixed, movedFrom: null, color: null } ); } } if ( this.config.timer === true ) { this.timeEnd( 'setInsGroups' ); } return; }; /** * Mark original positions of moved groups. * Scheme: moved block marks at original positions relative to fixed groups: * Groups: 3 7 * 1 <| | (no next smaller fixed) * 5 |< | * |> 5 | * | 5 <| * | >| 5 * | |> 9 (no next larger fixed) * Fixed: * * * * Mark direction: groups.movedGroup.blockStart < groups.group.blockStart * Group side: groups.movedGroup.oldNumber < groups.group.oldNumber * * Marks '|' and deletions '-' get newNumber of reference block * and are sorted around it by old text number. * * @param[in/out] array groups Groups table object, movedFrom property * @param[in/out] array blocks Blocks table object */ this.insertMarks = function () { if ( this.config.timer === true ) { this.time( 'insertMarks' ); } var blocks = this.blocks; var groups = this.groups; var moved = []; var color = 1; // Make shallow copy of blocks var blocksOld = blocks.slice(); // Enumerate copy var blocksOldLength = blocksOld.length; for ( var i = 0; i < blocksOldLength; i ++ ) { blocksOld[i].number = i; } // Sort copy by oldNumber blocksOld.sort( function( a, b ) { var comp = a.oldNumber - b.oldNumber; if ( comp === 0 ) { comp = a.newNumber - b.newNumber; } return comp; } ); // Create lookup table: original to sorted var lookupSorted = []; for ( var i = 0; i < blocksOldLength; i ++ ) { lookupSorted[ blocksOld[i].number ] = i; } // Cycle through groups (moved group) var groupsLength = groups.length; for ( var moved = 0; moved < groupsLength; moved ++ ) { var movedGroup = groups[moved]; if ( movedGroup.fixed !== false ) { continue; } var movedOldNumber = movedGroup.oldNumber; // Find fixed '=' reference block from original block position to position '|' block // Similar to position deletions '-' code // Get old text prev block var prevBlock = null; var block = lookupSorted[ movedGroup.blockStart ]; if ( block > 0 ) { prevBlock = blocksOld[block - 1]; } // Get old text next block var nextBlock = null; var block = lookupSorted[ movedGroup.blockEnd ]; if ( block < blocksOld.length - 1 ) { nextBlock = blocksOld[block + 1]; } // Move after prev block if fixed var refBlock = null; if ( prevBlock !== null && prevBlock.type === '=' && prevBlock.fixed === true ) { refBlock = prevBlock; } // Move before next block if fixed else if ( nextBlock !== null && nextBlock.type === '=' && nextBlock.fixed === true ) { refBlock = nextBlock; } // Find closest fixed block to the left else { for ( var fixed = lookupSorted[ movedGroup.blockStart ] - 1; fixed >= 0; fixed -- ) { if ( blocksOld[fixed].type === '=' && blocksOld[fixed].fixed === true ) { refBlock = blocksOld[fixed]; break; } } } // Get position of new mark block var newNumber; var markGroup; // No smaller fixed block, moved right from before first block if ( refBlock === null ) { newNumber = -1; markGroup = groups.length; // Save new single-mark-block group groups.push( { oldNumber: 0, blockStart: blocks.length, blockEnd: blocks.length, unique: false, maxWords: null, words: null, chars: 0, fixed: null, movedFrom: null, color: null } ); } else { newNumber = refBlock.newNumber; markGroup = refBlock.group; } // Insert '|' block blocks.push( { oldBlock: null, newBlock: null, oldNumber: movedOldNumber, newNumber: newNumber, oldStart: null, count: null, unique: null, words: null, chars: 0, type: '|', section: null, group: markGroup, fixed: true, moved: moved, text: '' } ); // Set group color movedGroup.color = color; movedGroup.movedFrom = markGroup; color ++; } // Sort '|' blocks in and update groups this.sortBlocks(); if ( this.config.timer === true ) { this.timeEnd( 'insertMarks' ); } return; }; /** * Collect diff fragment list for markup, create abstraction layer for customized diffs. * Adds the following fagment types: * '=', '-', '+' same, deletion, insertion * '<', '>' mark left, mark right * '(<', '(>', ')' block start and end * '[', ']' fragment start and end * '{', '}' container start and end * * @param[in] array groups Groups table object * @param[in] array blocks Blocks table object * @param[out] array fragments Fragments array, abstraction layer for diff code */ this.getDiffFragments = function () { var blocks = this.blocks; var groups = this.groups; var fragments = this.fragments; // Make shallow copy of groups and sort by blockStart var groupsSort = groups.slice(); groupsSort.sort( function( a, b ) { return a.blockStart - b.blockStart; } ); // Cycle through groups var groupsSortLength = groupsSort.length; for ( var group = 0; group < groupsSortLength; group ++ ) { var blockStart = groupsSort[group].blockStart; var blockEnd = groupsSort[group].blockEnd; // Add moved block start var color = groupsSort[group].color; if ( color !== null ) { var type; if ( groupsSort[group].movedFrom < blocks[ blockStart ].group ) { type = '(<'; } else { type = '(>'; } fragments.push( { text: '', type: type, color: color } ); } // Cycle through blocks for ( var block = blockStart; block <= blockEnd; block ++ ) { var type = blocks[block].type; // Add '=' unchanged text and moved block if ( type === '=' || type === '-' || type === '+' ) { fragments.push( { text: blocks[block].text, type: type, color: color } ); } // Add '<' and '>' marks else if ( type === '|' ) { var movedGroup = groups[ blocks[block].moved ]; // Get mark text var markText = ''; for ( var movedBlock = movedGroup.blockStart; movedBlock <= movedGroup.blockEnd; movedBlock ++ ) { if ( blocks[movedBlock].type === '=' || blocks[movedBlock].type === '-' ) { markText += blocks[movedBlock].text; } } // Get mark direction var markType; if ( movedGroup.blockStart < blockStart ) { markType = '<'; } else { markType = '>'; } // Add mark fragments.push( { text: markText, type: markType, color: movedGroup.color } ); } } // Add moved block end if ( color !== null ) { fragments.push( { text: '', type: ' )', color: color } ); } } // Cycle through fragments, join consecutive fragments of same type (i.e. '-' blocks) var fragmentsLength = fragments.length; for ( var fragment = 1; fragment < fragmentsLength; fragment ++ ) { // Check if joinable if ( fragments[fragment].type === fragments[fragment - 1].type && fragments[fragment].color === fragments[fragment - 1].color && fragments[fragment].text !== '' && fragments[fragment - 1].text !== '' ) { // Join and splice fragments[fragment - 1].text += fragments[fragment].text; fragments.splice( fragment, 1 ); fragment --; } } // Enclose in containers fragments.unshift( { text: '', type: '{', color: null }, { text: '', type: '[', color: null } ); fragments.push( { text: '', type: ']', color: null }, { text: '', type: '}', color: null } ); return; }; /** * Clip unchanged sections from unmoved block text. * Adds the following fagment types: * '~', ' ~', '~ ' omission indicators * '[', ']', ',' fragment start and end, fragment separator * * @param[in/out] array fragments Fragments array, abstraction layer for diff code */ this.clipDiffFragments = function () { var fragments = this.fragments; // Skip if only one fragment in containers, no change if ( fragments.length === 5 ) { return; } // Min length for clipping right var minRight = this.config.clipHeadingRight; if ( this.config.clipParagraphRightMin < minRight ) { minRight = this.config.clipParagraphRightMin; } if ( this.config.clipLineRightMin < minRight ) { minRight = this.config.clipLineRightMin; } if ( this.config.clipBlankRightMin < minRight ) { minRight = this.config.clipBlankRightMin; } if ( this.config.clipCharsRight < minRight ) { minRight = this.config.clipCharsRight; } // Min length for clipping left var minLeft = this.config.clipHeadingLeft; if ( this.config.clipParagraphLeftMin < minLeft ) { minLeft = this.config.clipParagraphLeftMin; } if ( this.config.clipLineLeftMin < minLeft ) { minLeft = this.config.clipLineLeftMin; } if ( this.config.clipBlankLeftMin < minLeft ) { minLeft = this.config.clipBlankLeftMin; } if ( this.config.clipCharsLeft < minLeft ) { minLeft = this.config.clipCharsLeft; } // Cycle through fragments var fragmentsLength = fragments.length; for ( var fragment = 0; fragment < fragmentsLength; fragment ++ ) { // Skip if not an unmoved and unchanged block var type = fragments[fragment].type; var color = fragments[fragment].color; if ( type !== '=' || color !== null ) { continue; } // Skip if too short for clipping var text = fragments[fragment].text; var textLength = text.length; if ( textLength < minRight && textLength < minLeft ) { continue; } // Get line positions including start and end var lines = []; var lastIndex = null; var regExpMatch; while ( ( regExpMatch = this.config.regExp.clipLine.exec( text ) ) !== null ) { lines.push( regExpMatch.index ); lastIndex = this.config.regExp.clipLine.lastIndex; } if ( lines[0] !== 0 ) { lines.unshift( 0 ); } if ( lastIndex !== textLength ) { lines.push( textLength ); } // Get heading positions var headings = []; var headingsEnd = []; while ( ( regExpMatch = this.config.regExp.clipHeading.exec( text ) ) !== null ) { headings.push( regExpMatch.index ); headingsEnd.push( regExpMatch.index + regExpMatch[0].length ); } // Get paragraph positions including start and end var paragraphs = []; var lastIndex = null; while ( ( regExpMatch = this.config.regExp.clipParagraph.exec( text ) ) !== null ) { paragraphs.push( regExpMatch.index ); lastIndex = this.config.regExp.clipParagraph.lastIndex; } if ( paragraphs[0] !== 0 ) { paragraphs.unshift( 0 ); } if ( lastIndex !== textLength ) { paragraphs.push( textLength ); } // Determine ranges to keep on left and right side var rangeRight = null; var rangeLeft = null; var rangeRightType = ''; var rangeLeftType = ''; // Find clip pos from left, skip for first non-container block if ( fragment !== 2 ) { // Maximum lines to search from left var rangeLeftMax = textLength; if ( this.config.clipLinesLeftMax < lines.length ) { rangeLeftMax = lines[this.config.clipLinesLeftMax]; } // Find first heading from left if ( rangeLeft === null ) { var headingsLength = headingsEnd.length; for ( var j = 0; j < headingsLength; j ++ ) { if ( headingsEnd[j] > this.config.clipHeadingLeft || headingsEnd[j] > rangeLeftMax ) { break; } rangeLeft = headingsEnd[j]; rangeLeftType = 'heading'; break; } } // Find first paragraph from left if ( rangeLeft === null ) { var paragraphsLength = paragraphs.length; for ( var j = 0; j < paragraphsLength; j ++ ) { if ( paragraphs[j] > this.config.clipParagraphLeftMax || paragraphs[j] > rangeLeftMax ) { break; } if ( paragraphs[j] > this.config.clipParagraphLeftMin ) { rangeLeft = paragraphs[j]; rangeLeftType = 'paragraph'; break; } } } // Find first line break from left if ( rangeLeft === null ) { var linesLength = lines.length; for ( var j = 0; j < linesLength; j ++ ) { if ( lines[j] > this.config.clipLineLeftMax || lines[j] > rangeLeftMax ) { break; } if ( lines[j] > this.config.clipLineLeftMin ) { rangeLeft = lines[j]; rangeLeftType = 'line'; break; } } } // Find first blank from left if ( rangeLeft === null ) { this.config.regExp.clipBlank.lastIndex = this.config.clipBlankLeftMin; if ( ( regExpMatch = this.config.regExp.clipBlank.exec( text ) ) !== null ) { if ( regExpMatch.index < this.config.clipBlankLeftMax && regExpMatch.index < rangeLeftMax ) { rangeLeft = regExpMatch.index; rangeLeftType = 'blank'; } } } // Fixed number of chars from left if ( rangeLeft === null ) { if ( this.config.clipCharsLeft < rangeLeftMax ) { rangeLeft = this.config.clipCharsLeft; rangeLeftType = 'chars'; } } // Fixed number of lines from left if ( rangeLeft === null ) { rangeLeft = rangeLeftMax; rangeLeftType = 'fixed'; } } // Find clip pos from right, skip for last non-container block if ( fragment !== fragments.length - 3 ) { // Maximum lines to search from right var rangeRightMin = 0; if ( lines.length >= this.config.clipLinesRightMax ) { rangeRightMin = lines[lines.length - this.config.clipLinesRightMax]; } // Find last heading from right if ( rangeRight === null ) { for ( var j = headings.length - 1; j >= 0; j -- ) { if ( headings[j] < textLength - this.config.clipHeadingRight || headings[j] < rangeRightMin ) { break; } rangeRight = headings[j]; rangeRightType = 'heading'; break; } } // Find last paragraph from right if ( rangeRight === null ) { for ( var j = paragraphs.length - 1; j >= 0 ; j -- ) { if ( paragraphs[j] < textLength - this.config.clipParagraphRightMax || paragraphs[j] < rangeRightMin ) { break; } if ( paragraphs[j] < textLength - this.config.clipParagraphRightMin ) { rangeRight = paragraphs[j]; rangeRightType = 'paragraph'; break; } } } // Find last line break from right if ( rangeRight === null ) { for ( var j = lines.length - 1; j >= 0; j -- ) { if ( lines[j] < textLength - this.config.clipLineRightMax || lines[j] < rangeRightMin ) { break; } if ( lines[j] < textLength - this.config.clipLineRightMin ) { rangeRight = lines[j]; rangeRightType = 'line'; break; } } } // Find last blank from right if ( rangeRight === null ) { var startPos = textLength - this.config.clipBlankRightMax; if ( startPos < rangeRightMin ) { startPos = rangeRightMin; } this.config.regExp.clipBlank.lastIndex = startPos; var lastPos = null; while ( ( regExpMatch = this.config.regExp.clipBlank.exec( text ) ) !== null ) { if ( regExpMatch.index > textLength - this.config.clipBlankRightMin ) { if ( lastPos !== null ) { rangeRight = lastPos; rangeRightType = 'blank'; } break; } lastPos = regExpMatch.index; } } // Fixed number of chars from right if ( rangeRight === null ) { if ( textLength - this.config.clipCharsRight > rangeRightMin ) { rangeRight = textLength - this.config.clipCharsRight; rangeRightType = 'chars'; } } // Fixed number of lines from right if ( rangeRight === null ) { rangeRight = rangeRightMin; rangeRightType = 'fixed'; } } // Check if we skip clipping if ranges are close together if ( rangeLeft !== null && rangeRight !== null ) { // Skip if overlapping ranges if ( rangeLeft > rangeRight ) { continue; } // Skip if chars too close var skipChars = rangeRight - rangeLeft; if ( skipChars < this.config.clipSkipChars ) { continue; } // Skip if lines too close var skipLines = 0; var linesLength = lines.length; for ( var j = 0; j < linesLength; j ++ ) { if ( lines[j] > rangeRight || skipLines > this.config.clipSkipLines ) { break; } if ( lines[j] > rangeLeft ) { skipLines ++; } } if ( skipLines < this.config.clipSkipLines ) { continue; } } // Skip if nothing to clip if ( rangeLeft === null && rangeRight === null ) { continue; } // Split left text var textLeft = null; var omittedLeft = null; if ( rangeLeft !== null ) { textLeft = text.slice( 0, rangeLeft ); // Remove trailing empty lines textLeft = textLeft.replace( this.config.regExp.clipTrimNewLinesLeft, '' ); // Get omission indicators, remove trailing blanks if ( rangeLeftType === 'chars' ) { omittedLeft = '~'; textLeft = textLeft.replace( this.config.regExp.clipTrimBlanksLeft, '' ); } else if ( rangeLeftType === 'blank' ) { omittedLeft = ' ~'; textLeft = textLeft.replace( this.config.regExp.clipTrimBlanksLeft, '' ); } } // Split right text var textRight = null; var omittedRight = null; if ( rangeRight !== null ) { textRight = text.slice( rangeRight ); // Remove leading empty lines textRight = textRight.replace( this.config.regExp.clipTrimNewLinesRight, '' ); // Get omission indicators, remove leading blanks if ( rangeRightType === 'chars' ) { omittedRight = '~'; textRight = textRight.replace( this.config.regExp.clipTrimBlanksRight, '' ); } else if ( rangeRightType === 'blank' ) { omittedRight = '~ '; textRight = textRight.replace( this.config.regExp.clipTrimBlanksRight, '' ); } } // Remove split element fragments.splice( fragment, 1 ); fragmentsLength --; // Add left text to fragments list if ( rangeLeft !== null ) { fragments.splice( fragment ++, 0, { text: textLeft, type: '=', color: null } ); fragmentsLength ++; if ( omittedLeft !== null ) { fragments.splice( fragment ++, 0, { text: '', type: omittedLeft, color: null } ); fragmentsLength ++; } } // Add fragment container and separator to list if ( rangeLeft !== null && rangeRight !== null ) { fragments.splice( fragment ++, 0, { text: '', type: ']', color: null } ); fragments.splice( fragment ++, 0, { text: '', type: ',', color: null } ); fragments.splice( fragment ++, 0, { text: '', type: '[', color: null } ); fragmentsLength += 3; } // Add right text to fragments list if ( rangeRight !== null ) { if ( omittedRight !== null ) { fragments.splice( fragment ++, 0, { text: '', type: omittedRight, color: null } ); fragmentsLength ++; } fragments.splice( fragment ++, 0, { text: textRight, type: '=', color: null } ); fragmentsLength ++; } } // Debug log if ( this.config.debug === true ) { this.debugFragments( 'Fragments' ); } return; }; /** * Create html formatted diff code from diff fragments. * * @param[in] array fragments Fragments array, abstraction layer for diff code * @param string|undefined version * Output version: 'new' or 'old': only text from new or old version, used for unit tests * @param[out] string html Html code of diff */ this.getDiffHtml = function ( version ) { var fragments = this.fragments; // No change, only one unchanged block in containers if ( fragments.length === 5 && fragments[2].type === '=' ) { this.html = ''; return; } // Cycle through fragments var htmlFragments = []; var fragmentsLength = fragments.length; for ( var fragment = 0; fragment < fragmentsLength; fragment ++ ) { var text = fragments[fragment].text; var type = fragments[fragment].type; var color = fragments[fragment].color; var html = ''; // Test if text is blanks-only or a single character var blank = false; if ( text !== '' ) { blank = this.config.regExp.blankBlock.test( text ); } // Add container start markup if ( type === '{' ) { html = this.config.htmlCode.containerStart; } // Add container end markup else if ( type === '}' ) { html = this.config.htmlCode.containerEnd; } // Add fragment start markup if ( type === '[' ) { html = this.config.htmlCode.fragmentStart; } // Add fragment end markup else if ( type === ']' ) { html = this.config.htmlCode.fragmentEnd; } // Add fragment separator markup else if ( type === ',' ) { html = this.config.htmlCode.separator; } // Add omission markup if ( type === '~' ) { html = this.config.htmlCode.omittedChars; } // Add omission markup if ( type === ' ~' ) { html = ' ' + this.config.htmlCode.omittedChars; } // Add omission markup if ( type === '~ ' ) { html = this.config.htmlCode.omittedChars + ' '; } // Add colored left-pointing block start markup else if ( type === '(<' ) { if ( version !== 'old' ) { // Get title var title; if ( this.config.noUnicodeSymbols === true ) { title = this.config.msg['wiked-diff-block-left-nounicode']; } else { title = this.config.msg['wiked-diff-block-left']; } // Get html if ( this.config.coloredBlocks === true ) { html = this.config.htmlCode.blockColoredStart; } else { html = this.config.htmlCode.blockStart; } html = this.htmlCustomize( html, color, title ); } } // Add colored right-pointing block start markup else if ( type === '(>' ) { if ( version !== 'old' ) { // Get title var title; if ( this.config.noUnicodeSymbols === true ) { title = this.config.msg['wiked-diff-block-right-nounicode']; } else { title = this.config.msg['wiked-diff-block-right']; } // Get html if ( this.config.coloredBlocks === true ) { html = this.config.htmlCode.blockColoredStart; } else { html = this.config.htmlCode.blockStart; } html = this.htmlCustomize( html, color, title ); } } // Add colored block end markup else if ( type === ' )' ) { if ( version !== 'old' ) { html = this.config.htmlCode.blockEnd; } } // Add '=' (unchanged) text and moved block if ( type === '=' ) { text = this.htmlEscape( text ); if ( color !== null ) { if ( version !== 'old' ) { html = this.markupBlanks( text, true ); } } else { html = this.markupBlanks( text ); } } // Add '-' text else if ( type === '-' ) { if ( version !== 'new' ) { // For old version skip '-' inside moved group if ( version !== 'old' || color === null ) { text = this.htmlEscape( text ); text = this.markupBlanks( text, true ); if ( blank === true ) { html = this.config.htmlCode.deleteStartBlank; } else { html = this.config.htmlCode.deleteStart; } html += text + this.config.htmlCode.deleteEnd; } } } // Add '+' text else if ( type === '+' ) { if ( version !== 'old' ) { text = this.htmlEscape( text ); text = this.markupBlanks( text, true ); if ( blank === true ) { html = this.config.htmlCode.insertStartBlank; } else { html = this.config.htmlCode.insertStart; } html += text + this.config.htmlCode.insertEnd; } } // Add '<' and '>' code else if ( type === '<' || type === '>' ) { if ( version !== 'new' ) { // Display as deletion at original position if ( this.config.showBlockMoves === false || version === 'old' ) { text = this.htmlEscape( text ); text = this.markupBlanks( text, true ); if ( version === 'old' ) { if ( this.config.coloredBlocks === true ) { html = this.htmlCustomize( this.config.htmlCode.blockColoredStart, color ) + text + this.config.htmlCode.blockEnd; } else { html = this.htmlCustomize( this.config.htmlCode.blockStart, color ) + text + this.config.htmlCode.blockEnd; } } else { if ( blank === true ) { html = this.config.htmlCode.deleteStartBlank + text + this.config.htmlCode.deleteEnd; } else { html = this.config.htmlCode.deleteStart + text + this.config.htmlCode.deleteEnd; } } } // Display as mark else { if ( type === '<' ) { if ( this.config.coloredBlocks === true ) { html = this.htmlCustomize( this.config.htmlCode.markLeftColored, color, text ); } else { html = this.htmlCustomize( this.config.htmlCode.markLeft, color, text ); } } else { if ( this.config.coloredBlocks === true ) { html = this.htmlCustomize( this.config.htmlCode.markRightColored, color, text ); } else { html = this.htmlCustomize( this.config.htmlCode.markRight, color, text ); } } } } } htmlFragments.push( html ); } // Join fragments this.html = htmlFragments.join( '' ); return; }; /** * Customize html code fragments. * Replaces: * {number}: class/color/block/mark/id number * {title}: title attribute (popup) * {nounicode}: noUnicodeSymbols fallback * input: html, number: block number, title: title attribute (popup) text * * @param string html Html code to be customized * @return string Customized html code */ this.htmlCustomize = function ( html, number, title ) { // Replace {number} with class/color/block/mark/id number html = html.replace( /\{number\}/g, number); // Replace {nounicode} with wikEdDiffNoUnicode class name if ( this.config.noUnicodeSymbols === true ) { html = html.replace( /\{nounicode\}/g, ' wikEdDiffNoUnicode'); } else { html = html.replace( /\{nounicode\}/g, ''); } // Shorten title text, replace {title} if ( title !== undefined ) { var max = 512; var end = 128; var gapMark = ' [...] '; if ( title.length > max ) { title = title.substr( 0, max - gapMark.length - end ) + gapMark + title.substr( title.length - end ); } title = this.htmlEscape( title ); title = title.replace( /\t/g, '  '); title = title.replace( / /g, '  '); html = html.replace( /\{title\}/, title); } return html; }; /** * Replace html-sensitive characters in output text with character entities. * * @param string html Html code to be escaped * @return string Escaped html code */ this.htmlEscape = function ( html ) { html = html.replace( /&/g, '&'); html = html.replace( //g, '>'); html = html.replace( /"/g, '"'); return html; }; /** * Markup tabs, newlines, and spaces in diff fragment text. * * @param bool highlight Highlight newlines and spaces in addition to tabs * @param string html Text code to be marked-up * @return string Marked-up text */ this.markupBlanks = function ( html, highlight ) { if ( highlight === true ) { html = html.replace( / /g, this.config.htmlCode.space); html = html.replace( /\n/g, this.config.htmlCode.newline); } html = html.replace( /\t/g, this.config.htmlCode.tab); return html; }; /** * Count real words in text. * * @param string text Text for word counting * @return int Number of words in text */ this.wordCount = function ( text ) { return ( text.match( this.config.regExp.countWords ) || [] ).length; }; /** * Test diff code for consistency with input versions. * Prints results to debug console. * * @param[in] WikEdDiffText newText, oldText Text objects */ this.unitTests = function () { // Check if output is consistent with new text this.getDiffHtml( 'new' ); var diff = this.html.replace( /<[^>]*>/g, ''); var text = this.htmlEscape( this.newText.text ); if ( diff !== text ) { console.log( 'Error: wikEdDiff unit test failure: diff not consistent with new text version!' ); this.error = true; console.log( 'new text:\n', text ); console.log( 'new diff:\n', diff ); } else { console.log( 'OK: wikEdDiff unit test passed: diff consistent with new text.' ); } // Check if output is consistent with old text this.getDiffHtml( 'old' ); var diff = this.html.replace( /<[^>]*>/g, ''); var text = this.htmlEscape( this.oldText.text ); if ( diff !== text ) { console.log( 'Error: wikEdDiff unit test failure: diff not consistent with old text version!' ); this.error = true; console.log( 'old text:\n', text ); console.log( 'old diff:\n', diff ); } else { console.log( 'OK: wikEdDiff unit test passed: diff consistent with old text.' ); } return; }; /** * Dump blocks object to browser console. * * @param string name Block name * @param[in] array blocks Blocks table object */ this.debugBlocks = function ( name, blocks ) { if ( blocks === undefined ) { blocks = this.blocks; } var dump = '\ni \toldBl \tnewBl \toldNm \tnewNm \toldSt \tcount \tuniq' + '\twords \tchars \ttype \tsect \tgroup \tfixed \tmoved \ttext\n'; var blocksLength = blocks.length; for ( var i = 0; i < blocksLength; i ++ ) { dump += i + ' \t' + blocks[i].oldBlock + ' \t' + blocks[i].newBlock + ' \t' + blocks[i].oldNumber + ' \t' + blocks[i].newNumber + ' \t' + blocks[i].oldStart + ' \t' + blocks[i].count + ' \t' + blocks[i].unique + ' \t' + blocks[i].words + ' \t' + blocks[i].chars + ' \t' + blocks[i].type + ' \t' + blocks[i].section + ' \t' + blocks[i].group + ' \t' + blocks[i].fixed + ' \t' + blocks[i].moved + ' \t' + this.debugShortenText( blocks[i].text ) + '\n'; } console.log( name + ':\n' + dump ); }; /** * Dump groups object to browser console. * * @param string name Group name * @param[in] array groups Groups table object */ this.debugGroups = function ( name, groups ) { if ( groups === undefined ) { groups = this.groups; } var dump = '\ni \toldNm \tblSta \tblEnd \tuniq \tmaxWo' + '\twords \tchars \tfixed \toldNm \tmFrom \tcolor\n'; var groupsLength = groupsLength; for ( var i = 0; i < groups.length; i ++ ) { dump += i + ' \t' + groups[i].oldNumber + ' \t' + groups[i].blockStart + ' \t' + groups[i].blockEnd + ' \t' + groups[i].unique + ' \t' + groups[i].maxWords + ' \t' + groups[i].words + ' \t' + groups[i].chars + ' \t' + groups[i].fixed + ' \t' + groups[i].oldNumber + ' \t' + groups[i].movedFrom + ' \t' + groups[i].color + '\n'; } console.log( name + ':\n' + dump ); }; /** * Dump fragments array to browser console. * * @param string name Fragments name * @param[in] array fragments Fragments array */ this.debugFragments = function ( name ) { var fragments = this.fragments; var dump = '\ni \ttype \tcolor \ttext\n'; var fragmentsLength = fragments.length; for ( var i = 0; i < fragmentsLength; i ++ ) { dump += i + ' \t"' + fragments[i].type + '" \t' + fragments[i].color + ' \t' + this.debugShortenText( fragments[i].text, 120, 40 ) + '\n'; } console.log( name + ':\n' + dump ); }; /** * Dump borders array to browser console. * * @param string name Arrays name * @param[in] array border Match border array */ this.debugBorders = function ( name, borders ) { var dump = '\ni \t[ new \told ]\n'; var bordersLength = borders.length; for ( var i = 0; i < bordersLength; i ++ ) { dump += i + ' \t[ ' + borders[i][0] + ' \t' + borders[i][1] + ' ]\n'; } console.log( name, dump ); }; /** * Shorten text for dumping. * * @param string text Text to be shortened * @param int max Max length of (shortened) text * @param int end Length of trailing fragment of shortened text * @return string Shortened text */ this.debugShortenText = function ( text, max, end ) { if ( typeof text !== 'string' ) { text = text.toString(); } text = text.replace( /\n/g, '\\n'); text = text.replace( /\t/g, ' '); if ( max === undefined ) { max = 50; } if ( end === undefined ) { end = 15; } if ( text.length > max ) { text = text.substr( 0, max - 1 - end ) + '…' + text.substr( text.length - end ); } return '"' + text + '"'; }; /** * Start timer 'label', analogous to JavaScript console timer. * Usage: this.time( 'label' ); * * @param string label Timer label * @param[out] array timer Current time in milliseconds (float) */ this.time = function ( label ) { this.timer[label] = new Date().getTime(); return; }; /** * Stop timer 'label', analogous to JavaScript console timer. * Logs time in milliseconds since start to browser console. * Usage: this.timeEnd( 'label' ); * * @param string label Timer label * @param bool noLog Do not log result * @return float Time in milliseconds */ this.timeEnd = function ( label, noLog ) { var diff = 0; if ( this.timer[label] !== undefined ) { var start = this.timer[label]; var stop = new Date().getTime(); diff = stop - start; this.timer[label] = undefined; if ( noLog !== true ) { console.log( label + ': ' + diff.toFixed( 2 ) + ' ms' ); } } return diff; }; /** * Log recursion timer results to browser console. * Usage: this.timeRecursionEnd(); * * @param string text Text label for output * @param[in] array recursionTimer Accumulated recursion times */ this.timeRecursionEnd = function ( text ) { if ( this.recursionTimer.length > 1 ) { // Subtract times spent in deeper recursions var timerEnd = this.recursionTimer.length - 1; for ( var i = 0; i < timerEnd; i ++ ) { this.recursionTimer[i] -= this.recursionTimer[i + 1]; } // Log recursion times var timerLength = this.recursionTimer.length; for ( var i = 0; i < timerLength; i ++ ) { console.log( text + ' recursion ' + i + ': ' + this.recursionTimer[i].toFixed( 2 ) + ' ms' ); } } this.recursionTimer = []; return; }; /** * Log variable values to debug console. * Usage: this.debug( 'var', var ); * * @param string name Object identifier * @param mixed|undefined name Object to be logged */ this.debug = function ( name, object ) { if ( object === undefined ) { console.log( name ); } else { console.log( name + ': ' + object ); } return; }; /** * Add script to document head. * * @param string code JavaScript code */ this.addScript = function ( code ) { if ( document.getElementById( 'wikEdDiffBlockHandler' ) === null ) { var script = document.createElement( 'script' ); script.id = 'wikEdDiffBlockHandler'; if ( script.innerText !== undefined ) { script.innerText = code; } else { script.textContent = code; } document.getElementsByTagName( 'head' )[0].appendChild( script ); } return; }; /** * Add stylesheet to document head, cross-browser >= IE6. * * @param string css CSS code */ this.addStyleSheet = function ( css ) { if ( document.getElementById( 'wikEdDiffStyles' ) === null ) { // Replace mark symbols css = css.replace( /\{cssMarkLeft\}/g, this.config.cssMarkLeft); css = css.replace( /\{cssMarkRight\}/g, this.config.cssMarkRight); var style = document.createElement( 'style' ); style.id = 'wikEdDiffStyles'; style.type = 'text/css'; if ( style.styleSheet !== undefined ) { style.styleSheet.cssText = css; } else { style.appendChild( document.createTextNode( css ) ); } document.getElementsByTagName( 'head' )[0].appendChild( style ); } return; }; /** * Recursive deep copy from target over source for customization import. * * @param object source Source object * @param object target Target object */ this.deepCopy = function ( source, target ) { for ( var key in source ) { if ( Object.prototype.hasOwnProperty.call( source, key ) === true ) { if ( typeof source[key] === 'object' ) { this.deepCopy( source[key], target[key] ); } else { target[key] = source[key]; } } } return; }; // Initialze WikEdDiff object this.init(); }; /** * Data and methods for single text version (old or new one). * * @class WikEdDiffText */ WikEdDiff.WikEdDiffText = function ( text, parent ) { /** @var WikEdDiff parent Parent object for configuration settings and debugging methods */ this.parent = parent; /** @var string text Text of this version */ this.text = null; /** @var array tokens Tokens list */ this.tokens = []; /** @var int first, last First and last index of tokens list */ this.first = null; this.last = null; /** @var array words Word counts for version text */ this.words = {}; /** * Constructor, initialize text object. * * @param string text Text of version * @param WikEdDiff parent Parent, for configuration settings and debugging methods */ this.init = function () { if ( typeof text !== 'string' ) { text = text.toString(); } // IE / Mac fix this.text = text.replace( /\r\n?/g, '\n'); // Parse and count words and chunks for identification of unique real words if ( this.parent.config.timer === true ) { this.parent.time( 'wordParse' ); } this.wordParse( this.parent.config.regExp.countWords ); this.wordParse( this.parent.config.regExp.countChunks ); if ( this.parent.config.timer === true ) { this.parent.timeEnd( 'wordParse' ); } return; }; /** * Parse and count words and chunks for identification of unique words. * * @param string regExp Regular expression for counting words * @param[in] string text Text of version * @param[out] array words Number of word occurrences */ this.wordParse = function ( regExp ) { var regExpMatch = this.text.match( regExp ); if ( regExpMatch !== null ) { var matchLength = regExpMatch.length; for (var i = 0; i < matchLength; i ++) { var word = regExpMatch[i]; if ( Object.prototype.hasOwnProperty.call( this.words, word ) === false ) { this.words[word] = 1; } else { this.words[word] ++; } } } return; }; /** * Split text into paragraph, line, sentence, chunk, word, or character tokens. * * @param string level Level of splitting: paragraph, line, sentence, chunk, word, or character * @param int|null token Index of token to be split, otherwise uses full text * @param[in] string text Full text to be split * @param[out] array tokens Tokens list * @param[out] int first, last First and last index of tokens list */ this.splitText = function ( level, token ) { var prev = null; var next = null; var current = this.tokens.length; var first = current; var text = ''; // Split full text or specified token if ( token === undefined ) { text = this.text; } else { prev = this.tokens[token].prev; next = this.tokens[token].next; text = this.tokens[token].token; } // Split text into tokens, regExp match as separator var number = 0; var split = []; var regExpMatch; var lastIndex = 0; var regExp = this.parent.config.regExp.split[level]; while ( ( regExpMatch = regExp.exec( text ) ) !== null ) { if ( regExpMatch.index > lastIndex ) { split.push( text.substring( lastIndex, regExpMatch.index ) ); } split.push( regExpMatch[0] ); lastIndex = regExp.lastIndex; } if ( lastIndex < text.length ) { split.push( text.substring( lastIndex ) ); } // Cycle through new tokens var splitLength = split.length; for ( var i = 0; i < splitLength; i ++ ) { // Insert current item, link to previous this.tokens.push( { token: split[i], prev: prev, next: null, link: null, number: null, unique: false } ); number ++; // Link previous item to current if ( prev !== null ) { this.tokens[prev].next = current; } prev = current; current ++; } // Connect last new item and existing next item if ( number > 0 && token !== undefined ) { if ( prev !== null ) { this.tokens[prev].next = next; } if ( next !== null ) { this.tokens[next].prev = prev; } } // Set text first and last token index if ( number > 0 ) { // Initial text split if ( token === undefined ) { this.first = 0; this.last = prev; } // First or last token has been split else { if ( token === this.first ) { this.first = first; } if ( token === this.last ) { this.last = prev; } } } return; }; /** * Split unique unmatched tokens into smaller tokens. * * @param string level Level of splitting: line, sentence, chunk, or word * @param[in] array tokens Tokens list */ this.splitRefine = function ( regExp ) { // Cycle through tokens list var i = this.first; while ( i !== null ) { // Refine unique unmatched tokens into smaller tokens if ( this.tokens[i].link === null ) { this.splitText( regExp, i ); } i = this.tokens[i].next; } return; }; /** * Enumerate text token list before detecting blocks. * * @param[out] array tokens Tokens list */ this.enumerateTokens = function () { // Enumerate tokens list var number = 0; var i = this.first; while ( i !== null ) { this.tokens[i].number = number; number ++; i = this.tokens[i].next; } return; }; /** * Dump tokens object to browser console. * * @param string name Text name * @param[in] int first, last First and last index of tokens list * @param[in] array tokens Tokens list */ this.debugText = function ( name ) { var tokens = this.tokens; var dump = 'first: ' + this.first + '\tlast: ' + this.last + '\n'; dump += '\ni \tlink \t(prev \tnext) \tuniq \t#num \t"token"\n'; var i = this.first; while ( i !== null ) { dump += i + ' \t' + tokens[i].link + ' \t(' + tokens[i].prev + ' \t' + tokens[i].next + ') \t' + tokens[i].unique + ' \t#' + tokens[i].number + ' \t' + parent.debugShortenText( tokens[i].token ) + '\n'; i = tokens[i].next; } console.log( name + ':\n' + dump ); return; }; // Initialize WikEdDiffText object this.init(); }; //




© 2015 - 2025 Weber Informatics LLC | Privacy Policy