A
download ByteComparator.cpp
Language: C++
LOC: 371
Project Info
WinMerge
Server: SourceForge
Type: cvs
...erge\winmerge\WinMerge\Src\
   7zCommon.cpp
   7zCommon.h
   AboutDlg.cpp
   AboutDlg.h
   AppSerialize.cpp
   AppSerialize.h
   BCMenu.cpp
   BCMenu.h
   ByteComparator.cpp
   ByteComparator.h
   CCPrompt.cpp
   CCPrompt.h
   charsets.c
   charsets.h
   ChildFrm.cpp
   ChildFrm.h
   codepage.cpp
   codepage.h
   codepage_detect.cpp
   codepage_detect.h
   ColorButton.cpp
   ColorButton.h
   CompareOptions.h
   CompareStatisticsDlg.cpp
   CompareStatisticsDlg.h
   CompareStats.cpp
   CompareStats.h
   ConfigLog.cpp
   ConfigLog.h
   Diff.cpp
   DiffContext.cpp
   DiffContext.h
   DiffFileData.cpp
   DiffFileData.h
   DiffFileInfo.cpp
   DiffFileInfo.h
   DiffItem.cpp
   DiffItem.h
   DiffItemList.cpp
   DiffItemList.h
   DiffList.cpp
   DiffList.h
   diffmain.c
   DiffThread.cpp
   DiffThread.h
   DiffViewBar.cpp
   DiffViewBar.h
   DiffWrapper.cpp
   DiffWrapper.h
   Dir.cpp
   DirActions.cpp
   DirCmpReport.cpp
   DirCmpReport.h
   DirCmpReportDlg.cpp
   DirCmpReportDlg.h
   DirColsDlg.cpp
   DirColsDlg.h
   DirCompProgressDlg.cpp
   DirCompProgressDlg.h
   DirDoc.cpp
   DirDoc.h
   DirFrame.cpp
   DirFrame.h
   DirReportTypes.h
   DirScan.cpp
   DirScan.h
   DirView.cpp
   DirView.h
   DirViewColHandler.cpp
   DirViewColItems.cpp
   DirViewColItems.h
   dlgutil.cpp
   dlgutil.h
   dllpstub.cpp
   dllpstub.h
   dllver.cpp
   dllver.h
   EditorFilepathBar.cpp
   EditorFilepathBar.h
   Exceptions.h
   FileActionScript.cpp
   FileActionScript.h
   FileFilterHelper.cpp
   FileFilterHelper.h
   FileFilterMgr.cpp
   FileFilterMgr.h
   FileFiltersDlg.cpp
   FileFiltersDlg.h
   FileInfo.cpp
   FileInfo.h
   FileLocation.h
   FilepathEdit.cpp
   FilepathEdit.h
   files.cpp
   files.h
   FileTextEncoding.cpp
   FileTextEncoding.h
   FileTextStats.h
   FileTransform.cpp
   FileTransform.h
   GhostTextBuffer.cpp
   GhostTextBuffer.h
   GhostTextView.cpp
   GhostTextView.h
   IAbortable.h
   IntToIntMap.h
   IOptionsPanel.h
   LoadSaveCodepageDlg.cpp
   LoadSaveCodepageDlg.h
   locality.cpp
   locality.h
   LocationBar.cpp
   LocationBar.h
   LocationView.cpp
   LocationView.h
   MainFrm.cpp
   MainFrm.h
   markdown.cpp
   markdown.h
   Merge.cpp
   Merge.dsp
   Merge.h
   Merge.rc
   MergeArgs.cpp
   MergeDiffDetailView.cpp
   MergeDiffDetailView.h
   MergeDoc.cpp
   MergeDoc.h
   MergeDocEncoding.cpp
   MergeDocLineDiffs.cpp
   MergeEditStatus.h
   MergeEditView.cpp
   MergeEditView.h
   MergeLineFlags.h
   MovedBlocks.cpp
   OpenDlg.cpp
   OpenDlg.h
   OptionsDef.h
   OptionsInit.cpp
   OutputDlg.cpp
   OutputDlg.h
   PatchDlg.cpp
   PatchDlg.h
   PatchTool.cpp
   PatchTool.h
   PathContext.cpp
   PathContext.h
   paths.cpp
   paths.h
   PluginManager.cpp
   PluginManager.h
   Plugins.cpp
   Plugins.h
   ProjectFile.cpp
   ProjectFile.h
   ProjectFilePathsDlg.cpp
   ProjectFilePathsDlg.h
   PropArchive.cpp
   PropArchive.h
   PropCodepage.cpp
   PropCodepage.h
   PropColors.cpp
   PropColors.h
   PropCompare.cpp
   PropCompare.h
   PropEditor.cpp
   PropEditor.h
   PropGeneral.cpp
   PropGeneral.h
   PropLineFilter.cpp
   PropLineFilter.h
   PropRegistry.cpp
   PropRegistry.h
   PropSyntaxColors.cpp
   PropSyntaxColors.h
   PropTextColors.cpp
   PropTextColors.h
   PropVss.cpp
   PropVss.h
   resource.h
   SaveClosingDlg.cpp
   SaveClosingDlg.h
   SelectUnpackerDlg.cpp
   SelectUnpackerDlg.h
   SetResourceVersions.bat
   SetResourceVersions.wsf
   SharedFilterDlg.cpp
   SharedFilterDlg.h
   SourceControl.cpp
   Splash.cpp
   Splash.h
   ssapi.cpp
   ssapi.h
   ssauto.h
   STACK.C
   StdAfx.cpp
   StdAfx.h
   stringdiffs.cpp
   stringdiffs.h
   stringdiffsi.h
   SyntaxColors.cpp
   SyntaxColors.h
   TestFilterDlg.cpp
   TestFilterDlg.h
   ViewableWhitespace.cpp
   ViewableWhitespace.h
   VSSHelper.cpp
   VSSHelper.h
   VssPrompt.cpp
   VssPrompt.h
   WaitStatusCursor.cpp
   WaitStatusCursor.h
   winnt_supp.h
   WMGotoDlg.cpp
   WMGotoDlg.h
   XmlDoc.cpp
   XmlDoc.h

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
/** 
 * @file  ByteComparator.cpp
 *
 * @brief Implements ByteComparator class.
 */
// RCS ID line follows -- this is updated by CVS
// $Id: ByteComparator.cpp,v 1.11 2006/07/27 10:41:24 kimmov Exp $

#include "stdafx.h"
#include "ByteComparator.h"
#include "FileTextStats.h"

// Debug-build preamble: in _DEBUG builds, redirect `new` to DEBUG_NEW and give
// this translation unit its own THIS_FILE string holding __FILE__.
// NOTE(review): presumably the usual MFC allocation-tracking boilerplate;
// DEBUG_NEW is defined outside this file -- confirm.
#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif


/**
 * @brief Tests whether a character is a line-ending character.
 * @param [in] ch Character to classify.
 * @return true when @p ch is a line feed or a carriage return,
 *  false for every other value.
 */
static inline bool iseolch(TCHAR ch)
{
	switch (ch)
	{
	case '\r':
	case '\n':
		return true;
	default:
		return false;
	}
}

/**
 * @brief Tests whether a character counts as in-line whitespace.
 * @param [in] ch Character to classify.
 * @return true when @p ch is a space or a horizontal tab,
 *  false for every other value.
 * @todo Non-breaking space and the Unicode spacing characters are not
 *  recognized.
 */
static inline bool iswsch(TCHAR ch)
{
	if (ch == '\t')
		return true;
	return ch == ' ';
}

/**
 * @brief Accumulates line-ending and zero-byte statistics for one buffer.
 *
 * Walks the bytes in [@p ptr, @p end) and updates the counters in @p stats:
 * CR/LF pairs, lone CRs, lone LFs and zero bytes (count, first and last
 * file offset).
 * @param [in,out] stats Statistics to update.
 * @param [in] ptr First byte of the buffer.
 * @param [in] end One past the last byte of the buffer.
 * @param [in] eof True if the end of the buffer is also the end of the file.
 * @param [in] crflag True if the previous buffer ended with a CR that has
 *  not been counted yet.
 * @param [in] offset File offset of the first byte of this buffer.
 */
static void TextScan(FileTextStats & stats, LPCSTR ptr, LPCSTR end, bool eof,
	bool crflag, __int64 offset)
{
	// Kept so zero-byte positions can be converted to file offsets
	const LPCSTR bufBegin = ptr;
	LPCSTR cur = ptr;

	// Resolve a CR carried over from the previous buffer: it is half of a
	// CR/LF pair only if this buffer begins with LF
	if (crflag)
	{
		const bool lfFollows = (cur < end) && (*cur == '\n');
		if (lfFollows)
		{
			++stats.ncrlfs;
			++cur;
		}
		else
		{
			++stats.ncrs;
		}
	}

	while (cur < end)
	{
		switch (*cur)
		{
		case 0:
			{
				const __int64 pos = offset + (cur - bufBegin);
				++stats.nzeros;
				if (stats.first_zero == -1)
					stats.first_zero = pos;
				stats.last_zero = pos;
			}
			break;

		case '\r':
			if (cur + 1 >= end)
			{
				// CR is the last byte of this buffer. At end of file it is a
				// lone CR; otherwise it is left uncounted here so that the
				// next call (with crflag set) can decide whether an LF follows.
				if (eof)
					++stats.ncrs;
			}
			else if (cur[1] == '\n')
			{
				++stats.ncrlfs;
				++cur; // consume the LF of the pair as well
			}
			else
			{
				++stats.ncrs;
			}
			break;

		case '\n':
			++stats.nlfs;
			break;

		default:
			break;
		}
		++cur;
	}
}

/**
 * @brief Constructs a comparator from a set of compare option flags.
 *
 * Every integer flag is interpreted as a boolean: any non-zero value turns
 * the option on. The scanning state starts cleared, with both sides marked
 * as being at the beginning of a line.
 * @param [in] ignore_case Non-zero to ignore character case.
 * @param [in] ignore_space_change Non-zero to ignore changes in the amount
 *  of whitespace.
 * @param [in] ignore_all_space Non-zero to ignore all whitespace characters.
 * @param [in] ignore_eol_diff Non-zero to ignore differences in EOL bytes.
 * @param [in] ignore_blank_lines Non-zero to ignore blank lines.
 * @note The parameters correspond to the diffutils options of the same names.
 */
ByteComparator::ByteComparator(int ignore_case, int ignore_space_change,
	int ignore_all_space, int ignore_eol_diff, int ignore_blank_lines)
	// compare options
	: m_ignore_case(ignore_case != 0)
	, m_ignore_space_change(ignore_space_change != 0)
	, m_ignore_all_space(ignore_all_space != 0)
	, m_ignore_eol_diff(ignore_eol_diff != 0)
	, m_ignore_blank_lines(ignore_blank_lines != 0)
	// scanning state carried between CompareBuffers calls
	, m_wsflag(false)
	, m_eol0(false)
	, m_eol1(false)
	, m_cr0(false)
	, m_cr1(false)
	, m_bol0(true)
	, m_bol1(true)
{
}

/**
 * @brief Compare two buffers byte per byte.
 *
 * This function compares two buffers pointed to by @p ptr0 and @p ptr1.
 * The comparison honours the diffutils-style option flags given to the
 * constructor. Buffer pointers are advanced while comparing so that on
 * return they point to the current compare position. The ends of the buffers
 * are given by @p end0 and @p end1, which point one past the last byte
 * available in each buffer. The offset parameters tell how far into its file
 * each buffer begins (i.e. 0 the first time the function is called).
 *
 * Before comparing, both buffers are passed through TextScan() so that
 * @p stats0 and @p stats1 are updated for the whole buffer, even if the
 * comparison itself stops early.
 *
 * Member flags (m_wsflag, m_eol0/1, m_cr0/1, m_bol0/1) carry scanning state
 * from one call to the next, so that whitespace runs and CR/LF pairs split
 * across buffer boundaries can be continued.
 * @param [in,out] stats0 Statistics for first side.
 * @param [in,out] stats1 Statistics for second side.
 * @param [in,out] ptr0 Pointer to begin of the first buffer.
 * @param [in,out] ptr1 Pointer to begin of the second buffer.
 * @param [in] end0 Pointer to end of the first buffer.
 * @param [in] end1 Pointer to end of the second buffer.
 * @param [in] eof0 Is the first buffer's end also the end of the file?
 * @param [in] eof1 Is the second buffer's end also the end of the file?
 * @param [in] offset0 Offset of the buffer begin in the first file.
 * @param [in] offset1 Offset of the buffer begin in the second file.
 * @return COMP_RESULT telling the result of the compare: RESULT_DIFF as soon
 *  as a difference is found, RESULT_SAME when both sides are exhausted at
 *  end of file, or one of NEED_MORE_0 / NEED_MORE_1 / NEED_MORE_BOTH when a
 *  buffer ran out before its file ended.
 */
ByteComparator::COMP_RESULT ByteComparator::CompareBuffers(
	FileTextStats & stats0, FileTextStats & stats1, LPCSTR &ptr0, LPCSTR &ptr1,
	LPCSTR end0, LPCSTR end1, bool eof0, bool eof1, __int64 offset0, __int64 offset1)
{
	// First, update file text statistics by doing a full scan
	// for 0s and all types of line delimiters
	// (m_cr0/m_cr1 tell the scan that the previous buffer ended in a CR)
	TextScan(stats0, ptr0, end0, eof0, m_cr0, offset0);
	TextScan(stats1, ptr1, end1, eof1, m_cr1, offset1);

	// cycle through buffer data performing actual comparison
	while (true)
	{
		if (m_ignore_all_space)
		{
			// Skip over any whitespace on either side
			// skip over all whitespace
			while (ptr0 < end0 && iswsch(*ptr0))
			{
				m_bol0=false;
				++ptr0;
			}
			// skip over all whitespace
			while (ptr1 < end1 && iswsch(*ptr1))
			{
				m_bol1=false;
				++ptr1;
			}
			// Ran out of buffer (but not file) while skipping: the
			// whitespace run may continue in the next buffer
			if ( (ptr0 == end0 && !eof0) || (ptr1 == end1 && !eof1) )
			{
				goto need_more;
			}
		}
		if (m_ignore_space_change)
		{
			// Skip over whitespace change
			// Also skip whitespace on one side if
			//  either end of line or end of file on other

			// Handle case of whitespace on side0
			// (First four cases)
			if (ptr0 < end0 && iswsch(*ptr0))
			{
				// Whitespace on side0

				if (ptr1 < end1)
				{
					if (iswsch(*ptr1))
					{
						// whitespace on both sides
						m_wsflag = true;
						m_bol0=false;
						++ptr0;
						m_bol1=false;
						++ptr1;
					}
					else if (iseolch(*ptr1))
					{
						// whitespace on side 0 (end of line on side 1)
						m_wsflag = true;
						m_bol0=false;
						++ptr0;
					}
					// otherwise: whitespace on side 0 against ordinary text on
					// side 1; nothing is skipped and the byte compare below
					// decides
				}
				else // ptr1 == end1
				{
					if (!eof1)
					{
						// Whitespace on side0, don't know what is on side1
						// Cannot tell if matching whitespace yet
						goto need_more;
					}
					else // eof1
					{
						// Whitespace on side0, eof on side1
						m_wsflag = true;
						m_bol0=false;
						++ptr0;
					}
				}
			}
			else
			{
				// Handle case of whitespace on side1
				// but not whitespace on side0 (that was handled above)
				// (Remaining three cases)
				if (ptr1 < end1 && iswsch(*ptr1))
				{
					// Whitespace on side1

					if (ptr0 < end0)
					{
						// "whitespace on both sides"
						// should not come here, it should have been
						// handled above
						ASSERT(!iswsch(*ptr0));

						if (iseolch(*ptr0))
						{
							// whitespace on side 1 (eol on side 0)
							m_wsflag = true;
							m_bol1=false;
							++ptr1;
						}
					}
					else // ptr0 == end0
					{
						if (!eof0)
						{
							// Whitespace on side1, don't know what is on side0
							// Cannot tell if matching whitespace yet
							goto need_more;
						}
						else // eof0
						{
							// Whitespace on side1, eof on side0
							m_wsflag = true;
							m_bol1=false;
							++ptr1;
						}
					}
				}
			}

			// m_wsflag is set either above or by a previous call that ran
			// out of buffer in the middle of a whitespace run
			if (m_wsflag)
			{
				// skip over consecutive whitespace
				while (ptr0 < end0 && iswsch(*ptr0))
				{
					m_bol0=false;
					++ptr0;
				}
				// skip over consecutive whitespace
				while (ptr1 < end1 && iswsch(*ptr1))
				{
					m_bol1=false;
					++ptr1;
				}
				if ( (ptr0 == end0 && !eof0) || (ptr1 == end1 && !eof1) )
				{
					// if run out of buffer on either side
					// must fetch more, to continue skipping whitespace
					m_wsflag = true;
					goto need_more;
				}
			}
			m_wsflag = false;
		}
		if (m_ignore_eol_diff)
		{
			if (m_ignore_blank_lines)
			{
				// skip over any line delimiters on either side
				while (ptr0 < end0 && iseolch(*ptr0))
				{
					// m_bol0 not used because m_ignore_eol_diff
					++ptr0;
				}
				while (ptr1 < end1 && iseolch(*ptr1))
				{
					// m_bol1 not used because m_ignore_eol_diff
					++ptr1;
				}
				if ( (ptr0 == end0 && !eof0) || (ptr1 == end1 && !eof1) )
				{
					goto need_more;
				}
			}
			else // don't skip blank lines, but still ignore eol difference
			{
				// Each side consumes at most one line ending (LF, CR or CR/LF)
				// and records in m_eol0/m_eol1 whether it saw one
				if (m_cr0)
				{
					// finish split CR/LF pair on 0-side
					if (ptr0 < end0 && *ptr0 == '\n')
					{
						// m_bol0 not used because m_ignore_eol_diff
						++ptr0;
					}
					m_eol0 = true;
					m_cr0 = false;
				}
				if (ptr0 < end0)
				{
					if (*ptr0 == '\n')
					{
						// m_bol0 not used because m_ignore_eol_diff
						++ptr0;
						m_eol0 = true;
					}
					else if (*ptr0 == '\r')
					{
						// m_bol0 not used because m_ignore_eol_diff
						++ptr0;
						m_eol0 = true;
						if (ptr0 == end0 && !eof0)
						{
							// can't tell if a CR/LF pair yet
							m_cr0 = true;
							m_eol0 = true;
						}
						else if (ptr0 < end0 && *ptr0 == '\n')
						{
							++ptr0;
						}
					}
					else
					{
						m_eol0 = false;
					}
				}
				if (m_cr1)
				{
					// finish split CR/LF pair on 1-side
					if (ptr1 < end1 && *ptr1 == '\n')
					{
						// m_bol1 not used because m_ignore_eol_diff
						++ptr1;
					}
					m_eol1 = true;
					m_cr1 = false;
				}
				if (ptr1 < end1)
				{
					if (*ptr1 == '\n')
					{
						// m_bol1 not used because m_ignore_eol_diff
						++ptr1;
						m_eol1 = true;
					}
					else if (*ptr1 == '\r')
					{
						// m_bol1 not used because m_ignore_eol_diff
						++ptr1;
						m_eol1 = true;
						if (ptr1 == end1 && !eof1)
						{
							// can't tell if a CR/LF pair yet
							m_cr1 = true;
							m_eol1 = true;
						}
						else if (ptr1 < end1 && *ptr1 == '\n')
						{
							++ptr1;
						}
					}
					else
					{
						m_eol1 = false;
					}
				}
				if (m_cr0 || m_cr1)
				{
					// these flags mean possible split CR/LF
					goto need_more;
				}
				if (m_eol0 || m_eol1)
				{
					if (!m_eol0 || !m_eol1)
					{
						// one side had an end-of-line, but the other didn't
						return RESULT_DIFF;
					}
					// otherwise, both sides had end-of-line
					// pointers have already been advanced, so just continue happily
				}
			}
		}
		else
		{ // do not ignore eol differences
			if (m_ignore_blank_lines)
			{
				// Only skip line delimiters when at the beginning of a line,
				// i.e. when the delimiters form blank lines
				if (m_bol0)
				{
					while (ptr0 < end0 && iseolch(*ptr0))
					{
						++ptr0;
					}
				}
				if (m_bol1)
				{
					while (ptr1 < end1 && iseolch(*ptr1))
					{
						++ptr1;
					}
				}
				if ( (ptr0 == end0 && !eof0) || (ptr1 == end1 && !eof1) )
				{
					goto need_more;
				}
			}
		}

		// At least one side has no more bytes in its buffer
		if ( ptr0 == end0 || ptr1 == end1)
		{
			if ( ptr0 == end0 && ptr1 == end1)
			{
				// Both buffers exhausted: identical only if both files ended
				if (!eof0 || !eof1)
					goto need_more;
				else
					return RESULT_SAME;
			}
			else
			{
				// One side is exhausted while the other still has data
				return RESULT_DIFF;
			}
		}

		// Compare the current byte of each side
		// NOTE(review): *ptr0 / *ptr1 are plain char; where char is signed,
		// bytes >= 0x80 are negative before reaching _istupper/_totlower --
		// confirm this is safe for the active TCHAR mapping.
		TCHAR c0 = *ptr0, c1 = *ptr1;
		if (m_ignore_case)
		{
			c0 = _istupper(c0) ? _totlower(c0) : c0;
			c1 = _istupper(c1) ? _totlower(c1) : c1;
		}
		if (c0 != c1)
			return RESULT_DIFF; // buffers are different
		// NOTE(review): the end-of-buffer check above already handled
		// ptr0 == end0 and ptr1 == end1, so this condition appears to be
		// always true here and the goto after it unreachable -- confirm.
		if (ptr0 < end0 && ptr1 < end1)
		{
			// Remember whether the next byte starts a new line
			m_bol0 = iseolch(c0);
			m_bol1 = iseolch(c1);
			++ptr0;
			++ptr1;
			continue;
		}
		goto need_more;
	}

need_more:
	// Report which side(s) ran out of buffer before reaching end of file
	if (ptr0 == end0 && !eof0)
	{
		if (ptr1 == end1 && !eof1)
			return NEED_MORE_BOTH;
		else
			return NEED_MORE_0;
	}
	else if(ptr1 == end1 && !eof1)
	{
		return NEED_MORE_1;
	}
	else
	{
		// Fallback: neither side needs more data
		return RESULT_SAME;
	}
}

About Koders | Resources | Downloads | Support | Black Duck | Terms of Service | DMCA | Privacy Policy | Contact Us