A
download DiffFileData.cpp
Language: C++
LOC: 533
Project Info
WinMerge
Server: SourceForge
Type: cvs
...erge\winmerge\WinMerge\Src\
   7zCommon.cpp
   7zCommon.h
   AboutDlg.cpp
   AboutDlg.h
   AppSerialize.cpp
   AppSerialize.h
   BCMenu.cpp
   BCMenu.h
   ByteComparator.cpp
   ByteComparator.h
   CCPrompt.cpp
   CCPrompt.h
   charsets.c
   charsets.h
   ChildFrm.cpp
   ChildFrm.h
   codepage.cpp
   codepage.h
   codepage_detect.cpp
   codepage_detect.h
   ColorButton.cpp
   ColorButton.h
   CompareOptions.h
   CompareStatisticsDlg.cpp
   CompareStatisticsDlg.h
   CompareStats.cpp
   CompareStats.h
   ConfigLog.cpp
   ConfigLog.h
   Diff.cpp
   DiffContext.cpp
   DiffContext.h
   DiffFileData.cpp
   DiffFileData.h
   DiffFileInfo.cpp
   DiffFileInfo.h
   DiffItem.cpp
   DiffItem.h
   DiffItemList.cpp
   DiffItemList.h
   DiffList.cpp
   DiffList.h
   diffmain.c
   DiffThread.cpp
   DiffThread.h
   DiffViewBar.cpp
   DiffViewBar.h
   DiffWrapper.cpp
   DiffWrapper.h
   Dir.cpp
   DirActions.cpp
   DirCmpReport.cpp
   DirCmpReport.h
   DirCmpReportDlg.cpp
   DirCmpReportDlg.h
   DirColsDlg.cpp
   DirColsDlg.h
   DirCompProgressDlg.cpp
   DirCompProgressDlg.h
   DirDoc.cpp
   DirDoc.h
   DirFrame.cpp
   DirFrame.h
   DirReportTypes.h
   DirScan.cpp
   DirScan.h
   DirView.cpp
   DirView.h
   DirViewColHandler.cpp
   DirViewColItems.cpp
   DirViewColItems.h
   dlgutil.cpp
   dlgutil.h
   dllpstub.cpp
   dllpstub.h
   dllver.cpp
   dllver.h
   EditorFilepathBar.cpp
   EditorFilepathBar.h
   Exceptions.h
   FileActionScript.cpp
   FileActionScript.h
   FileFilterHelper.cpp
   FileFilterHelper.h
   FileFilterMgr.cpp
   FileFilterMgr.h
   FileFiltersDlg.cpp
   FileFiltersDlg.h
   FileInfo.cpp
   FileInfo.h
   FileLocation.h
   FilepathEdit.cpp
   FilepathEdit.h
   files.cpp
   files.h
   FileTextEncoding.cpp
   FileTextEncoding.h
   FileTextStats.h
   FileTransform.cpp
   FileTransform.h
   GhostTextBuffer.cpp
   GhostTextBuffer.h
   GhostTextView.cpp
   GhostTextView.h
   IAbortable.h
   IntToIntMap.h
   IOptionsPanel.h
   LoadSaveCodepageDlg.cpp
   LoadSaveCodepageDlg.h
   locality.cpp
   locality.h
   LocationBar.cpp
   LocationBar.h
   LocationView.cpp
   LocationView.h
   MainFrm.cpp
   MainFrm.h
   markdown.cpp
   markdown.h
   Merge.cpp
   Merge.dsp
   Merge.h
   Merge.rc
   MergeArgs.cpp
   MergeDiffDetailView.cpp
   MergeDiffDetailView.h
   MergeDoc.cpp
   MergeDoc.h
   MergeDocEncoding.cpp
   MergeDocLineDiffs.cpp
   MergeEditStatus.h
   MergeEditView.cpp
   MergeEditView.h
   MergeLineFlags.h
   MovedBlocks.cpp
   OpenDlg.cpp
   OpenDlg.h
   OptionsDef.h
   OptionsInit.cpp
   OutputDlg.cpp
   OutputDlg.h
   PatchDlg.cpp
   PatchDlg.h
   PatchTool.cpp
   PatchTool.h
   PathContext.cpp
   PathContext.h
   paths.cpp
   paths.h
   PluginManager.cpp
   PluginManager.h
   Plugins.cpp
   Plugins.h
   ProjectFile.cpp
   ProjectFile.h
   ProjectFilePathsDlg.cpp
   ProjectFilePathsDlg.h
   PropArchive.cpp
   PropArchive.h
   PropCodepage.cpp
   PropCodepage.h
   PropColors.cpp
   PropColors.h
   PropCompare.cpp
   PropCompare.h
   PropEditor.cpp
   PropEditor.h
   PropGeneral.cpp
   PropGeneral.h
   PropLineFilter.cpp
   PropLineFilter.h
   PropRegistry.cpp
   PropRegistry.h
   PropSyntaxColors.cpp
   PropSyntaxColors.h
   PropTextColors.cpp
   PropTextColors.h
   PropVss.cpp
   PropVss.h
   resource.h
   SaveClosingDlg.cpp
   SaveClosingDlg.h
   SelectUnpackerDlg.cpp
   SelectUnpackerDlg.h
   SetResourceVersions.bat
   SetResourceVersions.wsf
   SharedFilterDlg.cpp
   SharedFilterDlg.h
   SourceControl.cpp
   Splash.cpp
   Splash.h
   ssapi.cpp
   ssapi.h
   ssauto.h
   STACK.C
   StdAfx.cpp
   StdAfx.h
   stringdiffs.cpp
   stringdiffs.h
   stringdiffsi.h
   SyntaxColors.cpp
   SyntaxColors.h
   TestFilterDlg.cpp
   TestFilterDlg.h
   ViewableWhitespace.cpp
   ViewableWhitespace.h
   VSSHelper.cpp
   VSSHelper.h
   VssPrompt.cpp
   VssPrompt.h
   WaitStatusCursor.cpp
   WaitStatusCursor.h
   winnt_supp.h
   WMGotoDlg.cpp
   WMGotoDlg.h
   XmlDoc.cpp
   XmlDoc.h

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
/** 
 * @file DiffFileData.cpp
 *
 * @brief Code for DiffFileData class
 *
 * @date  Created: 2003-08-22
 */
// RCS ID line follows -- this is updated by CVS
// $Id: DiffFileData.cpp,v 1.1 2006/08/11 15:09:42 kimmov Exp $

#include "stdafx.h"
#include "io.h"
#include "DiffItem.h"
#include "FileLocation.h"
#include "Diff.h"
#include "IAbortable.h"
#include "Paths.h"
#include "DiffContext.h"
#include "FileTransform.h"
#include "common/unicoder.h"
#include "DiffWrapper.h"
#include "ByteComparator.h"
#include "codepage_detect.h"
#include <shlwapi.h>
#include "DiffFileData.h"

/** @brief Number of bytes in one kilobyte; used to size buffers below. */
static const int KILO = 1024; // Kilo(byte)

/** @brief Quick contents compare's file buffer size, in bytes (one such buffer per side). */
static const int WMCMPBUFF = 32 * KILO;

// Forward declarations of the file-local helpers defined near the end of this file.
static void GetComparePaths(CDiffContext * pCtxt, const DIFFITEM &di, CString & left, CString & right);
static bool Unpack(CString & filepathTransformed,
	const CString & filteredFilenames, PackingInfo * infoUnpacker);
static void CopyTextStats(const file_data * inf, FileTextStats * myTextStats);
static void CopyDiffutilTextStats(file_data *inf, DiffFileData * diffData);

/**
 * @brief Codepage given to the file locations of every newly constructed DiffFileData.
 * Changed through DiffFileData::SetDefaultCodepage().
 */
static int f_defcp = 0; // default codepage


/**
 * @brief Small guard object that owns a C stdio stream.
 *
 * The stream handed to Assign() is closed with fclose() when Close() is
 * called, when a different stream is assigned, or when the guard is destroyed.
 * Copying is disabled: two guards holding the same m_fp would each fclose() it.
 */
struct FileHandle
{
	FileHandle() : m_fp(0) { }
	/** Take ownership of fp, closing the stream held so far (re-assigning the held stream is a no-op). */
	void Assign(FILE * fp) { if (fp != m_fp) { Close(); m_fp = fp; } }
	/** Close the held stream, if any; safe to call repeatedly. */
	void Close() { if (m_fp) { fclose(m_fp); m_fp = 0; } }
	~FileHandle() { Close(); }
	FILE * m_fp; /**< Owned stream, or 0 when nothing is held. */
private:
	// Declared but intentionally not implemented: the guard is not copyable
	FileHandle(const FileHandle &);
	FileHandle & operator=(const FileHandle &);
};

/**
 * @brief Construct an empty DiffFileData.
 *
 * Allocates the pair of diffutils file_data records, zero-fills them and
 * gives every file location the current default codepage.
 * @note Diff counts start as DIFFS_UNKNOWN, not as zero.
 */
DiffFileData::DiffFileData()
{
	// Nothing has been handed to diffutils yet; Reset() below relies on this
	m_used = false;
	m_ndiffs = DiffFileData::DIFFS_UNKNOWN;
	m_ntrivialdiffs = DiffFileData::DIFFS_UNKNOWN;

	m_inf = new file_data[2];
	for (int side = 0; side < 2; ++side)
		memset(&m_inf[side], 0, sizeof(m_inf[side]));
	Reset();

	// Set default codepages
	const int nLocations = static_cast<int>(sizeof(m_FileLocation) / sizeof(m_FileLocation[0]));
	for (int loc = 0; loc < nLocations; ++loc)
		m_FileLocation[loc].encoding.SetCodepage(f_defcp);
}

/**
 * @brief Destroy the object.
 *
 * Reset() first releases whatever the file_data records still hold (names,
 * descriptors, diffutils buffers); only then is the record array itself freed.
 */
DiffFileData::~DiffFileData()
{
	Reset();
	delete [] m_inf;
}

/**
 * @brief Choose the codepage to assume for files whose encoding is unknown.
 * @param [in] defcp Codepage stored in the file-level default; the
 *  DiffFileData constructor applies it to its file locations.
 */
void DiffFileData::SetDefaultCodepage(int defcp)
{
	f_defcp = defcp;
}

/**
 * @brief Record the two paths and open file descriptors for them in the inf structure.
 * @param [in] szFilepath1 Path of the first (left) file to open.
 * @param [in] szFilepath2 Path of the second (right) file to open.
 * @return true if both files were opened; false on failure, in which case
 *  the inf structure has been reset again.
 */
bool DiffFileData::OpenFiles(LPCTSTR szFilepath1, LPCTSTR szFilepath2)
{
	m_FileLocation[0].setPath(szFilepath1);
	m_FileLocation[1].setPath(szFilepath2);

	if (DoOpenFiles())
		return true;

	// Opening stopped part-way: release whatever was already set up
	Reset();
	return false;
}

/**
 * @brief Remember the true (original) file names for display.
 *
 * Call this before opening the files: DoOpenFiles() copies these names into
 * the diffutils records.
 * @param [in] szTrueFilepath1 Displayable path of the first file.
 * @param [in] szTrueFilepath2 Displayable path of the second file.
 */
void DiffFileData::SetDisplayFilepaths(LPCTSTR szTrueFilepath1, LPCTSTR szTrueFilepath2)
{
	m_sDisplayFilepath[0] = szTrueFilepath1;
	m_sDisplayFilepath[1] = szTrueFilepath2;
}


/** @brief Open file descriptors in the inf structure (return false if failure) */
bool DiffFileData::DoOpenFiles()
{
	Reset();

	for (int i=0; i<2; ++i)
	{
		// Fill in 8-bit versions of names for diffutils (WinMerge doesn't use these)
		USES_CONVERSION;
		// Actual paths are m_FileLocation[i].filepath
		// but these are often temporary files
		// Displayable (original) paths are m_sDisplayFilepath[i]
		m_inf[i].name = strdup(T2CA(m_sDisplayFilepath[i]));
		if (m_inf[i].name == NULL)
			return false;

		// Open up file descriptors
		// Always use O_BINARY mode, to avoid terminating file read on ctrl-Z (DOS EOF)
		// Also, WinMerge-modified diffutils handles all three major eol styles
		if (m_inf[i].desc == 0)
		{
			m_inf[i].desc = _topen(m_FileLocation[i].filepath, O_RDONLY|O_BINARY, _S_IREAD);
		}
		if (m_inf[i].desc < 0)
			return false;

		// Get file stats (diffutils uses these)
		if (fstat(m_inf[i].desc, &m_inf[i].stat) != 0)
		{
			return false;
		}
		if (0 == m_FileLocation[1].filepath.CompareNoCase(m_FileLocation[0].filepath))
		{
			m_inf[1].desc = m_inf[0].desc;
		}
	}

	m_used = true;
	return true;
}

/** @brief Clear inf structure to pristine */
void DiffFileData::Reset()
{
	ASSERT(m_inf);
	// If diffutils put data in, have it cleanup
	if (m_used)
	{
		cleanup_file_buffers(m_inf);
		m_used = false;
	}
	// clean up any open file handles, and zero stuff out
	// open file handles might be leftover from a failure in DiffFileData::OpenFiles
	for (int i=0; i<2; ++i)
	{
		if (m_inf[1].desc == m_inf[0].desc)
		{
			m_inf[1].desc = 0;
		}
		free((void *)m_inf[i].name);
		m_inf[i].name = NULL;

		if (m_inf[i].desc > 0)
		{
			close(m_inf[i].desc);
		}
		m_inf[i].desc = 0;
		memset(&m_inf[i], 0, sizeof(m_inf[i]));
	}
}

/**
 * @brief Try to deduce the encoding of a file from a copy of it in memory.
 * @param [in,out] fpenc File location whose encoding is updated.
 * @param [in] data Buffer (array of line pointers) handed on to GuessEncoding_from_bytes().
 * @param [in] count Number of entries in data.
 * @note Nothing is done when the location's encoding already has m_bom set.
 */
void DiffFileData::GuessEncoding_from_buffer(FileLocation & fpenc, const char **data, int count)
{
	if (fpenc.encoding.m_bom)
		return;

	// The guesser is also given the file extension
	CString sExtension = PathFindExtension(fpenc.filepath);
	GuessEncoding_from_bytes(sExtension, data, count, &fpenc.encoding);
}

/** @brief Guess encoding for one file (in DiffContext memory buffer) */
void DiffFileData::GuessEncoding_from_buffer_in_DiffContext(int side, CDiffContext * pCtxt)
{
	GuessEncoding_from_buffer(m_FileLocation[side], m_inf[side].linbuf + m_inf[side].linbuf_base, 
	                                m_inf[side].valid_lines - m_inf[side].linbuf_base);
}

/**
 * @brief Guess encoding for one file, working from its path rather than a memory buffer.
 * @param [in,out] fpenc File location whose encoding is updated.
 * @note Nothing is done when the location's encoding already has m_bom set.
 */
void DiffFileData::GuessEncoding_from_FileLocation(FileLocation & fpenc)
{
	if (fpenc.encoding.m_bom)
		return;

	BOOL bGuess = TRUE;
	GuessCodepageEncoding(fpenc.filepath, &fpenc.encoding, bGuess);
}

/**
 * @brief Compare two specified files.
 *
 * @param [in] depth Current directory depth.
 * @return Compare result as DIFFCODE.
 */
int DiffFileData::diffutils_compare_files(int depth)
{
	int bin_flag = 0;
	int bin_file = 0; // bitmap for binary files

	// Do the actual comparison (generating a change script)
	struct change *script = NULL;
	BOOL success = Diff2Files(&script, depth, &bin_flag, FALSE, &bin_file);
	if (!success)
	{
		return DIFFCODE::FILE | DIFFCODE::TEXT | DIFFCODE::CMPERR;
	}
	int code = DIFFCODE::FILE | DIFFCODE::TEXT | DIFFCODE::SAME;

	// make sure to start counting diffs at 0
	// (usually it is -1 at this point, for unknown)
	m_ndiffs = 0;
	m_ntrivialdiffs = 0;

	// Free change script (which we don't want)
	if (script != NULL)
	{
		struct change *p,*e;
		for (e = script; e; e = p)
		{
			if (!e->trivial)
				++m_ndiffs;
			else
				++m_ntrivialdiffs;
			p = e->link;
			free (e);
		}
		if (m_ndiffs > 0)
			code = code & ~DIFFCODE::SAME | DIFFCODE::DIFF;
	}

	// diff_2_files set bin_flag to -1 if different binary
	// diff_2_files set bin_flag to +1 if same binary

	if (bin_flag != 0)
	{
		// Clear text-flag, set binary flag
		// We don't know diff counts for binary files
		code = code & ~DIFFCODE::TEXT;
		switch (bin_file)
		{
		case BINFILE_SIDE1: code |= DIFFCODE::BINSIDE1;
			break;
		case BINFILE_SIDE2: code |= DIFFCODE::BINSIDE2;
			break;
		case BINFILE_SIDE1 | BINFILE_SIDE2: code |= DIFFCODE::BIN;
			break;
		default:
			_RPTF1(_CRT_ERROR, "Invalid bin_file value: %d", bin_file);
			break;
		}
		m_ndiffs = DiffFileData::DIFFS_UNKNOWN;
	}

	if (bin_flag < 0)
	{
		// Clear same-flag, set diff-flag
		code = code & ~DIFFCODE::SAME | DIFFCODE::DIFF;
	}

	return code;
}

/**
 * @brief Detect a Unicode file by its byte order mark and guess the encoding.
 *
 * Reads up to three bytes from the start of the file and recognises the
 * UTF-8 (EF BB BF), UCS-2 LE (FF FE) and UCS-2 BE (FE FF) marks. On return
 * @c size is the length of the mark in bytes (0 if none was found) and
 * @c unicoding the matching ucr value (ucr::NONE if none was found).
 * @param [in] fd Open file descriptor, or -1 for "no file". The file
 *  position it had on entry is restored before returning.
 */
DiffFileData::UniFileBom::UniFileBom(int fd)
{
	size = 0;
	unicoding = ucr::NONE;
	if (fd == -1)
		return;

	// Remember the caller's position. Only a SEEK_CUR query reports it:
	// _lseek(fd, 0, SEEK_SET) returns the new offset, which is always 0.
	const long savedPos = _lseek(fd, 0, SEEK_CUR);
	_lseek(fd, 0, SEEK_SET);

	const int nRead = _read(fd, buffer, 3);
	if (nRead >= 3 && buffer[0] == 0xEF && buffer[1] == 0xBB && buffer[2] == 0xBF)
	{
		size = 3;
		unicoding = ucr::UTF8;
	}
	else if (nRead >= 2 && buffer[0] == 0xFF && buffer[1] == 0xFE)
	{
		size = 2;
		unicoding = ucr::UCS2LE;
	}
	else if (nRead >= 2 && buffer[0] == 0xFE && buffer[1] == 0xFF)
	{
		size = 2;
		unicoding = ucr::UCS2BE;
	}
	else
	{
		// Too few bytes, a read error, or no recognised mark
		size = 0;
		unicoding = ucr::NONE;
	}

	// Put the descriptor back where the caller had it (if that was known)
	if (savedPos != -1L)
		_lseek(fd, savedPos, SEEK_SET);
}

/**
 * @brief Invoke appropriate plugins for prediffing.
 *
 * Steps: read the BOM to check for Unicode, normalize Unicode files that are
 * not UCS-2LE, run the prediffer plugin(s), and finally convert Unicode files
 * to UTF-8 for diffing. Every step may replace filepathTransformed by a
 * temporary file; a file that is already a temporary copy may be overwritten.
 * @param [in,out] fpenc Location whose encoding is updated from the BOM.
 * @param [in] filepath Path of the file before any transformation.
 * @param [in,out] filepathTransformed Path of the (possibly transformed) file.
 * @param [in] filteredFilenames Names the plugin filters are matched against.
 * @param [in,out] infoPrediffer Prediffer plugin info.
 * @param [in] fd Open descriptor of the file, used to read the BOM.
 * @return false if anything fails.
 * @note Caller has to DeleteFile filepathTransformed, if it differs from filepath.
 */
bool DiffFileData::Filepath_Transform(FileLocation & fpenc, const CString & filepath, CString & filepathTransformed,
	const CString & filteredFilenames, PrediffingInfo * infoPrediffer, int fd)
{
	// first step : read BOM to check for Unicode
	UniFileBom bom = fd;
	if (bom.unicoding)
		fpenc.encoding.SetUnicoding(bom.unicoding);

	// second step : normalize Unicode to OLECHAR (most of time, do nothing)
	// (OLECHAR = UCS-2LE in Windows)
	if (fpenc.encoding.m_unicoding && fpenc.encoding.m_unicoding != ucr::UCS2LE)
	{
		// may overwrite if we've already copied to temp file
		BOOL bOverwriteNormalize = (filepathTransformed != filepath);
		if (!FileTransform_NormalizeUnicode(filepathTransformed, bOverwriteNormalize))
			return false;
	}

	// Note: at this point filepathTransformed may be in UCS-2 or in its raw
	// encoding; prediff plugins must handle both

	// third step : prediff (plugins)
	BOOL bOverwritePrediff = (filepathTransformed != filepath);
	if (infoPrediffer->bToBeScanned)
	{
		// Tries each prediffer matching filteredFilenames. A prediffer that
		// fails is simply considered not to be the right one; FALSE comes back
		// only if a prediffer works but its output can not be saved to disk.
		if (!FileTransform_Prediffing(filepathTransformed, filteredFilenames, infoPrediffer, bOverwritePrediff))
			return false;
	}
	else
	{
		// The prediffer is already chosen; this fails if it has a problem
		if (!FileTransform_Prediffing(filepathTransformed, *infoPrediffer, bOverwritePrediff))
			return false;
	}

	// fourth step : prepare for diffing
	if (fpenc.encoding.m_unicoding)
	{
		// may overwrite if we've already copied to temp file
		BOOL bOverwriteUtf8 = (0 != filepathTransformed.CompareNoCase(filepath));
		if (!FileTransform_UCS2ToUTF8(filepathTransformed, bOverwriteUtf8))
			return false;
	}
	return true;
}

/**
 * @brief Prepare files (run plugins) & compare them, and return diffcode.
 * This is function to compare two files in folder compare. It is not used in
 * file compare.
 *
 * Sequence: build the two paths, unpack both sides, open them, run the
 * prediffing transformation on both sides, reopen if a transformation
 * produced new files, compare with the selected method, and finally reset
 * the descriptors and delete every temporary file that was created.
 * @param [in] pCtxt Pointer to compare context.
 * @param [in, out] di Compared files with associated data. On failure
 *  di.errorDesc is set to a short description of the failing step.
 * @return Compare result code. DIFFCODE::FILE | DIFFCODE::CMPERR when a
 *  preparation step fails or the compare method is not handled here.
 */
int DiffFileData::prepAndCompareTwoFiles(CDiffContext * pCtxt, DIFFITEM &di)
{
	// Local copy of the compare method: it may be switched to quick compare below
	int nCompMethod = pCtxt->m_nCompMethod;
	CString filepath1;
	CString filepath2;
	GetComparePaths(pCtxt, di, filepath1, filepath2);

	// Reset text stats
	m_textStats0.clear();
	m_textStats1.clear();

	// Result stays "compare error" until a compare actually ran
	int code = DIFFCODE::FILE | DIFFCODE::CMPERR;
	// For user chosen plugins, define bAutomaticUnpacker as false and use the chosen infoHandler
	// but how can we receive the infoHandler ? DirScan actually only
	// returns info, but can not use file dependent information.

	// Transformation happens here
	// text used for automatic mode : plugin filter must match it
	CString filteredFilenames = filepath1 + "|" + filepath2;

	PackingInfo * infoUnpacker=0;
	PrediffingInfo * infoPrediffer=0;

	// Get existing or new plugin infos
	pCtxt->FetchPluginInfos(filteredFilenames, &infoUnpacker, &infoPrediffer);

	// plugin may alter filepaths to temp copies (which we delete before returning in all cases)
	CString filepathUnpacked1 = filepath1;
	CString filepathUnpacked2 = filepath2;

	// Declared before the first goto so that the cleanup code at the exit
	// label can always compare them against the unpacked paths
	CString filepathTransformed1;
	CString filepathTransformed2;

	//DiffFileData diffdata; //(filepathTransformed1, filepathTransformed2);
	// Invoke unpacking plugins
	if (!Unpack(filepathUnpacked1, filteredFilenames, infoUnpacker))
	{
		di.errorDesc = _T("Unpack Error Side 1");
		goto exitPrepAndCompare;
	}

	// we use the same plugins for both files, so they must be defined before second file
	ASSERT(infoUnpacker->bToBeScanned == FALSE);

	if (!Unpack(filepathUnpacked2, filteredFilenames, infoUnpacker))
	{
		di.errorDesc = _T("Unpack Error Side 2");
		goto exitPrepAndCompare;
	}

	// As we keep handles open on unpacked files, Transform() may not delete them.
	// Unpacked files will be deleted at end of this function.
	filepathTransformed1 = filepathUnpacked1;
	filepathTransformed2 = filepathUnpacked2;
	SetDisplayFilepaths(filepath1, filepath2); // store true names for diff utils patch file
	if (!OpenFiles(filepathTransformed1, filepathTransformed2))
	{
		di.errorDesc = _T("OpenFiles Error (before tranform)");
		goto exitPrepAndCompare;
	}

	// Invoke prediff'ing plugins
	// (the descriptor opened above is passed along so the BOM can be read)
	if (!Filepath_Transform(m_FileLocation[0], filepathUnpacked1, filepathTransformed1, filteredFilenames, infoPrediffer, m_inf[0].desc))
	{
		di.errorDesc = _T("Transform Error Side 1");
		goto exitPrepAndCompare;
	}

	// we use the same plugins for both files, so they must be defined before second file
	ASSERT(infoPrediffer->bToBeScanned == FALSE);

	if (!Filepath_Transform(m_FileLocation[1], filepathUnpacked2, filepathTransformed2, filteredFilenames, infoPrediffer, m_inf[1].desc))
	{
		di.errorDesc = _T("Transform Error Side 2");
		goto exitPrepAndCompare;
	}

	// If options are binary equivalent, we could check for filesize
	// difference here, and bail out if files are clearly different
	// But, then we don't know if file is ascii or binary, and this
	// affects behavior (also, we don't have an icon for unknown type)

	// Actually compare the files
	// diffutils_compare_files is a fairly thin front-end to diffutils
	// Reopen only when a transformation replaced at least one of the files
	if (filepathTransformed1 != filepathUnpacked1 || filepathTransformed2 != filepathUnpacked2)
	{
		//diffdata.m_sFilepath[0] = filepathTransformed1;
		//diffdata.m_sFilepath[1] = filepathTransformed2;
		if (!OpenFiles(filepathTransformed1, filepathTransformed2))
		{
			di.errorDesc = _T("OpenFiles Error (after tranform)");
			goto exitPrepAndCompare;
		}
	}

	// If either file is larger than limit compare files by quick contents
	// This allows us to (faster) compare big binary files
	if (pCtxt->m_nCompMethod == CMP_CONTENT && 
		(di.left.size > pCtxt->m_nQuickCompareLimit ||
		di.right.size > pCtxt->m_nQuickCompareLimit))
	{
		nCompMethod = CMP_QUICK_CONTENT;
	}

	if (nCompMethod == CMP_CONTENT)
	{
		// use diffutils
		code = diffutils_compare_files(0);
		// If unique item, it was being compared to itself to determine encoding
		// and the #diffs is invalid
		if (di.isSideRight() || di.isSideLeft())
		{
			m_ndiffs = DiffFileData::DIFFS_UNKNOWN;
			m_ntrivialdiffs = DiffFileData::DIFFS_UNKNOWN;
		}
		if (DIFFCODE::isResultError(code))
			di.errorDesc = _T("DiffUtils Error");

		if (!DIFFCODE::isResultError(code) && pCtxt->m_bGuessEncoding)
		{
			// entire file is in memory in the diffutils buffers
			// inside the diff context, so may as well use in-memory copy
			GuessEncoding_from_buffer_in_DiffContext(0, pCtxt);
			GuessEncoding_from_buffer_in_DiffContext(1, pCtxt);
		}
	}
	else if (nCompMethod == CMP_QUICK_CONTENT)
	{
		// use our own byte-by-byte compare
		code = byte_compare_files(pCtxt->m_bStopAfterFirstDiff, pCtxt->GetAbortable());
		// Quick contents doesn't know about diff counts
		// Set to special value to indicate invalid
		m_ndiffs = DIFFS_UNKNOWN_QUICKCOMPARE;
		m_ntrivialdiffs = DIFFS_UNKNOWN_QUICKCOMPARE;
		// Hand the text statistics gathered by the byte compare to the item
		di.left.m_textStats = m_textStats0;
		di.right.m_textStats = m_textStats1;

		if (!DIFFCODE::isResultError(code) && pCtxt->m_bGuessEncoding)
		{
			// No in-memory copy here, so guess from the files themselves
			GuessEncoding_from_FileLocation(m_FileLocation[0]);
			GuessEncoding_from_FileLocation(m_FileLocation[1]);
		}
	}
	else
	{
		// Print error since we should have handled by date compare earlier
		_RPTF0(_CRT_ERROR, "Invalid compare type, DiffFileData can't handle it");
		di.errorDesc = _T("Bad compare type");
		goto exitPrepAndCompare;
	}


exitPrepAndCompare:
	// Common exit for success and every failure: release descriptors first,
	// then remove the temporary files
	Reset();
	// delete the temp files after comparison
	if (filepathTransformed1 != filepathUnpacked1)
		VERIFY(::DeleteFile(filepathTransformed1) || gLog::DeleteFileFailed(filepathTransformed1));
	if (filepathTransformed2 != filepathUnpacked2)
		VERIFY(::DeleteFile(filepathTransformed2) || gLog::DeleteFileFailed(filepathTransformed2));
	if (filepathUnpacked1 != filepath1)
		VERIFY(::DeleteFile(filepathUnpacked1) || gLog::DeleteFileFailed(filepathUnpacked1));
	if (filepathUnpacked2 != filepath2)
		VERIFY(::DeleteFile(filepathUnpacked2) || gLog::DeleteFileFailed(filepathUnpacked2));
	return code;
}

/**
 * @brief Compare two files using diffutils.
 *
 * Runs diff_2_files() on this object's file records inside a structured
 * exception handler, so that an error or exception raised in diffutils is
 * trapped and reported as a compare failure. After a successful run the
 * text statistics collected by diffutils are copied into this object.
 * @param [out] diffs Pointer to list of change structs where diffdata is
 *  stored; set to NULL when an exception is trapped.
 * @param [in] depth Depth in folder compare (we use 0).
 * @param [out] bin_status Used to return binary status from compare.
 * @param [in] bMovedBlocks If TRUE moved blocks are analyzed.
 * @param [out] bin_file Returns which file was binary file as bitmap, so if
 *  first file is binary, first bit is set etc. Can be NULL if binary file
 *  info is not needed (faster compare since diffutils don't bother checking
 *  second file if first is binary).
 * @return TRUE when compare succeeds, FALSE if error happened during compare.
 */
BOOL DiffFileData::Diff2Files(struct change ** diffs, int depth,
	int * bin_status, BOOL bMovedBlocks, int * bin_file)
{
	BOOL bCompared = TRUE;

	__try
	{
		*diffs = diff_2_files (m_inf, depth, bin_status, bMovedBlocks, bin_file);
		CopyDiffutilTextStats(m_inf, this);
	}
	__except (EXCEPTION_EXECUTE_HANDLER)
	{
		// Whatever diffutils produced so far is not handed out
		bCompared = FALSE;
		*diffs = NULL;
	}

	return bCompared;
}

/**
 * @brief Compare two specified files, byte-by-byte.
 *
 * The files named in m_FileLocation are read through two WMCMPBUFF-sized
 * buffers and handed piecewise to a ByteComparator, which also fills
 * m_textStats0 and m_textStats1.
 * @param [in] bStopAfterFirstDiff Stop compare after we find first difference?
 *  (the text statistics are then left incomplete)
 * @param [in] piAbortable Interface allowing to abort compare; may be NULL.
 * @return DIFFCODE: DIFF or SAME combined with a BIN / BINSIDE1 / BINSIDE2
 *  flag when zero bytes were counted; CMPERR if a file can not be opened or
 *  read; CMPABORT if the compare was aborted.
 */
int DiffFileData::byte_compare_files(BOOL bStopAfterFirstDiff, const IAbortable * piAbortable)
{
	// Close any descriptors open for diffutils
	Reset();

	// TODO
	// Right now, we assume files are in 8-bit encoding
	// because transform code converted any UCS-2 files to UTF-8
	// We could compare directly in UCS-2LE here, as an optimization, in that case
	char buff[2][WMCMPBUFF]; // buffered access to files
	FILE * fp[2]; // for files to compare
	FileHandle fhd[2]; // to ensure file handles fp get closed
	int i;
	int diffcode = 0;

	// Open both files
	for (i=0; i<2; ++i)
	{
		fp[i] = _tfopen(m_FileLocation[i].filepath, _T("rb"));
		if (!fp[i])
			return DIFFCODE::CMPERR;
		fhd[i].Assign(fp[i]);
	}

	// area of buffer currently holding data
	__int64 bfstart[2]; // offset into buff[i] where current data resides
	__int64 bfend[2]; // past-the-end pointer into buff[i], giving end of current data
	// buff[0] has bytes to process from buff[0][bfstart[0]] to buff[0][bfend[0]-1]

	bool eof[2]; // if we've finished file

	// initialize our buffer pointers and end of file flags
	for (i=0; i<2; ++i)
	{
		bfstart[i] = bfend[i] = 0;
		eof[i] = false;
	}

	ByteComparator comparator(ignore_case_flag, ignore_space_change_flag
		, ignore_all_space_flag, ignore_eol_diff, ignore_blank_lines_flag);

	// Begin loop
	// we handle the files in WMCMPBUFF sized buffers (variable buff[][])
	// That is, we do one buffer full at a time
	// or even less, as we process until one side buffer is empty, then reload that one
	// and continue
	while (!eof[0] || !eof[1])
	{
		if (piAbortable && piAbortable->ShouldAbort())
			return DIFFCODE::CMPABORT;

		// load or update buffers as appropriate
		for (i=0; i<2; ++i)
		{
			// Buffer fully consumed: start filling it from the beginning again
			if (!eof[i] && bfstart[i]==countof(buff[i]))
			{
				bfstart[i]=bfend[i] = 0;
			}
			if (!eof[i] && bfend[i]<countof(buff[i])-1)
			{
				// Assume our blocks are in range of unsigned int
				unsigned int space = countof(buff[i]) - bfend[i];
				size_t rtn = fread(&buff[i][bfend[i]], 1, space, fp[i]);
				if (ferror(fp[i]))
					return DIFFCODE::CMPERR;
				if (feof(fp[i]))
					eof[i] = true;
				bfend[i] += rtn;
			}
		}

		// where to start comparing right now
		LPCSTR ptr0 = &buff[0][bfstart[0]];
		LPCSTR ptr1 = &buff[1][bfstart[1]];

		// remember where we started
		LPCSTR orig0 = ptr0, orig1 = ptr1;

		// how far can we go right now?
		LPCSTR end0 = &buff[0][bfend[0]];
		LPCSTR end1 = &buff[1][bfend[1]];

		__int64 offset0 = (ptr0 - &buff[0][0]);
		__int64 offset1 = (ptr1 - &buff[1][0]);

		// are these two buffers the same?
		if (!comparator.CompareBuffers(m_textStats0, m_textStats1, 
			ptr0, ptr1, end0, end1, eof[0], eof[1], offset0, offset1))
		{
			if (bStopAfterFirstDiff)
			{
				// By bailing out here
				// we leave our text statistics incomplete
				return diffcode | DIFFCODE::DIFF;
			}
			else
			{
				// Note the difference, but keep going so that the text
				// statistics cover the whole of both files
				diffcode |= DIFFCODE::DIFF;
				ptr0 = end0;
				ptr1 = end1;
			}
		}
		else
		{
			ptr0 = end0;
			ptr1 = end1;
		}


		// did we finish both files?
		if (eof[0] && eof[1])
		{

			// A file in which zero bytes were counted is flagged as binary
			BOOL bBin0 = (m_textStats0.nzeros>0);
			BOOL bBin1 = (m_textStats1.nzeros>0);

			if (bBin0 && bBin1)
				diffcode |= DIFFCODE::BIN;
			else if (bBin0)
				diffcode |= DIFFCODE::BINSIDE1;
			else if (bBin1)
				diffcode |= DIFFCODE::BINSIDE2;

			// If either unfinished, they differ: add the DIFF flag.
			// (Masking with '&' here would drop the binary flags and could
			// never turn the DIFF flag on.)
			if (ptr0 != end0 || ptr1 != end1)
				diffcode |= DIFFCODE::DIFF;
			
			if (diffcode & DIFFCODE::DIFF)
				return diffcode | DIFFCODE::DIFF;
			else
				return diffcode | DIFFCODE::SAME;
		}

		// move our current pointers over what we just compared
		ASSERT(ptr0 >= orig0);
		ASSERT(ptr1 >= orig1);
		bfstart[0] += ptr0-orig0;
		bfstart[1] += ptr1-orig1;
	}
	return diffcode;
}

/**
 * @brief Get actual compared paths from DIFFITEM.
 * @note If item is unique, same path is returned for both.
 */
void GetComparePaths(CDiffContext * pCtxt, const DIFFITEM &di, CString & left, CString & right)
{
	static const TCHAR backslash[] = _T("\\");

	if (!di.isSideRight())
	{
		// Compare file to itself to detect encoding
		left = pCtxt->GetNormalizedLeft();
		if (!paths_EndsWithSlash(left))
			left += backslash;
		if (!di.sLeftSubdir.IsEmpty())
			left += di.sLeftSubdir + backslash;
		left += di.sLeftFilename;
		if (di.isSideLeft())
			right = left;
	}
	if (!di.isSideLeft())
	{
		// Compare file to itself to detect encoding
		right = pCtxt->GetNormalizedRight();
		if (!paths_EndsWithSlash(right))
			right += backslash;
		if (!di.sRightSubdir.IsEmpty())
			right += di.sRightSubdir + backslash;
		right += di.sRightFilename;
		if (di.isSideRight())
			left = right;
	}
}

/**
 * @brief Invoke appropriate plugins for unpacking.
 * @param [in,out] filepathTransformed Path of the file; the unpacking call
 *  may change it.
 * @param [in] filteredFilenames Names the plugin filters are matched against
 *  (used only while the unpacker still has to be scanned for).
 * @param [in,out] infoUnpacker Unpacker plugin info; its subcode is filled in.
 * @return false if anything fails.
 * @note Caller has to DeleteFile filepathTransformed, if it differs from filepath.
 */
static bool Unpack(CString & filepathTransformed,
	const CString & filteredFilenames, PackingInfo * infoUnpacker)
{
	// first step : unpack (plugins)
	bool bUnpacked;
	if (infoUnpacker->bToBeScanned)
	{
		// No unpacker chosen yet: let the filenames select one
		bUnpacked = FileTransform_Unpacking(filepathTransformed, filteredFilenames,
			infoUnpacker, &infoUnpacker->subcode) != FALSE;
	}
	else
	{
		// Unpacker already known: use it directly
		bUnpacked = FileTransform_Unpacking(filepathTransformed,
			infoUnpacker, &infoUnpacker->subcode) != FALSE;
	}
	return bUnpacked;
}

/**
 * @brief Copy text stat results from diffutils back into the FileTextStats structure
 */
static void CopyTextStats(const file_data * inf, FileTextStats * myTextStats)
{
	myTextStats->ncrlfs = inf->count_crlfs;
	myTextStats->ncrs = inf->count_crs;
	myTextStats->nlfs = inf->count_lfs;
}

/**
 * @brief Copy both left & right text stats results back into the DiffFileData text stats.
 * @param [in] inf Array of the two diffutils file records (index 0 and 1).
 * @param [out] diffData Object whose m_textStats0 and m_textStats1 are filled.
 */
static void CopyDiffutilTextStats(file_data *inf, DiffFileData * diffData)
{
	FileTextStats * const pStats[2] = { &diffData->m_textStats0, &diffData->m_textStats1 };

	for (int side = 0; side < 2; ++side)
		CopyTextStats(&inf[side], pStats[side]);
}

About Koders | Resources | Downloads | Support | Black Duck | Terms of Service | DMCA | Privacy Policy | Contact Us