diff3.py
Go to the documentation of this file.
1 # ============================================================================
2 # Copyright (C) 1988, 1989, 1992, 1993, 1994 Free Software Foundation, Inc.
3 # Copyright (c) 2011-2012 University of Pennsylvania
4 # Copyright (c) 2013-2016 Andreas Schuh
5 # All rights reserved.
6 # ============================================================================
7 
8 """
9 
10  @file diff3.py
11  @brief Three-way file comparison algorithm.
12 
13  This is a line-by-line translation of the Text::Diff3 Perl module version
14  0.10 written by MIZUTANI Tociyuki <tociyuki@gmail.com>.
15 
16  The Text::Diff3 Perl module in turn was based on the diff3 program:
17 
18  Three way file comparison program (diff3) for Project GNU.
19  Copyright (C) 1988, 1989, 1992, 1993, 1994 Free Software Foundation, Inc.
20  Written by Randy Smith
21 
22  The two-way file comparison procedure was based on the article:
23  P. Heckel. ``A technique for isolating differences between files.''
24  Communications of the ACM, Vol. 21, No. 4, page 264, April 1978.
25 
26 """
27 
28 from __future__ import unicode_literals
29 
30 from operator import xor
31 
32 
33 # ----------------------------------------------------------------------------
34 def diff3(yourtext, origtext, theirtext):
35  """Three-way diff based on the GNU diff3.c by R. Smith.
36 
37  @param [in] yourtext Array of lines of your text.
38  @param [in] origtext Array of lines of original text.
39  @param [in] theirtext Array of lines of their text.
40 
41  @returns Array of tuples containing diff results. The tuples consist of
42  (cmd, loA, hiA, loB, hiB), where cmd is either one of
43  '0', '1', '2', or 'A'.
44 
45  """
46  # diff result => [(cmd, loA, hiA, loB, hiB), ...]
47  d2 = (diff(origtext, yourtext), diff(origtext, theirtext))
48  d3 = []
49  r3 = [None, 0, 0, 0, 0, 0, 0]
50  while d2[0] or d2[1]:
51  # find a continual range in origtext lo2..hi2
52  # changed by yourtext or by theirtext.
53  #
54  # d2[0] 222 222222222
55  # origtext ...L!!!!!!!!!!!!!!!!!!!!H...
56  # d2[1] 222222 22 2222222
57  r2 = ([], [])
58  if not d2[0]: i = 1
59  else:
60  if not d2[1]: i = 0
61  else:
62  if d2[0][0][1] <= d2[1][0][1]: i = 0
63  else: i = 1
64  j = i
65  k = xor(i, 1)
66  hi = d2[j][0][2]
67  r2[j].append(d2[j].pop(0))
68  while d2[k] and d2[k][0][1] <= hi + 1:
69  hi_k = d2[k][0][2]
70  r2[k].append(d2[k].pop(0))
71  if hi < hi_k:
72  hi = hi_k
73  j = k
74  k = xor(k, 1)
75  lo2 = r2[i][ 0][1]
76  hi2 = r2[j][-1][2]
77  # take the corresponding ranges in yourtext lo0..hi0
78  # and in theirtext lo1..hi1.
79  #
80  # yourtext ..L!!!!!!!!!!!!!!!!!!!!!!!!!!!!H...
81  # d2[0] 222 222222222
82  # origtext ...00!1111!000!!00!111111...
83  # d2[1] 222222 22 2222222
84  # theirtext ...L!!!!!!!!!!!!!!!!H...
85  if r2[0]:
86  lo0 = r2[0][ 0][3] - r2[0][ 0][1] + lo2
87  hi0 = r2[0][-1][4] - r2[0][-1][2] + hi2
88  else:
89  lo0 = r3[2] - r3[6] + lo2
90  hi0 = r3[2] - r3[6] + hi2
91  if r2[1]:
92  lo1 = r2[1][ 0][3] - r2[1][ 0][1] + lo2
93  hi1 = r2[1][-1][4] - r2[1][-1][2] + hi2
94  else:
95  lo1 = r3[4] - r3[6] + lo2
96  hi1 = r3[4] - r3[6] + hi2
97  # detect type of changes
98  if not r2[0]:
99  cmd = '1'
100  elif not r2[1]:
101  cmd = '0'
102  elif hi0 - lo0 != hi1 - lo1:
103  cmd = 'A'
104  else:
105  cmd = '2'
106  for d in range(0, hi0 - lo0 + 1):
107  (i0, i1) = (lo0 + d - 1, lo1 + d - 1)
108  ok0 = (0 <= i0 and i0 < len(yourtext))
109  ok1 = (0 <= i1 and i1 < len(theirtext))
110  if xor(ok0, ok1) or (ok0 and yourtext[i0] != theirtext[i1]):
111  cmd = 'A'
112  break
113  d3.append((cmd, lo0, hi0, lo1, hi1, lo2, hi2))
114  return d3
115 
116 # ----------------------------------------------------------------------------
117 def merge(yourtext, origtext, theirtext):
118  res = {'conflict': 0, 'body': []}
119  d3 = diff3(yourtext, origtext, theirtext)
120  text3 = (yourtext, theirtext, origtext)
121  i2 = 1
122  for r3 in d3:
123  for lineno in range(i2, r3[5]):
124  res['body'].append(text3[2][lineno - 1])
125  if r3[0] == '0':
126  for lineno in range(r3[1], r3[2] + 1):
127  res['body'].append(text3[0][lineno - 1])
128  elif r3[0] != 'A':
129  for lineno in range(r3[3], r3[4] + 1):
130  res['body'].append(text3[1][lineno - 1])
131  else:
132  res = _conflict_range(text3, r3, res)
133  i2 = r3[6] + 1
134  for lineno in range(i2, len(text3[2]) + 1):
135  res['body'].append(text3[2][lineno - 1])
136  return res
137 
138 # ----------------------------------------------------------------------------
139 def _conflict_range(text3, r3, res):
140  text_a = [] # their text
141  for i in range(r3[3], r3[4] + 1):
142  text_a.append(text3[1][i - 1])
143  text_b = [] # your text
144  for i in range(r3[1], r3[2] + 1):
145  text_b.append(text3[0][i - 1])
146  d = diff(text_a, text_b)
147  if _assoc_range(d, 'c') and r3[5] <= r3[6]:
148  res['conflict'] += 1
149  res['body'].append('<<<<<<<')
150  for lineno in range(r3[1], r3[2] + 1):
151  res['body'].append(text3[0][lineno - 1])
152  res['body'].append('|||||||')
153  for lineno in range(r3[5], r3[6] + 1):
154  res['body'].append(text3[2][lineno - 1])
155  res['body'].append('=======')
156  for lineno in range(r3[3], r3[4] + 1):
157  res['body'].append(text3[1][lineno - 1])
158  res['body'].append('>>>>>>>')
159  return res
160  ia = 1
161  for r2 in d:
162  for lineno in range(ia, r2[1]):
163  res['body'].append(text_a[lineno - 1])
164  if r2[0] == 'c':
165  res['conflict'] += 1
166  res['body'].append('<<<<<<<')
167  for lineno in range(r2[3], r2[4] + 1):
168  res['body'].append(text_b[lineno - 1])
169  res['body'].append('=======')
170  for lineno in range(r2[1], r2[2] + 1):
171  res['body'].append(text_a[lineno - 1])
172  res['body'].append('>>>>>>>')
173  elif r2[0] == 'a':
174  for lineno in range(r2[3], r2[4] + 1):
175  res['body'].append(text_b[lineno - 1])
176  ia = r2[2] + 1
177  for lineno in range(ia, len(text_a)):
178  res['body'].append(text_a[lineno - 1])
179  return res
180 
181 # ----------------------------------------------------------------------------
182 def _assoc_range(diff, diff_type):
183  for d in diff:
184  if d[0] == diff_type: return d
185  return None
186 
187 # ----------------------------------------------------------------------------
188 def _diff_heckel(text_a, text_b):
189  """Two-way diff based on the algorithm by P. Heckel.
190 
191  @param [in] text_a Array of lines of first text.
192  @param [in] text_b Array of lines of second text.
193 
194  @returns TODO
195 
196  """
197  d = [];
198  uniq = [(len(text_a), len(text_b))]
199  (freq, ap, bp) = ({}, {}, {})
200  for i in range(len(text_a)):
201  s = text_a[i]
202  freq[s] = freq.get(s, 0) + 2;
203  ap [s] = i;
204  for i in range(len(text_b)):
205  s = text_b[i]
206  freq[s] = freq.get(s, 0) + 3;
207  bp [s] = i;
208  for s, x in freq.items():
209  if x == 5: uniq.append((ap[s], bp[s]))
210  (freq, ap, bp) = ({}, {}, {})
211  uniq.sort(key=lambda x: x[0])
212  (a1, b1) = (0, 0)
213  while a1 < len(text_a) and b1 < len(text_b):
214  if text_a[a1] != text_b[b1]: break
215  a1 += 1
216  b1 += 1
217  for a_uniq, b_uniq in uniq:
218  if a_uniq < a1 or b_uniq < b1: continue
219  (a0, b0) = (a1, b1)
220  (a1, b1) = (a_uniq - 1, b_uniq - 1)
221  while a0 <= a1 and b0 <= b1:
222  if text_a[a1] != text_b[b1]: break
223  a1 -= 1
224  b1 -= 1
225  if a0 <= a1 and b0 <= b1:
226  d.append(('c', a0 + 1, a1 + 1, b0 + 1, b1 + 1))
227  elif a0 <= a1:
228  d.append(('d', a0 + 1, a1 + 1, b0 + 1, b0))
229  elif b0 <= b1:
230  d.append(('a', a0 + 1, a0, b0 + 1, b1 + 1))
231  (a1, b1) = (a_uniq + 1, b_uniq + 1)
232  while a1 < len(text_a) and b1 < len(text_b):
233  if text_a[a1] != text_b[b1]: break
234  a1 += 1
235  b1 += 1
236  return d
237 
238 # ----------------------------------------------------------------------------
239 diff = _diff_heckel # default two-way diff function used by diff3()
def diff
Definition: diff3.py:239
def diff3(yourtext, origtext, theirtext)
Definition: diff3.py:34
def merge(yourtext, origtext, theirtext)
Definition: diff3.py:117