Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/diff_match_patch/diff_match_patch.py: 4%

972 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1#!/usr/bin/python3 

2 

3"""Diff Match and Patch 

4Copyright 2018 The diff-match-patch Authors. 

5https://github.com/google/diff-match-patch 

6 

7Licensed under the Apache License, Version 2.0 (the "License"); 

8you may not use this file except in compliance with the License. 

9You may obtain a copy of the License at 

10 

11 http://www.apache.org/licenses/LICENSE-2.0 

12 

13Unless required by applicable law or agreed to in writing, software 

14distributed under the License is distributed on an "AS IS" BASIS, 

15WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

16See the License for the specific language governing permissions and 

17limitations under the License. 

18""" 

19 

20"""Functions for diff, match and patch. 

21 

22Computes the difference between two texts to create a patch. 

23Applies the patch onto another text, allowing for errors. 

24""" 

25 

26__author__ = "fraser@google.com (Neil Fraser)" 

27 

28import re 

29import sys 

30import time 

31import urllib.parse 

32 

33 

34class diff_match_patch: 

35 """Class containing the diff, match and patch methods. 

36 

37 Also contains the behaviour settings. 

38 """ 

39 

40 def __init__(self): 

41 """Inits a diff_match_patch object with default settings. 

42 Redefine these in your program to override the defaults. 

43 """ 

44 

45 # Number of seconds to map a diff before giving up (0 for infinity). 

46 self.Diff_Timeout = 1.0 

47 # Cost of an empty edit operation in terms of edit characters. 

48 self.Diff_EditCost = 4 

49 # At what point is no match declared (0.0 = perfection, 1.0 = very loose). 

50 self.Match_Threshold = 0.5 

51 # How far to search for a match (0 = exact location, 1000+ = broad match). 

52 # A match this many characters away from the expected location will add 

53 # 1.0 to the score (0.0 is a perfect match). 

54 self.Match_Distance = 1000 

55 # When deleting a large block of text (over ~64 characters), how close do 

56 # the contents have to be to match the expected contents. (0.0 = perfection, 

57 # 1.0 = very loose). Note that Match_Threshold controls how closely the 

58 # end points of a delete need to match. 

59 self.Patch_DeleteThreshold = 0.5 

60 # Chunk size for context length. 

61 self.Patch_Margin = 4 

62 

63 # The number of bits in an int. 

64 # Python has no maximum, thus to disable patch splitting set to 0. 

65 # However to avoid long patches in certain pathological cases, use 32. 

66 # Multiple short patches (using native ints) are much faster than long ones. 

67 self.Match_MaxBits = 32 

68 

69 # DIFF FUNCTIONS 

70 

71 # The data structure representing a diff is an array of tuples: 

72 # [(DIFF_DELETE, "Hello"), (DIFF_INSERT, "Goodbye"), (DIFF_EQUAL, " world.")] 

73 # which means: delete "Hello", add "Goodbye" and keep " world." 

74 DIFF_DELETE = -1 

75 DIFF_INSERT = 1 

76 DIFF_EQUAL = 0 

77 

78 def diff_main(self, text1, text2, checklines=True, deadline=None): 

79 """Find the differences between two texts. Simplifies the problem by 

80 stripping any common prefix or suffix off the texts before diffing. 

81 

82 Args: 

83 text1: Old string to be diffed. 

84 text2: New string to be diffed. 

85 checklines: Optional speedup flag. If present and false, then don't run 

86 a line-level diff first to identify the changed areas. 

87 Defaults to true, which does a faster, slightly less optimal diff. 

88 deadline: Optional time when the diff should be complete by. Used 

89 internally for recursive calls. Users should set DiffTimeout instead. 

90 

91 Returns: 

92 Array of changes. 

93 """ 

94 # Set a deadline by which time the diff must be complete. 

95 if deadline == None: 

96 # Unlike in most languages, Python counts time in seconds. 

97 if self.Diff_Timeout <= 0: 

98 deadline = sys.maxsize 

99 else: 

100 deadline = time.time() + self.Diff_Timeout 

101 

102 # Check for null inputs. 

103 if text1 == None or text2 == None: 

104 raise ValueError("Null inputs. (diff_main)") 

105 

106 # Check for equality (speedup). 

107 if text1 == text2: 

108 if text1: 

109 return [(self.DIFF_EQUAL, text1)] 

110 return [] 

111 

112 # Trim off common prefix (speedup). 

113 commonlength = self.diff_commonPrefix(text1, text2) 

114 commonprefix = text1[:commonlength] 

115 text1 = text1[commonlength:] 

116 text2 = text2[commonlength:] 

117 

118 # Trim off common suffix (speedup). 

119 commonlength = self.diff_commonSuffix(text1, text2) 

120 if commonlength == 0: 

121 commonsuffix = "" 

122 else: 

123 commonsuffix = text1[-commonlength:] 

124 text1 = text1[:-commonlength] 

125 text2 = text2[:-commonlength] 

126 

127 # Compute the diff on the middle block. 

128 diffs = self.diff_compute(text1, text2, checklines, deadline) 

129 

130 # Restore the prefix and suffix. 

131 if commonprefix: 

132 diffs[:0] = [(self.DIFF_EQUAL, commonprefix)] 

133 if commonsuffix: 

134 diffs.append((self.DIFF_EQUAL, commonsuffix)) 

135 self.diff_cleanupMerge(diffs) 

136 return diffs 

137 

138 def diff_compute(self, text1, text2, checklines, deadline): 

139 """Find the differences between two texts. Assumes that the texts do not 

140 have any common prefix or suffix. 

141 

142 Args: 

143 text1: Old string to be diffed. 

144 text2: New string to be diffed. 

145 checklines: Speedup flag. If false, then don't run a line-level diff 

146 first to identify the changed areas. 

147 If true, then run a faster, slightly less optimal diff. 

148 deadline: Time when the diff should be complete by. 

149 

150 Returns: 

151 Array of changes. 

152 """ 

153 if not text1: 

154 # Just add some text (speedup). 

155 return [(self.DIFF_INSERT, text2)] 

156 

157 if not text2: 

158 # Just delete some text (speedup). 

159 return [(self.DIFF_DELETE, text1)] 

160 

161 if len(text1) > len(text2): 

162 (longtext, shorttext) = (text1, text2) 

163 else: 

164 (shorttext, longtext) = (text1, text2) 

165 i = longtext.find(shorttext) 

166 if i != -1: 

167 # Shorter text is inside the longer text (speedup). 

168 diffs = [ 

169 (self.DIFF_INSERT, longtext[:i]), 

170 (self.DIFF_EQUAL, shorttext), 

171 (self.DIFF_INSERT, longtext[i + len(shorttext) :]), 

172 ] 

173 # Swap insertions for deletions if diff is reversed. 

174 if len(text1) > len(text2): 

175 diffs[0] = (self.DIFF_DELETE, diffs[0][1]) 

176 diffs[2] = (self.DIFF_DELETE, diffs[2][1]) 

177 return diffs 

178 

179 if len(shorttext) == 1: 

180 # Single character string. 

181 # After the previous speedup, the character can't be an equality. 

182 return [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)] 

183 

184 # Check to see if the problem can be split in two. 

185 hm = self.diff_halfMatch(text1, text2) 

186 if hm: 

187 # A half-match was found, sort out the return data. 

188 (text1_a, text1_b, text2_a, text2_b, mid_common) = hm 

189 # Send both pairs off for separate processing. 

190 diffs_a = self.diff_main(text1_a, text2_a, checklines, deadline) 

191 diffs_b = self.diff_main(text1_b, text2_b, checklines, deadline) 

192 # Merge the results. 

193 return diffs_a + [(self.DIFF_EQUAL, mid_common)] + diffs_b 

194 

195 if checklines and len(text1) > 100 and len(text2) > 100: 

196 return self.diff_lineMode(text1, text2, deadline) 

197 

198 return self.diff_bisect(text1, text2, deadline) 

199 

200 def diff_lineMode(self, text1, text2, deadline): 

201 """Do a quick line-level diff on both strings, then rediff the parts for 

202 greater accuracy. 

203 This speedup can produce non-minimal diffs. 

204 

205 Args: 

206 text1: Old string to be diffed. 

207 text2: New string to be diffed. 

208 deadline: Time when the diff should be complete by. 

209 

210 Returns: 

211 Array of changes. 

212 """ 

213 

214 # Scan the text on a line-by-line basis first. 

215 (text1, text2, linearray) = self.diff_linesToChars(text1, text2) 

216 

217 diffs = self.diff_main(text1, text2, False, deadline) 

218 

219 # Convert the diff back to original text. 

220 self.diff_charsToLines(diffs, linearray) 

221 # Eliminate freak matches (e.g. blank lines) 

222 self.diff_cleanupSemantic(diffs) 

223 

224 # Rediff any replacement blocks, this time character-by-character. 

225 # Add a dummy entry at the end. 

226 diffs.append((self.DIFF_EQUAL, "")) 

227 pointer = 0 

228 count_delete = 0 

229 count_insert = 0 

230 text_delete = "" 

231 text_insert = "" 

232 while pointer < len(diffs): 

233 if diffs[pointer][0] == self.DIFF_INSERT: 

234 count_insert += 1 

235 text_insert += diffs[pointer][1] 

236 elif diffs[pointer][0] == self.DIFF_DELETE: 

237 count_delete += 1 

238 text_delete += diffs[pointer][1] 

239 elif diffs[pointer][0] == self.DIFF_EQUAL: 

240 # Upon reaching an equality, check for prior redundancies. 

241 if count_delete >= 1 and count_insert >= 1: 

242 # Delete the offending records and add the merged ones. 

243 subDiff = self.diff_main(text_delete, text_insert, False, deadline) 

244 diffs[pointer - count_delete - count_insert : pointer] = subDiff 

245 pointer = pointer - count_delete - count_insert + len(subDiff) 

246 count_insert = 0 

247 count_delete = 0 

248 text_delete = "" 

249 text_insert = "" 

250 

251 pointer += 1 

252 

253 diffs.pop() # Remove the dummy entry at the end. 

254 

255 return diffs 

256 

257 def diff_bisect(self, text1, text2, deadline): 

258 """Find the 'middle snake' of a diff, split the problem in two 

259 and return the recursively constructed diff. 

260 See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations. 

261 

262 Args: 

263 text1: Old string to be diffed. 

264 text2: New string to be diffed. 

265 deadline: Time at which to bail if not yet complete. 

266 

267 Returns: 

268 Array of diff tuples. 

269 """ 

270 

271 # Cache the text lengths to prevent multiple calls. 

272 text1_length = len(text1) 

273 text2_length = len(text2) 

274 max_d = (text1_length + text2_length + 1) // 2 

275 v_offset = max_d 

276 v_length = 2 * max_d 

277 v1 = [-1] * v_length 

278 v1[v_offset + 1] = 0 

279 v2 = v1[:] 

280 delta = text1_length - text2_length 

281 # If the total number of characters is odd, then the front path will 

282 # collide with the reverse path. 

283 front = delta % 2 != 0 

284 # Offsets for start and end of k loop. 

285 # Prevents mapping of space beyond the grid. 

286 k1start = 0 

287 k1end = 0 

288 k2start = 0 

289 k2end = 0 

290 for d in range(max_d): 

291 # Bail out if deadline is reached. 

292 if time.time() > deadline: 

293 break 

294 

295 # Walk the front path one step. 

296 for k1 in range(-d + k1start, d + 1 - k1end, 2): 

297 k1_offset = v_offset + k1 

298 if k1 == -d or (k1 != d and v1[k1_offset - 1] < v1[k1_offset + 1]): 

299 x1 = v1[k1_offset + 1] 

300 else: 

301 x1 = v1[k1_offset - 1] + 1 

302 y1 = x1 - k1 

303 while ( 

304 x1 < text1_length and y1 < text2_length and text1[x1] == text2[y1] 

305 ): 

306 x1 += 1 

307 y1 += 1 

308 v1[k1_offset] = x1 

309 if x1 > text1_length: 

310 # Ran off the right of the graph. 

311 k1end += 2 

312 elif y1 > text2_length: 

313 # Ran off the bottom of the graph. 

314 k1start += 2 

315 elif front: 

316 k2_offset = v_offset + delta - k1 

317 if k2_offset >= 0 and k2_offset < v_length and v2[k2_offset] != -1: 

318 # Mirror x2 onto top-left coordinate system. 

319 x2 = text1_length - v2[k2_offset] 

320 if x1 >= x2: 

321 # Overlap detected. 

322 return self.diff_bisectSplit(text1, text2, x1, y1, deadline) 

323 

324 # Walk the reverse path one step. 

325 for k2 in range(-d + k2start, d + 1 - k2end, 2): 

326 k2_offset = v_offset + k2 

327 if k2 == -d or (k2 != d and v2[k2_offset - 1] < v2[k2_offset + 1]): 

328 x2 = v2[k2_offset + 1] 

329 else: 

330 x2 = v2[k2_offset - 1] + 1 

331 y2 = x2 - k2 

332 while ( 

333 x2 < text1_length 

334 and y2 < text2_length 

335 and text1[-x2 - 1] == text2[-y2 - 1] 

336 ): 

337 x2 += 1 

338 y2 += 1 

339 v2[k2_offset] = x2 

340 if x2 > text1_length: 

341 # Ran off the left of the graph. 

342 k2end += 2 

343 elif y2 > text2_length: 

344 # Ran off the top of the graph. 

345 k2start += 2 

346 elif not front: 

347 k1_offset = v_offset + delta - k2 

348 if k1_offset >= 0 and k1_offset < v_length and v1[k1_offset] != -1: 

349 x1 = v1[k1_offset] 

350 y1 = v_offset + x1 - k1_offset 

351 # Mirror x2 onto top-left coordinate system. 

352 x2 = text1_length - x2 

353 if x1 >= x2: 

354 # Overlap detected. 

355 return self.diff_bisectSplit(text1, text2, x1, y1, deadline) 

356 

357 # Diff took too long and hit the deadline or 

358 # number of diffs equals number of characters, no commonality at all. 

359 return [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)] 

360 

361 def diff_bisectSplit(self, text1, text2, x, y, deadline): 

362 """Given the location of the 'middle snake', split the diff in two parts 

363 and recurse. 

364 

365 Args: 

366 text1: Old string to be diffed. 

367 text2: New string to be diffed. 

368 x: Index of split point in text1. 

369 y: Index of split point in text2. 

370 deadline: Time at which to bail if not yet complete. 

371 

372 Returns: 

373 Array of diff tuples. 

374 """ 

375 text1a = text1[:x] 

376 text2a = text2[:y] 

377 text1b = text1[x:] 

378 text2b = text2[y:] 

379 

380 # Compute both diffs serially. 

381 diffs = self.diff_main(text1a, text2a, False, deadline) 

382 diffsb = self.diff_main(text1b, text2b, False, deadline) 

383 

384 return diffs + diffsb 

385 

386 def diff_linesToChars(self, text1, text2): 

387 """Split two texts into an array of strings. Reduce the texts to a string 

388 of hashes where each Unicode character represents one line. 

389 

390 Args: 

391 text1: First string. 

392 text2: Second string. 

393 

394 Returns: 

395 Three element tuple, containing the encoded text1, the encoded text2 and 

396 the array of unique strings. The zeroth element of the array of unique 

397 strings is intentionally blank. 

398 """ 

399 lineArray = [] # e.g. lineArray[4] == "Hello\n" 

400 lineHash = {} # e.g. lineHash["Hello\n"] == 4 

401 

402 # "\x00" is a valid character, but various debuggers don't like it. 

403 # So we'll insert a junk entry to avoid generating a null character. 

404 lineArray.append("") 

405 

406 def diff_linesToCharsMunge(text): 

407 """Split a text into an array of strings. Reduce the texts to a string 

408 of hashes where each Unicode character represents one line. 

409 Modifies linearray and linehash through being a closure. 

410 

411 Args: 

412 text: String to encode. 

413 

414 Returns: 

415 Encoded string. 

416 """ 

417 chars = [] 

418 # Walk the text, pulling out a substring for each line. 

419 # text.split('\n') would would temporarily double our memory footprint. 

420 # Modifying text would create many large strings to garbage collect. 

421 lineStart = 0 

422 lineEnd = -1 

423 while lineEnd < len(text) - 1: 

424 lineEnd = text.find("\n", lineStart) 

425 if lineEnd == -1: 

426 lineEnd = len(text) - 1 

427 line = text[lineStart : lineEnd + 1] 

428 

429 if line in lineHash: 

430 chars.append(chr(lineHash[line])) 

431 else: 

432 if len(lineArray) == maxLines: 

433 # Bail out at 1114111 because chr(1114112) throws. 

434 line = text[lineStart:] 

435 lineEnd = len(text) 

436 lineArray.append(line) 

437 lineHash[line] = len(lineArray) - 1 

438 chars.append(chr(len(lineArray) - 1)) 

439 lineStart = lineEnd + 1 

440 return "".join(chars) 

441 

442 # Allocate 2/3rds of the space for text1, the rest for text2. 

443 maxLines = 666666 

444 chars1 = diff_linesToCharsMunge(text1) 

445 maxLines = 1114111 

446 chars2 = diff_linesToCharsMunge(text2) 

447 return (chars1, chars2, lineArray) 

448 

449 def diff_charsToLines(self, diffs, lineArray): 

450 """Rehydrate the text in a diff from a string of line hashes to real lines 

451 of text. 

452 

453 Args: 

454 diffs: Array of diff tuples. 

455 lineArray: Array of unique strings. 

456 """ 

457 for i in range(len(diffs)): 

458 text = [] 

459 for char in diffs[i][1]: 

460 text.append(lineArray[ord(char)]) 

461 diffs[i] = (diffs[i][0], "".join(text)) 

462 

463 def diff_commonPrefix(self, text1, text2): 

464 """Determine the common prefix of two strings. 

465 

466 Args: 

467 text1: First string. 

468 text2: Second string. 

469 

470 Returns: 

471 The number of characters common to the start of each string. 

472 """ 

473 # Quick check for common null cases. 

474 if not text1 or not text2 or text1[0] != text2[0]: 

475 return 0 

476 # Binary search. 

477 # Performance analysis: https://neil.fraser.name/news/2007/10/09/ 

478 pointermin = 0 

479 pointermax = min(len(text1), len(text2)) 

480 pointermid = pointermax 

481 pointerstart = 0 

482 while pointermin < pointermid: 

483 if text1[pointerstart:pointermid] == text2[pointerstart:pointermid]: 

484 pointermin = pointermid 

485 pointerstart = pointermin 

486 else: 

487 pointermax = pointermid 

488 pointermid = (pointermax - pointermin) // 2 + pointermin 

489 return pointermid 

490 

491 def diff_commonSuffix(self, text1, text2): 

492 """Determine the common suffix of two strings. 

493 

494 Args: 

495 text1: First string. 

496 text2: Second string. 

497 

498 Returns: 

499 The number of characters common to the end of each string. 

500 """ 

501 # Quick check for common null cases. 

502 if not text1 or not text2 or text1[-1] != text2[-1]: 

503 return 0 

504 # Binary search. 

505 # Performance analysis: https://neil.fraser.name/news/2007/10/09/ 

506 pointermin = 0 

507 pointermax = min(len(text1), len(text2)) 

508 pointermid = pointermax 

509 pointerend = 0 

510 while pointermin < pointermid: 

511 if ( 

512 text1[-pointermid : len(text1) - pointerend] 

513 == text2[-pointermid : len(text2) - pointerend] 

514 ): 

515 pointermin = pointermid 

516 pointerend = pointermin 

517 else: 

518 pointermax = pointermid 

519 pointermid = (pointermax - pointermin) // 2 + pointermin 

520 return pointermid 

521 

522 def diff_commonOverlap(self, text1, text2): 

523 """Determine if the suffix of one string is the prefix of another. 

524 

525 Args: 

526 text1 First string. 

527 text2 Second string. 

528 

529 Returns: 

530 The number of characters common to the end of the first 

531 string and the start of the second string. 

532 """ 

533 # Cache the text lengths to prevent multiple calls. 

534 text1_length = len(text1) 

535 text2_length = len(text2) 

536 # Eliminate the null case. 

537 if text1_length == 0 or text2_length == 0: 

538 return 0 

539 # Truncate the longer string. 

540 if text1_length > text2_length: 

541 text1 = text1[-text2_length:] 

542 elif text1_length < text2_length: 

543 text2 = text2[:text1_length] 

544 text_length = min(text1_length, text2_length) 

545 # Quick check for the worst case. 

546 if text1 == text2: 

547 return text_length 

548 

549 # Start by looking for a single character match 

550 # and increase length until no match is found. 

551 # Performance analysis: https://neil.fraser.name/news/2010/11/04/ 

552 best = 0 

553 length = 1 

554 while True: 

555 pattern = text1[-length:] 

556 found = text2.find(pattern) 

557 if found == -1: 

558 return best 

559 length += found 

560 if found == 0 or text1[-length:] == text2[:length]: 

561 best = length 

562 length += 1 

563 

564 def diff_halfMatch(self, text1, text2): 

565 """Do the two texts share a substring which is at least half the length of 

566 the longer text? 

567 This speedup can produce non-minimal diffs. 

568 

569 Args: 

570 text1: First string. 

571 text2: Second string. 

572 

573 Returns: 

574 Five element Array, containing the prefix of text1, the suffix of text1, 

575 the prefix of text2, the suffix of text2 and the common middle. Or None 

576 if there was no match. 

577 """ 

578 if self.Diff_Timeout <= 0: 

579 # Don't risk returning a non-optimal diff if we have unlimited time. 

580 return None 

581 if len(text1) > len(text2): 

582 (longtext, shorttext) = (text1, text2) 

583 else: 

584 (shorttext, longtext) = (text1, text2) 

585 if len(longtext) < 4 or len(shorttext) * 2 < len(longtext): 

586 return None # Pointless. 

587 

588 def diff_halfMatchI(longtext, shorttext, i): 

589 """Does a substring of shorttext exist within longtext such that the 

590 substring is at least half the length of longtext? 

591 Closure, but does not reference any external variables. 

592 

593 Args: 

594 longtext: Longer string. 

595 shorttext: Shorter string. 

596 i: Start index of quarter length substring within longtext. 

597 

598 Returns: 

599 Five element Array, containing the prefix of longtext, the suffix of 

600 longtext, the prefix of shorttext, the suffix of shorttext and the 

601 common middle. Or None if there was no match. 

602 """ 

603 seed = longtext[i : i + len(longtext) // 4] 

604 best_common = "" 

605 j = shorttext.find(seed) 

606 while j != -1: 

607 prefixLength = self.diff_commonPrefix(longtext[i:], shorttext[j:]) 

608 suffixLength = self.diff_commonSuffix(longtext[:i], shorttext[:j]) 

609 if len(best_common) < suffixLength + prefixLength: 

610 best_common = ( 

611 shorttext[j - suffixLength : j] 

612 + shorttext[j : j + prefixLength] 

613 ) 

614 best_longtext_a = longtext[: i - suffixLength] 

615 best_longtext_b = longtext[i + prefixLength :] 

616 best_shorttext_a = shorttext[: j - suffixLength] 

617 best_shorttext_b = shorttext[j + prefixLength :] 

618 j = shorttext.find(seed, j + 1) 

619 

620 if len(best_common) * 2 >= len(longtext): 

621 return ( 

622 best_longtext_a, 

623 best_longtext_b, 

624 best_shorttext_a, 

625 best_shorttext_b, 

626 best_common, 

627 ) 

628 else: 

629 return None 

630 

631 # First check if the second quarter is the seed for a half-match. 

632 hm1 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 3) // 4) 

633 # Check again based on the third quarter. 

634 hm2 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 1) // 2) 

635 if not hm1 and not hm2: 

636 return None 

637 elif not hm2: 

638 hm = hm1 

639 elif not hm1: 

640 hm = hm2 

641 else: 

642 # Both matched. Select the longest. 

643 if len(hm1[4]) > len(hm2[4]): 

644 hm = hm1 

645 else: 

646 hm = hm2 

647 

648 # A half-match was found, sort out the return data. 

649 if len(text1) > len(text2): 

650 (text1_a, text1_b, text2_a, text2_b, mid_common) = hm 

651 else: 

652 (text2_a, text2_b, text1_a, text1_b, mid_common) = hm 

653 return (text1_a, text1_b, text2_a, text2_b, mid_common) 

654 

655 def diff_cleanupSemantic(self, diffs): 

656 """Reduce the number of edits by eliminating semantically trivial 

657 equalities. 

658 

659 Args: 

660 diffs: Array of diff tuples. 

661 """ 

662 changes = False 

663 equalities = [] # Stack of indices where equalities are found. 

664 lastEquality = None # Always equal to diffs[equalities[-1]][1] 

665 pointer = 0 # Index of current position. 

666 # Number of chars that changed prior to the equality. 

667 length_insertions1, length_deletions1 = 0, 0 

668 # Number of chars that changed after the equality. 

669 length_insertions2, length_deletions2 = 0, 0 

670 while pointer < len(diffs): 

671 if diffs[pointer][0] == self.DIFF_EQUAL: # Equality found. 

672 equalities.append(pointer) 

673 length_insertions1, length_insertions2 = length_insertions2, 0 

674 length_deletions1, length_deletions2 = length_deletions2, 0 

675 lastEquality = diffs[pointer][1] 

676 else: # An insertion or deletion. 

677 if diffs[pointer][0] == self.DIFF_INSERT: 

678 length_insertions2 += len(diffs[pointer][1]) 

679 else: 

680 length_deletions2 += len(diffs[pointer][1]) 

681 # Eliminate an equality that is smaller or equal to the edits on both 

682 # sides of it. 

683 if ( 

684 lastEquality 

685 and ( 

686 len(lastEquality) <= max(length_insertions1, length_deletions1) 

687 ) 

688 and ( 

689 len(lastEquality) <= max(length_insertions2, length_deletions2) 

690 ) 

691 ): 

692 # Duplicate record. 

693 diffs.insert(equalities[-1], (self.DIFF_DELETE, lastEquality)) 

694 # Change second copy to insert. 

695 diffs[equalities[-1] + 1] = ( 

696 self.DIFF_INSERT, 

697 diffs[equalities[-1] + 1][1], 

698 ) 

699 # Throw away the equality we just deleted. 

700 equalities.pop() 

701 # Throw away the previous equality (it needs to be reevaluated). 

702 if len(equalities): 

703 equalities.pop() 

704 if len(equalities): 

705 pointer = equalities[-1] 

706 else: 

707 pointer = -1 

708 # Reset the counters. 

709 length_insertions1, length_deletions1 = 0, 0 

710 length_insertions2, length_deletions2 = 0, 0 

711 lastEquality = None 

712 changes = True 

713 pointer += 1 

714 

715 # Normalize the diff. 

716 if changes: 

717 self.diff_cleanupMerge(diffs) 

718 self.diff_cleanupSemanticLossless(diffs) 

719 

720 # Find any overlaps between deletions and insertions. 

721 # e.g: <del>abcxxx</del><ins>xxxdef</ins> 

722 # -> <del>abc</del>xxx<ins>def</ins> 

723 # e.g: <del>xxxabc</del><ins>defxxx</ins> 

724 # -> <ins>def</ins>xxx<del>abc</del> 

725 # Only extract an overlap if it is as big as the edit ahead or behind it. 

726 pointer = 1 

727 while pointer < len(diffs): 

728 if ( 

729 diffs[pointer - 1][0] == self.DIFF_DELETE 

730 and diffs[pointer][0] == self.DIFF_INSERT 

731 ): 

732 deletion = diffs[pointer - 1][1] 

733 insertion = diffs[pointer][1] 

734 overlap_length1 = self.diff_commonOverlap(deletion, insertion) 

735 overlap_length2 = self.diff_commonOverlap(insertion, deletion) 

736 if overlap_length1 >= overlap_length2: 

737 if ( 

738 overlap_length1 >= len(deletion) / 2.0 

739 or overlap_length1 >= len(insertion) / 2.0 

740 ): 

741 # Overlap found. Insert an equality and trim the surrounding edits. 

742 diffs.insert( 

743 pointer, (self.DIFF_EQUAL, insertion[:overlap_length1]) 

744 ) 

745 diffs[pointer - 1] = ( 

746 self.DIFF_DELETE, 

747 deletion[: len(deletion) - overlap_length1], 

748 ) 

749 diffs[pointer + 1] = ( 

750 self.DIFF_INSERT, 

751 insertion[overlap_length1:], 

752 ) 

753 pointer += 1 

754 else: 

755 if ( 

756 overlap_length2 >= len(deletion) / 2.0 

757 or overlap_length2 >= len(insertion) / 2.0 

758 ): 

759 # Reverse overlap found. 

760 # Insert an equality and swap and trim the surrounding edits. 

761 diffs.insert( 

762 pointer, (self.DIFF_EQUAL, deletion[:overlap_length2]) 

763 ) 

764 diffs[pointer - 1] = ( 

765 self.DIFF_INSERT, 

766 insertion[: len(insertion) - overlap_length2], 

767 ) 

768 diffs[pointer + 1] = ( 

769 self.DIFF_DELETE, 

770 deletion[overlap_length2:], 

771 ) 

772 pointer += 1 

773 pointer += 1 

774 pointer += 1 

775 

776 def diff_cleanupSemanticLossless(self, diffs): 

777 """Look for single edits surrounded on both sides by equalities 

778 which can be shifted sideways to align the edit to a word boundary. 

779 e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came. 

780 

781 Args: 

782 diffs: Array of diff tuples. 

783 """ 

784 

785 def diff_cleanupSemanticScore(one, two): 

786 """Given two strings, compute a score representing whether the 

787 internal boundary falls on logical boundaries. 

788 Scores range from 6 (best) to 0 (worst). 

789 Closure, but does not reference any external variables. 

790 

791 Args: 

792 one: First string. 

793 two: Second string. 

794 

795 Returns: 

796 The score. 

797 """ 

798 if not one or not two: 

799 # Edges are the best. 

800 return 6 

801 

802 # Each port of this function behaves slightly differently due to 

803 # subtle differences in each language's definition of things like 

804 # 'whitespace'. Since this function's purpose is largely cosmetic, 

805 # the choice has been made to use each language's native features 

806 # rather than force total conformity. 

807 char1 = one[-1] 

808 char2 = two[0] 

809 nonAlphaNumeric1 = not char1.isalnum() 

810 nonAlphaNumeric2 = not char2.isalnum() 

811 whitespace1 = nonAlphaNumeric1 and char1.isspace() 

812 whitespace2 = nonAlphaNumeric2 and char2.isspace() 

813 lineBreak1 = whitespace1 and (char1 == "\r" or char1 == "\n") 

814 lineBreak2 = whitespace2 and (char2 == "\r" or char2 == "\n") 

815 blankLine1 = lineBreak1 and self.BLANKLINEEND.search(one) 

816 blankLine2 = lineBreak2 and self.BLANKLINESTART.match(two) 

817 

818 if blankLine1 or blankLine2: 

819 # Five points for blank lines. 

820 return 5 

821 elif lineBreak1 or lineBreak2: 

822 # Four points for line breaks. 

823 return 4 

824 elif nonAlphaNumeric1 and not whitespace1 and whitespace2: 

825 # Three points for end of sentences. 

826 return 3 

827 elif whitespace1 or whitespace2: 

828 # Two points for whitespace. 

829 return 2 

830 elif nonAlphaNumeric1 or nonAlphaNumeric2: 

831 # One point for non-alphanumeric. 

832 return 1 

833 return 0 

834 

835 pointer = 1 

836 # Intentionally ignore the first and last element (don't need checking). 

837 while pointer < len(diffs) - 1: 

838 if ( 

839 diffs[pointer - 1][0] == self.DIFF_EQUAL 

840 and diffs[pointer + 1][0] == self.DIFF_EQUAL 

841 ): 

842 # This is a single edit surrounded by equalities. 

843 equality1 = diffs[pointer - 1][1] 

844 edit = diffs[pointer][1] 

845 equality2 = diffs[pointer + 1][1] 

846 

847 # First, shift the edit as far left as possible. 

848 commonOffset = self.diff_commonSuffix(equality1, edit) 

849 if commonOffset: 

850 commonString = edit[-commonOffset:] 

851 equality1 = equality1[:-commonOffset] 

852 edit = commonString + edit[:-commonOffset] 

853 equality2 = commonString + equality2 

854 

855 # Second, step character by character right, looking for the best fit. 

856 bestEquality1 = equality1 

857 bestEdit = edit 

858 bestEquality2 = equality2 

859 bestScore = diff_cleanupSemanticScore( 

860 equality1, edit 

861 ) + diff_cleanupSemanticScore(edit, equality2) 

862 while edit and equality2 and edit[0] == equality2[0]: 

863 equality1 += edit[0] 

864 edit = edit[1:] + equality2[0] 

865 equality2 = equality2[1:] 

866 score = diff_cleanupSemanticScore( 

867 equality1, edit 

868 ) + diff_cleanupSemanticScore(edit, equality2) 

869 # The >= encourages trailing rather than leading whitespace on edits. 

870 if score >= bestScore: 

871 bestScore = score 

872 bestEquality1 = equality1 

873 bestEdit = edit 

874 bestEquality2 = equality2 

875 

876 if diffs[pointer - 1][1] != bestEquality1: 

877 # We have an improvement, save it back to the diff. 

878 if bestEquality1: 

879 diffs[pointer - 1] = (diffs[pointer - 1][0], bestEquality1) 

880 else: 

881 del diffs[pointer - 1] 

882 pointer -= 1 

883 diffs[pointer] = (diffs[pointer][0], bestEdit) 

884 if bestEquality2: 

885 diffs[pointer + 1] = (diffs[pointer + 1][0], bestEquality2) 

886 else: 

887 del diffs[pointer + 1] 

888 pointer -= 1 

889 pointer += 1 

890 

891 # Define some regex patterns for matching boundaries. 

892 BLANKLINEEND = re.compile(r"\n\r?\n$") 

893 BLANKLINESTART = re.compile(r"^\r?\n\r?\n") 

894 

895 def diff_cleanupEfficiency(self, diffs): 

896 """Reduce the number of edits by eliminating operationally trivial 

897 equalities. 

898 

899 Args: 

900 diffs: Array of diff tuples. 

901 """ 

902 changes = False 

903 equalities = [] # Stack of indices where equalities are found. 

904 lastEquality = None # Always equal to diffs[equalities[-1]][1] 

905 pointer = 0 # Index of current position. 

906 pre_ins = False # Is there an insertion operation before the last equality. 

907 pre_del = False # Is there a deletion operation before the last equality. 

908 post_ins = False # Is there an insertion operation after the last equality. 

909 post_del = False # Is there a deletion operation after the last equality. 

910 while pointer < len(diffs): 

911 if diffs[pointer][0] == self.DIFF_EQUAL: # Equality found. 

912 if len(diffs[pointer][1]) < self.Diff_EditCost and ( 

913 post_ins or post_del 

914 ): 

915 # Candidate found. 

916 equalities.append(pointer) 

917 pre_ins = post_ins 

918 pre_del = post_del 

919 lastEquality = diffs[pointer][1] 

920 else: 

921 # Not a candidate, and can never become one. 

922 equalities = [] 

923 lastEquality = None 

924 

925 post_ins = post_del = False 

926 else: # An insertion or deletion. 

927 if diffs[pointer][0] == self.DIFF_DELETE: 

928 post_del = True 

929 else: 

930 post_ins = True 

931 

932 # Five types to be split: 

933 # <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del> 

934 # <ins>A</ins>X<ins>C</ins><del>D</del> 

935 # <ins>A</ins><del>B</del>X<ins>C</ins> 

936 # <ins>A</del>X<ins>C</ins><del>D</del> 

937 # <ins>A</ins><del>B</del>X<del>C</del> 

938 

939 if lastEquality and ( 

940 (pre_ins and pre_del and post_ins and post_del) 

941 or ( 

942 (len(lastEquality) < self.Diff_EditCost / 2) 

943 and (pre_ins + pre_del + post_ins + post_del) == 3 

944 ) 

945 ): 

946 # Duplicate record. 

947 diffs.insert(equalities[-1], (self.DIFF_DELETE, lastEquality)) 

948 # Change second copy to insert. 

949 diffs[equalities[-1] + 1] = ( 

950 self.DIFF_INSERT, 

951 diffs[equalities[-1] + 1][1], 

952 ) 

953 equalities.pop() # Throw away the equality we just deleted. 

954 lastEquality = None 

955 if pre_ins and pre_del: 

956 # No changes made which could affect previous entry, keep going. 

957 post_ins = post_del = True 

958 equalities = [] 

959 else: 

960 if len(equalities): 

961 equalities.pop() # Throw away the previous equality. 

962 if len(equalities): 

963 pointer = equalities[-1] 

964 else: 

965 pointer = -1 

966 post_ins = post_del = False 

967 changes = True 

968 pointer += 1 

969 

970 if changes: 

971 self.diff_cleanupMerge(diffs) 

972 

973 def diff_cleanupMerge(self, diffs): 

974 """Reorder and merge like edit sections. Merge equalities. 

975 Any edit section can move as long as it doesn't cross an equality. 

976 

977 Args: 

978 diffs: Array of diff tuples. 

979 """ 

980 diffs.append((self.DIFF_EQUAL, "")) # Add a dummy entry at the end. 

981 pointer = 0 

982 count_delete = 0 

983 count_insert = 0 

984 text_delete = "" 

985 text_insert = "" 

986 while pointer < len(diffs): 

987 if diffs[pointer][0] == self.DIFF_INSERT: 

988 count_insert += 1 

989 text_insert += diffs[pointer][1] 

990 pointer += 1 

991 elif diffs[pointer][0] == self.DIFF_DELETE: 

992 count_delete += 1 

993 text_delete += diffs[pointer][1] 

994 pointer += 1 

995 elif diffs[pointer][0] == self.DIFF_EQUAL: 

996 # Upon reaching an equality, check for prior redundancies. 

997 if count_delete + count_insert > 1: 

998 if count_delete != 0 and count_insert != 0: 

999 # Factor out any common prefixies. 

1000 commonlength = self.diff_commonPrefix(text_insert, text_delete) 

1001 if commonlength != 0: 

1002 x = pointer - count_delete - count_insert - 1 

1003 if x >= 0 and diffs[x][0] == self.DIFF_EQUAL: 

1004 diffs[x] = ( 

1005 diffs[x][0], 

1006 diffs[x][1] + text_insert[:commonlength], 

1007 ) 

1008 else: 

1009 diffs.insert( 

1010 0, (self.DIFF_EQUAL, text_insert[:commonlength]) 

1011 ) 

1012 pointer += 1 

1013 text_insert = text_insert[commonlength:] 

1014 text_delete = text_delete[commonlength:] 

1015 # Factor out any common suffixies. 

1016 commonlength = self.diff_commonSuffix(text_insert, text_delete) 

1017 if commonlength != 0: 

1018 diffs[pointer] = ( 

1019 diffs[pointer][0], 

1020 text_insert[-commonlength:] + diffs[pointer][1], 

1021 ) 

1022 text_insert = text_insert[:-commonlength] 

1023 text_delete = text_delete[:-commonlength] 

1024 # Delete the offending records and add the merged ones. 

1025 new_ops = [] 

1026 if len(text_delete) != 0: 

1027 new_ops.append((self.DIFF_DELETE, text_delete)) 

1028 if len(text_insert) != 0: 

1029 new_ops.append((self.DIFF_INSERT, text_insert)) 

1030 pointer -= count_delete + count_insert 

1031 diffs[pointer : pointer + count_delete + count_insert] = new_ops 

1032 pointer += len(new_ops) + 1 

1033 elif pointer != 0 and diffs[pointer - 1][0] == self.DIFF_EQUAL: 

1034 # Merge this equality with the previous one. 

1035 diffs[pointer - 1] = ( 

1036 diffs[pointer - 1][0], 

1037 diffs[pointer - 1][1] + diffs[pointer][1], 

1038 ) 

1039 del diffs[pointer] 

1040 else: 

1041 pointer += 1 

1042 

1043 count_insert = 0 

1044 count_delete = 0 

1045 text_delete = "" 

1046 text_insert = "" 

1047 

1048 if diffs[-1][1] == "": 

1049 diffs.pop() # Remove the dummy entry at the end. 

1050 

1051 # Second pass: look for single edits surrounded on both sides by equalities 

1052 # which can be shifted sideways to eliminate an equality. 

1053 # e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC 

1054 changes = False 

1055 pointer = 1 

1056 # Intentionally ignore the first and last element (don't need checking). 

1057 while pointer < len(diffs) - 1: 

1058 if ( 

1059 diffs[pointer - 1][0] == self.DIFF_EQUAL 

1060 and diffs[pointer + 1][0] == self.DIFF_EQUAL 

1061 ): 

1062 # This is a single edit surrounded by equalities. 

1063 if diffs[pointer][1].endswith(diffs[pointer - 1][1]): 

1064 # Shift the edit over the previous equality. 

1065 if diffs[pointer - 1][1] != "": 

1066 diffs[pointer] = ( 

1067 diffs[pointer][0], 

1068 diffs[pointer - 1][1] 

1069 + diffs[pointer][1][: -len(diffs[pointer - 1][1])], 

1070 ) 

1071 diffs[pointer + 1] = ( 

1072 diffs[pointer + 1][0], 

1073 diffs[pointer - 1][1] + diffs[pointer + 1][1], 

1074 ) 

1075 del diffs[pointer - 1] 

1076 changes = True 

1077 elif diffs[pointer][1].startswith(diffs[pointer + 1][1]): 

1078 # Shift the edit over the next equality. 

1079 diffs[pointer - 1] = ( 

1080 diffs[pointer - 1][0], 

1081 diffs[pointer - 1][1] + diffs[pointer + 1][1], 

1082 ) 

1083 diffs[pointer] = ( 

1084 diffs[pointer][0], 

1085 diffs[pointer][1][len(diffs[pointer + 1][1]) :] 

1086 + diffs[pointer + 1][1], 

1087 ) 

1088 del diffs[pointer + 1] 

1089 changes = True 

1090 pointer += 1 

1091 

1092 # If shifts were made, the diff needs reordering and another shift sweep. 

1093 if changes: 

1094 self.diff_cleanupMerge(diffs) 

1095 

1096 def diff_xIndex(self, diffs, loc): 

1097 """loc is a location in text1, compute and return the equivalent location 

1098 in text2. e.g. "The cat" vs "The big cat", 1->1, 5->8 

1099 

1100 Args: 

1101 diffs: Array of diff tuples. 

1102 loc: Location within text1. 

1103 

1104 Returns: 

1105 Location within text2. 

1106 """ 

1107 chars1 = 0 

1108 chars2 = 0 

1109 last_chars1 = 0 

1110 last_chars2 = 0 

1111 for x in range(len(diffs)): 

1112 (op, text) = diffs[x] 

1113 if op != self.DIFF_INSERT: # Equality or deletion. 

1114 chars1 += len(text) 

1115 if op != self.DIFF_DELETE: # Equality or insertion. 

1116 chars2 += len(text) 

1117 if chars1 > loc: # Overshot the location. 

1118 break 

1119 last_chars1 = chars1 

1120 last_chars2 = chars2 

1121 

1122 if len(diffs) != x and diffs[x][0] == self.DIFF_DELETE: 

1123 # The location was deleted. 

1124 return last_chars2 

1125 # Add the remaining len(character). 

1126 return last_chars2 + (loc - last_chars1) 

1127 

1128 def diff_prettyHtml(self, diffs): 

1129 """Convert a diff array into a pretty HTML report. 

1130 

1131 Args: 

1132 diffs: Array of diff tuples. 

1133 

1134 Returns: 

1135 HTML representation. 

1136 """ 

1137 html = [] 

1138 for (op, data) in diffs: 

1139 text = ( 

1140 data.replace("&", "&amp;") 

1141 .replace("<", "&lt;") 

1142 .replace(">", "&gt;") 

1143 .replace("\n", "&para;<br>") 

1144 ) 

1145 if op == self.DIFF_INSERT: 

1146 html.append('<ins style="background:#e6ffe6;">%s</ins>' % text) 

1147 elif op == self.DIFF_DELETE: 

1148 html.append('<del style="background:#ffe6e6;">%s</del>' % text) 

1149 elif op == self.DIFF_EQUAL: 

1150 html.append("<span>%s</span>" % text) 

1151 return "".join(html) 

1152 

1153 def diff_text1(self, diffs): 

1154 """Compute and return the source text (all equalities and deletions). 

1155 

1156 Args: 

1157 diffs: Array of diff tuples. 

1158 

1159 Returns: 

1160 Source text. 

1161 """ 

1162 text = [] 

1163 for (op, data) in diffs: 

1164 if op != self.DIFF_INSERT: 

1165 text.append(data) 

1166 return "".join(text) 

1167 

1168 def diff_text2(self, diffs): 

1169 """Compute and return the destination text (all equalities and insertions). 

1170 

1171 Args: 

1172 diffs: Array of diff tuples. 

1173 

1174 Returns: 

1175 Destination text. 

1176 """ 

1177 text = [] 

1178 for (op, data) in diffs: 

1179 if op != self.DIFF_DELETE: 

1180 text.append(data) 

1181 return "".join(text) 

1182 

1183 def diff_levenshtein(self, diffs): 

1184 """Compute the Levenshtein distance; the number of inserted, deleted or 

1185 substituted characters. 

1186 

1187 Args: 

1188 diffs: Array of diff tuples. 

1189 

1190 Returns: 

1191 Number of changes. 

1192 """ 

1193 levenshtein = 0 

1194 insertions = 0 

1195 deletions = 0 

1196 for (op, data) in diffs: 

1197 if op == self.DIFF_INSERT: 

1198 insertions += len(data) 

1199 elif op == self.DIFF_DELETE: 

1200 deletions += len(data) 

1201 elif op == self.DIFF_EQUAL: 

1202 # A deletion and an insertion is one substitution. 

1203 levenshtein += max(insertions, deletions) 

1204 insertions = 0 

1205 deletions = 0 

1206 levenshtein += max(insertions, deletions) 

1207 return levenshtein 

1208 

1209 def diff_toDelta(self, diffs): 

1210 """Crush the diff into an encoded string which describes the operations 

1211 required to transform text1 into text2. 

1212 E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'. 

1213 Operations are tab-separated. Inserted text is escaped using %xx notation. 

1214 

1215 Args: 

1216 diffs: Array of diff tuples. 

1217 

1218 Returns: 

1219 Delta text. 

1220 """ 

1221 text = [] 

1222 for (op, data) in diffs: 

1223 if op == self.DIFF_INSERT: 

1224 # High ascii will raise UnicodeDecodeError. Use Unicode instead. 

1225 data = data.encode("utf-8") 

1226 text.append("+" + urllib.parse.quote(data, "!~*'();/?:@&=+$,# ")) 

1227 elif op == self.DIFF_DELETE: 

1228 text.append("-%d" % len(data)) 

1229 elif op == self.DIFF_EQUAL: 

1230 text.append("=%d" % len(data)) 

1231 return "\t".join(text) 

1232 

1233 def diff_fromDelta(self, text1, delta): 

1234 """Given the original text1, and an encoded string which describes the 

1235 operations required to transform text1 into text2, compute the full diff. 

1236 

1237 Args: 

1238 text1: Source string for the diff. 

1239 delta: Delta text. 

1240 

1241 Returns: 

1242 Array of diff tuples. 

1243 

1244 Raises: 

1245 ValueError: If invalid input. 

1246 """ 

1247 diffs = [] 

1248 pointer = 0 # Cursor in text1 

1249 tokens = delta.split("\t") 

1250 for token in tokens: 

1251 if token == "": 

1252 # Blank tokens are ok (from a trailing \t). 

1253 continue 

1254 # Each token begins with a one character parameter which specifies the 

1255 # operation of this token (delete, insert, equality). 

1256 param = token[1:] 

1257 if token[0] == "+": 

1258 param = urllib.parse.unquote(param) 

1259 diffs.append((self.DIFF_INSERT, param)) 

1260 elif token[0] == "-" or token[0] == "=": 

1261 try: 

1262 n = int(param) 

1263 except ValueError: 

1264 raise ValueError("Invalid number in diff_fromDelta: " + param) 

1265 if n < 0: 

1266 raise ValueError("Negative number in diff_fromDelta: " + param) 

1267 text = text1[pointer : pointer + n] 

1268 pointer += n 

1269 if token[0] == "=": 

1270 diffs.append((self.DIFF_EQUAL, text)) 

1271 else: 

1272 diffs.append((self.DIFF_DELETE, text)) 

1273 else: 

1274 # Anything else is an error. 

1275 raise ValueError( 

1276 "Invalid diff operation in diff_fromDelta: " + token[0] 

1277 ) 

1278 if pointer != len(text1): 

1279 raise ValueError( 

1280 "Delta length (%d) does not equal source text length (%d)." 

1281 % (pointer, len(text1)) 

1282 ) 

1283 return diffs 

1284 

1285 # MATCH FUNCTIONS 

1286 

1287 def match_main(self, text, pattern, loc): 

1288 """Locate the best instance of 'pattern' in 'text' near 'loc'. 

1289 

1290 Args: 

1291 text: The text to search. 

1292 pattern: The pattern to search for. 

1293 loc: The location to search around. 

1294 

1295 Returns: 

1296 Best match index or -1. 

1297 """ 

1298 # Check for null inputs. 

1299 if text == None or pattern == None: 

1300 raise ValueError("Null inputs. (match_main)") 

1301 

1302 loc = max(0, min(loc, len(text))) 

1303 if text == pattern: 

1304 # Shortcut (potentially not guaranteed by the algorithm) 

1305 return 0 

1306 elif not text: 

1307 # Nothing to match. 

1308 return -1 

1309 elif text[loc : loc + len(pattern)] == pattern: 

1310 # Perfect match at the perfect spot! (Includes case of null pattern) 

1311 return loc 

1312 else: 

1313 # Do a fuzzy compare. 

1314 match = self.match_bitap(text, pattern, loc) 

1315 return match 

1316 

1317 def match_bitap(self, text, pattern, loc): 

1318 """Locate the best instance of 'pattern' in 'text' near 'loc' using the 

1319 Bitap algorithm. 

1320 

1321 Args: 

1322 text: The text to search. 

1323 pattern: The pattern to search for. 

1324 loc: The location to search around. 

1325 

1326 Returns: 

1327 Best match index or -1. 

1328 """ 

1329 # Python doesn't have a maxint limit, so ignore this check. 

1330 # if self.Match_MaxBits != 0 and len(pattern) > self.Match_MaxBits: 

1331 # raise ValueError("Pattern too long for this application.") 

1332 

1333 # Initialise the alphabet. 

1334 s = self.match_alphabet(pattern) 

1335 

1336 def match_bitapScore(e, x): 

1337 """Compute and return the score for a match with e errors and x location. 

1338 Accesses loc and pattern through being a closure. 

1339 

1340 Args: 

1341 e: Number of errors in match. 

1342 x: Location of match. 

1343 

1344 Returns: 

1345 Overall score for match (0.0 = good, 1.0 = bad). 

1346 """ 

1347 accuracy = float(e) / len(pattern) 

1348 proximity = abs(loc - x) 

1349 if not self.Match_Distance: 

1350 # Dodge divide by zero error. 

1351 return proximity and 1.0 or accuracy 

1352 return accuracy + (proximity / float(self.Match_Distance)) 

1353 

1354 # Highest score beyond which we give up. 

1355 score_threshold = self.Match_Threshold 

1356 # Is there a nearby exact match? (speedup) 

1357 best_loc = text.find(pattern, loc) 

1358 if best_loc != -1: 

1359 score_threshold = min(match_bitapScore(0, best_loc), score_threshold) 

1360 # What about in the other direction? (speedup) 

1361 best_loc = text.rfind(pattern, loc + len(pattern)) 

1362 if best_loc != -1: 

1363 score_threshold = min(match_bitapScore(0, best_loc), score_threshold) 

1364 

1365 # Initialise the bit arrays. 

1366 matchmask = 1 << (len(pattern) - 1) 

1367 best_loc = -1 

1368 

1369 bin_max = len(pattern) + len(text) 

1370 # Empty initialization added to appease pychecker. 

1371 last_rd = None 

1372 for d in range(len(pattern)): 

1373 # Scan for the best match each iteration allows for one more error. 

1374 # Run a binary search to determine how far from 'loc' we can stray at 

1375 # this error level. 

1376 bin_min = 0 

1377 bin_mid = bin_max 

1378 while bin_min < bin_mid: 

1379 if match_bitapScore(d, loc + bin_mid) <= score_threshold: 

1380 bin_min = bin_mid 

1381 else: 

1382 bin_max = bin_mid 

1383 bin_mid = (bin_max - bin_min) // 2 + bin_min 

1384 

1385 # Use the result from this iteration as the maximum for the next. 

1386 bin_max = bin_mid 

1387 start = max(1, loc - bin_mid + 1) 

1388 finish = min(loc + bin_mid, len(text)) + len(pattern) 

1389 

1390 rd = [0] * (finish + 2) 

1391 rd[finish + 1] = (1 << d) - 1 

1392 for j in range(finish, start - 1, -1): 

1393 if len(text) <= j - 1: 

1394 # Out of range. 

1395 charMatch = 0 

1396 else: 

1397 charMatch = s.get(text[j - 1], 0) 

1398 if d == 0: # First pass: exact match. 

1399 rd[j] = ((rd[j + 1] << 1) | 1) & charMatch 

1400 else: # Subsequent passes: fuzzy match. 

1401 rd[j] = ( 

1402 (((rd[j + 1] << 1) | 1) & charMatch) 

1403 | (((last_rd[j + 1] | last_rd[j]) << 1) | 1) 

1404 | last_rd[j + 1] 

1405 ) 

1406 if rd[j] & matchmask: 

1407 score = match_bitapScore(d, j - 1) 

1408 # This match will almost certainly be better than any existing match. 

1409 # But check anyway. 

1410 if score <= score_threshold: 

1411 # Told you so. 

1412 score_threshold = score 

1413 best_loc = j - 1 

1414 if best_loc > loc: 

1415 # When passing loc, don't exceed our current distance from loc. 

1416 start = max(1, 2 * loc - best_loc) 

1417 else: 

1418 # Already passed loc, downhill from here on in. 

1419 break 

1420 # No hope for a (better) match at greater error levels. 

1421 if match_bitapScore(d + 1, loc) > score_threshold: 

1422 break 

1423 last_rd = rd 

1424 return best_loc 

1425 

1426 def match_alphabet(self, pattern): 

1427 """Initialise the alphabet for the Bitap algorithm. 

1428 

1429 Args: 

1430 pattern: The text to encode. 

1431 

1432 Returns: 

1433 Hash of character locations. 

1434 """ 

1435 s = {} 

1436 for char in pattern: 

1437 s[char] = 0 

1438 for i in range(len(pattern)): 

1439 s[pattern[i]] |= 1 << (len(pattern) - i - 1) 

1440 return s 

1441 

1442 # PATCH FUNCTIONS 

1443 

1444 def patch_addContext(self, patch, text): 

1445 """Increase the context until it is unique, 

1446 but don't let the pattern expand beyond Match_MaxBits. 

1447 

1448 Args: 

1449 patch: The patch to grow. 

1450 text: Source text. 

1451 """ 

1452 if len(text) == 0: 

1453 return 

1454 pattern = text[patch.start2 : patch.start2 + patch.length1] 

1455 padding = 0 

1456 

1457 # Look for the first and last matches of pattern in text. If two different 

1458 # matches are found, increase the pattern length. 

1459 while text.find(pattern) != text.rfind(pattern) and ( 

1460 self.Match_MaxBits == 0 

1461 or len(pattern) < self.Match_MaxBits - self.Patch_Margin - self.Patch_Margin 

1462 ): 

1463 padding += self.Patch_Margin 

1464 pattern = text[ 

1465 max(0, patch.start2 - padding) : patch.start2 + patch.length1 + padding 

1466 ] 

1467 # Add one chunk for good luck. 

1468 padding += self.Patch_Margin 

1469 

1470 # Add the prefix. 

1471 prefix = text[max(0, patch.start2 - padding) : patch.start2] 

1472 if prefix: 

1473 patch.diffs[:0] = [(self.DIFF_EQUAL, prefix)] 

1474 # Add the suffix. 

1475 suffix = text[ 

1476 patch.start2 + patch.length1 : patch.start2 + patch.length1 + padding 

1477 ] 

1478 if suffix: 

1479 patch.diffs.append((self.DIFF_EQUAL, suffix)) 

1480 

1481 # Roll back the start points. 

1482 patch.start1 -= len(prefix) 

1483 patch.start2 -= len(prefix) 

1484 # Extend lengths. 

1485 patch.length1 += len(prefix) + len(suffix) 

1486 patch.length2 += len(prefix) + len(suffix) 

1487 

1488 def patch_make(self, a, b=None, c=None): 

1489 """Compute a list of patches to turn text1 into text2. 

1490 Use diffs if provided, otherwise compute it ourselves. 

1491 There are four ways to call this function, depending on what data is 

1492 available to the caller: 

1493 Method 1: 

1494 a = text1, b = text2 

1495 Method 2: 

1496 a = diffs 

1497 Method 3 (optimal): 

1498 a = text1, b = diffs 

1499 Method 4 (deprecated, use method 3): 

1500 a = text1, b = text2, c = diffs 

1501 

1502 Args: 

1503 a: text1 (methods 1,3,4) or Array of diff tuples for text1 to 

1504 text2 (method 2). 

1505 b: text2 (methods 1,4) or Array of diff tuples for text1 to 

1506 text2 (method 3) or undefined (method 2). 

1507 c: Array of diff tuples for text1 to text2 (method 4) or 

1508 undefined (methods 1,2,3). 

1509 

1510 Returns: 

1511 Array of Patch objects. 

1512 """ 

1513 text1 = None 

1514 diffs = None 

1515 if isinstance(a, str) and isinstance(b, str) and c is None: 

1516 # Method 1: text1, text2 

1517 # Compute diffs from text1 and text2. 

1518 text1 = a 

1519 diffs = self.diff_main(text1, b, True) 

1520 if len(diffs) > 2: 

1521 self.diff_cleanupSemantic(diffs) 

1522 self.diff_cleanupEfficiency(diffs) 

1523 elif isinstance(a, list) and b is None and c is None: 

1524 # Method 2: diffs 

1525 # Compute text1 from diffs. 

1526 diffs = a 

1527 text1 = self.diff_text1(diffs) 

1528 elif isinstance(a, str) and isinstance(b, list) and c is None: 

1529 # Method 3: text1, diffs 

1530 text1 = a 

1531 diffs = b 

1532 elif isinstance(a, str) and isinstance(b, str) and isinstance(c, list): 

1533 # Method 4: text1, text2, diffs 

1534 # text2 is not used. 

1535 text1 = a 

1536 diffs = c 

1537 else: 

1538 raise ValueError("Unknown call format to patch_make.") 

1539 

1540 if not diffs: 

1541 return [] # Get rid of the None case. 

1542 patches = [] 

1543 patch = patch_obj() 

1544 char_count1 = 0 # Number of characters into the text1 string. 

1545 char_count2 = 0 # Number of characters into the text2 string. 

1546 prepatch_text = text1 # Recreate the patches to determine context info. 

1547 postpatch_text = text1 

1548 for x in range(len(diffs)): 

1549 (diff_type, diff_text) = diffs[x] 

1550 if len(patch.diffs) == 0 and diff_type != self.DIFF_EQUAL: 

1551 # A new patch starts here. 

1552 patch.start1 = char_count1 

1553 patch.start2 = char_count2 

1554 if diff_type == self.DIFF_INSERT: 

1555 # Insertion 

1556 patch.diffs.append(diffs[x]) 

1557 patch.length2 += len(diff_text) 

1558 postpatch_text = ( 

1559 postpatch_text[:char_count2] 

1560 + diff_text 

1561 + postpatch_text[char_count2:] 

1562 ) 

1563 elif diff_type == self.DIFF_DELETE: 

1564 # Deletion. 

1565 patch.length1 += len(diff_text) 

1566 patch.diffs.append(diffs[x]) 

1567 postpatch_text = ( 

1568 postpatch_text[:char_count2] 

1569 + postpatch_text[char_count2 + len(diff_text) :] 

1570 ) 

1571 elif ( 

1572 diff_type == self.DIFF_EQUAL 

1573 and len(diff_text) <= 2 * self.Patch_Margin 

1574 and len(patch.diffs) != 0 

1575 and len(diffs) != x + 1 

1576 ): 

1577 # Small equality inside a patch. 

1578 patch.diffs.append(diffs[x]) 

1579 patch.length1 += len(diff_text) 

1580 patch.length2 += len(diff_text) 

1581 

1582 if diff_type == self.DIFF_EQUAL and len(diff_text) >= 2 * self.Patch_Margin: 

1583 # Time for a new patch. 

1584 if len(patch.diffs) != 0: 

1585 self.patch_addContext(patch, prepatch_text) 

1586 patches.append(patch) 

1587 patch = patch_obj() 

1588 # Unlike Unidiff, our patch lists have a rolling context. 

1589 # https://github.com/google/diff-match-patch/wiki/Unidiff 

1590 # Update prepatch text & pos to reflect the application of the 

1591 # just completed patch. 

1592 prepatch_text = postpatch_text 

1593 char_count1 = char_count2 

1594 

1595 # Update the current character count. 

1596 if diff_type != self.DIFF_INSERT: 

1597 char_count1 += len(diff_text) 

1598 if diff_type != self.DIFF_DELETE: 

1599 char_count2 += len(diff_text) 

1600 

1601 # Pick up the leftover patch if not empty. 

1602 if len(patch.diffs) != 0: 

1603 self.patch_addContext(patch, prepatch_text) 

1604 patches.append(patch) 

1605 return patches 

1606 

1607 def patch_deepCopy(self, patches): 

1608 """Given an array of patches, return another array that is identical. 

1609 

1610 Args: 

1611 patches: Array of Patch objects. 

1612 

1613 Returns: 

1614 Array of Patch objects. 

1615 """ 

1616 patchesCopy = [] 

1617 for patch in patches: 

1618 patchCopy = patch_obj() 

1619 # No need to deep copy the tuples since they are immutable. 

1620 patchCopy.diffs = patch.diffs[:] 

1621 patchCopy.start1 = patch.start1 

1622 patchCopy.start2 = patch.start2 

1623 patchCopy.length1 = patch.length1 

1624 patchCopy.length2 = patch.length2 

1625 patchesCopy.append(patchCopy) 

1626 return patchesCopy 

1627 

1628 def patch_apply(self, patches, text): 

1629 """Merge a set of patches onto the text. Return a patched text, as well 

1630 as a list of true/false values indicating which patches were applied. 

1631 

1632 Args: 

1633 patches: Array of Patch objects. 

1634 text: Old text. 

1635 

1636 Returns: 

1637 Two element Array, containing the new text and an array of boolean values. 

1638 """ 

1639 if not patches: 

1640 return (text, []) 

1641 

1642 # Deep copy the patches so that no changes are made to originals. 

1643 patches = self.patch_deepCopy(patches) 

1644 

1645 nullPadding = self.patch_addPadding(patches) 

1646 text = nullPadding + text + nullPadding 

1647 self.patch_splitMax(patches) 

1648 

1649 # delta keeps track of the offset between the expected and actual location 

1650 # of the previous patch. If there are patches expected at positions 10 and 

1651 # 20, but the first patch was found at 12, delta is 2 and the second patch 

1652 # has an effective expected position of 22. 

1653 delta = 0 

1654 results = [] 

1655 for patch in patches: 

1656 expected_loc = patch.start2 + delta 

1657 text1 = self.diff_text1(patch.diffs) 

1658 end_loc = -1 

1659 if len(text1) > self.Match_MaxBits: 

1660 # patch_splitMax will only provide an oversized pattern in the case of 

1661 # a monster delete. 

1662 start_loc = self.match_main( 

1663 text, text1[: self.Match_MaxBits], expected_loc 

1664 ) 

1665 if start_loc != -1: 

1666 end_loc = self.match_main( 

1667 text, 

1668 text1[-self.Match_MaxBits :], 

1669 expected_loc + len(text1) - self.Match_MaxBits, 

1670 ) 

1671 if end_loc == -1 or start_loc >= end_loc: 

1672 # Can't find valid trailing context. Drop this patch. 

1673 start_loc = -1 

1674 else: 

1675 start_loc = self.match_main(text, text1, expected_loc) 

1676 if start_loc == -1: 

1677 # No match found. :( 

1678 results.append(False) 

1679 # Subtract the delta for this failed patch from subsequent patches. 

1680 delta -= patch.length2 - patch.length1 

1681 else: 

1682 # Found a match. :) 

1683 results.append(True) 

1684 delta = start_loc - expected_loc 

1685 if end_loc == -1: 

1686 text2 = text[start_loc : start_loc + len(text1)] 

1687 else: 

1688 text2 = text[start_loc : end_loc + self.Match_MaxBits] 

1689 if text1 == text2: 

1690 # Perfect match, just shove the replacement text in. 

1691 text = ( 

1692 text[:start_loc] 

1693 + self.diff_text2(patch.diffs) 

1694 + text[start_loc + len(text1) :] 

1695 ) 

1696 else: 

1697 # Imperfect match. 

1698 # Run a diff to get a framework of equivalent indices. 

1699 diffs = self.diff_main(text1, text2, False) 

1700 if ( 

1701 len(text1) > self.Match_MaxBits 

1702 and self.diff_levenshtein(diffs) / float(len(text1)) 

1703 > self.Patch_DeleteThreshold 

1704 ): 

1705 # The end points match, but the content is unacceptably bad. 

1706 results[-1] = False 

1707 else: 

1708 self.diff_cleanupSemanticLossless(diffs) 

1709 index1 = 0 

1710 for (op, data) in patch.diffs: 

1711 if op != self.DIFF_EQUAL: 

1712 index2 = self.diff_xIndex(diffs, index1) 

1713 if op == self.DIFF_INSERT: # Insertion 

1714 text = ( 

1715 text[: start_loc + index2] 

1716 + data 

1717 + text[start_loc + index2 :] 

1718 ) 

1719 elif op == self.DIFF_DELETE: # Deletion 

1720 text = ( 

1721 text[: start_loc + index2] 

1722 + text[ 

1723 start_loc 

1724 + self.diff_xIndex(diffs, index1 + len(data)) : 

1725 ] 

1726 ) 

1727 if op != self.DIFF_DELETE: 

1728 index1 += len(data) 

1729 # Strip the padding off. 

1730 text = text[len(nullPadding) : -len(nullPadding)] 

1731 return (text, results) 

1732 

1733 def patch_addPadding(self, patches): 

1734 """Add some padding on text start and end so that edges can match 

1735 something. Intended to be called only from within patch_apply. 

1736 

1737 Args: 

1738 patches: Array of Patch objects. 

1739 

1740 Returns: 

1741 The padding string added to each side. 

1742 """ 

1743 paddingLength = self.Patch_Margin 

1744 nullPadding = "" 

1745 for x in range(1, paddingLength + 1): 

1746 nullPadding += chr(x) 

1747 

1748 # Bump all the patches forward. 

1749 for patch in patches: 

1750 patch.start1 += paddingLength 

1751 patch.start2 += paddingLength 

1752 

1753 # Add some padding on start of first diff. 

1754 patch = patches[0] 

1755 diffs = patch.diffs 

1756 if not diffs or diffs[0][0] != self.DIFF_EQUAL: 

1757 # Add nullPadding equality. 

1758 diffs.insert(0, (self.DIFF_EQUAL, nullPadding)) 

1759 patch.start1 -= paddingLength # Should be 0. 

1760 patch.start2 -= paddingLength # Should be 0. 

1761 patch.length1 += paddingLength 

1762 patch.length2 += paddingLength 

1763 elif paddingLength > len(diffs[0][1]): 

1764 # Grow first equality. 

1765 extraLength = paddingLength - len(diffs[0][1]) 

1766 newText = nullPadding[len(diffs[0][1]) :] + diffs[0][1] 

1767 diffs[0] = (diffs[0][0], newText) 

1768 patch.start1 -= extraLength 

1769 patch.start2 -= extraLength 

1770 patch.length1 += extraLength 

1771 patch.length2 += extraLength 

1772 

1773 # Add some padding on end of last diff. 

1774 patch = patches[-1] 

1775 diffs = patch.diffs 

1776 if not diffs or diffs[-1][0] != self.DIFF_EQUAL: 

1777 # Add nullPadding equality. 

1778 diffs.append((self.DIFF_EQUAL, nullPadding)) 

1779 patch.length1 += paddingLength 

1780 patch.length2 += paddingLength 

1781 elif paddingLength > len(diffs[-1][1]): 

1782 # Grow last equality. 

1783 extraLength = paddingLength - len(diffs[-1][1]) 

1784 newText = diffs[-1][1] + nullPadding[:extraLength] 

1785 diffs[-1] = (diffs[-1][0], newText) 

1786 patch.length1 += extraLength 

1787 patch.length2 += extraLength 

1788 

1789 return nullPadding 

1790 

1791 def patch_splitMax(self, patches): 

1792 """Look through the patches and break up any which are longer than the 

1793 maximum limit of the match algorithm. 

1794 Intended to be called only from within patch_apply. 

1795 

1796 Args: 

1797 patches: Array of Patch objects. 

1798 """ 

1799 patch_size = self.Match_MaxBits 

1800 if patch_size == 0: 

1801 # Python has the option of not splitting strings due to its ability 

1802 # to handle integers of arbitrary precision. 

1803 return 

1804 for x in range(len(patches)): 

1805 if patches[x].length1 <= patch_size: 

1806 continue 

1807 bigpatch = patches[x] 

1808 # Remove the big old patch. 

1809 del patches[x] 

1810 x -= 1 

1811 start1 = bigpatch.start1 

1812 start2 = bigpatch.start2 

1813 precontext = "" 

1814 while len(bigpatch.diffs) != 0: 

1815 # Create one of several smaller patches. 

1816 patch = patch_obj() 

1817 empty = True 

1818 patch.start1 = start1 - len(precontext) 

1819 patch.start2 = start2 - len(precontext) 

1820 if precontext: 

1821 patch.length1 = patch.length2 = len(precontext) 

1822 patch.diffs.append((self.DIFF_EQUAL, precontext)) 

1823 

1824 while ( 

1825 len(bigpatch.diffs) != 0 

1826 and patch.length1 < patch_size - self.Patch_Margin 

1827 ): 

1828 (diff_type, diff_text) = bigpatch.diffs[0] 

1829 if diff_type == self.DIFF_INSERT: 

1830 # Insertions are harmless. 

1831 patch.length2 += len(diff_text) 

1832 start2 += len(diff_text) 

1833 patch.diffs.append(bigpatch.diffs.pop(0)) 

1834 empty = False 

1835 elif ( 

1836 diff_type == self.DIFF_DELETE 

1837 and len(patch.diffs) == 1 

1838 and patch.diffs[0][0] == self.DIFF_EQUAL 

1839 and len(diff_text) > 2 * patch_size 

1840 ): 

1841 # This is a large deletion. Let it pass in one chunk. 

1842 patch.length1 += len(diff_text) 

1843 start1 += len(diff_text) 

1844 empty = False 

1845 patch.diffs.append((diff_type, diff_text)) 

1846 del bigpatch.diffs[0] 

1847 else: 

1848 # Deletion or equality. Only take as much as we can stomach. 

1849 diff_text = diff_text[ 

1850 : patch_size - patch.length1 - self.Patch_Margin 

1851 ] 

1852 patch.length1 += len(diff_text) 

1853 start1 += len(diff_text) 

1854 if diff_type == self.DIFF_EQUAL: 

1855 patch.length2 += len(diff_text) 

1856 start2 += len(diff_text) 

1857 else: 

1858 empty = False 

1859 

1860 patch.diffs.append((diff_type, diff_text)) 

1861 if diff_text == bigpatch.diffs[0][1]: 

1862 del bigpatch.diffs[0] 

1863 else: 

1864 bigpatch.diffs[0] = ( 

1865 bigpatch.diffs[0][0], 

1866 bigpatch.diffs[0][1][len(diff_text) :], 

1867 ) 

1868 

1869 # Compute the head context for the next patch. 

1870 precontext = self.diff_text2(patch.diffs) 

1871 precontext = precontext[-self.Patch_Margin :] 

1872 # Append the end context for this patch. 

1873 postcontext = self.diff_text1(bigpatch.diffs)[: self.Patch_Margin] 

1874 if postcontext: 

1875 patch.length1 += len(postcontext) 

1876 patch.length2 += len(postcontext) 

1877 if len(patch.diffs) != 0 and patch.diffs[-1][0] == self.DIFF_EQUAL: 

1878 patch.diffs[-1] = ( 

1879 self.DIFF_EQUAL, 

1880 patch.diffs[-1][1] + postcontext, 

1881 ) 

1882 else: 

1883 patch.diffs.append((self.DIFF_EQUAL, postcontext)) 

1884 

1885 if not empty: 

1886 x += 1 

1887 patches.insert(x, patch) 

1888 

1889 def patch_toText(self, patches): 

1890 """Take a list of patches and return a textual representation. 

1891 

1892 Args: 

1893 patches: Array of Patch objects. 

1894 

1895 Returns: 

1896 Text representation of patches. 

1897 """ 

1898 text = [] 

1899 for patch in patches: 

1900 text.append(str(patch)) 

1901 return "".join(text) 

1902 

1903 def patch_fromText(self, textline): 

1904 """Parse a textual representation of patches and return a list of patch 

1905 objects. 

1906 

1907 Args: 

1908 textline: Text representation of patches. 

1909 

1910 Returns: 

1911 Array of Patch objects. 

1912 

1913 Raises: 

1914 ValueError: If invalid input. 

1915 """ 

1916 patches = [] 

1917 if not textline: 

1918 return patches 

1919 text = textline.split("\n") 

1920 while len(text) != 0: 

1921 m = re.match(r"^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$", text[0]) 

1922 if not m: 

1923 raise ValueError("Invalid patch string: " + text[0]) 

1924 patch = patch_obj() 

1925 patches.append(patch) 

1926 patch.start1 = int(m.group(1)) 

1927 if m.group(2) == "": 

1928 patch.start1 -= 1 

1929 patch.length1 = 1 

1930 elif m.group(2) == "0": 

1931 patch.length1 = 0 

1932 else: 

1933 patch.start1 -= 1 

1934 patch.length1 = int(m.group(2)) 

1935 

1936 patch.start2 = int(m.group(3)) 

1937 if m.group(4) == "": 

1938 patch.start2 -= 1 

1939 patch.length2 = 1 

1940 elif m.group(4) == "0": 

1941 patch.length2 = 0 

1942 else: 

1943 patch.start2 -= 1 

1944 patch.length2 = int(m.group(4)) 

1945 

1946 del text[0] 

1947 

1948 while len(text) != 0: 

1949 if text[0]: 

1950 sign = text[0][0] 

1951 else: 

1952 sign = "" 

1953 line = urllib.parse.unquote(text[0][1:]) 

1954 if sign == "+": 

1955 # Insertion. 

1956 patch.diffs.append((self.DIFF_INSERT, line)) 

1957 elif sign == "-": 

1958 # Deletion. 

1959 patch.diffs.append((self.DIFF_DELETE, line)) 

1960 elif sign == " ": 

1961 # Minor equality. 

1962 patch.diffs.append((self.DIFF_EQUAL, line)) 

1963 elif sign == "@": 

1964 # Start of next patch. 

1965 break 

1966 elif sign == "": 

1967 # Blank line? Whatever. 

1968 pass 

1969 else: 

1970 # WTF? 

1971 raise ValueError("Invalid patch mode: '%s'\n%s" % (sign, line)) 

1972 del text[0] 

1973 return patches 

1974 

1975 

1976class patch_obj: 

1977 """Class representing one patch operation. 

1978 """ 

1979 

1980 def __init__(self): 

1981 """Initializes with an empty list of diffs. 

1982 """ 

1983 self.diffs = [] 

1984 self.start1 = None 

1985 self.start2 = None 

1986 self.length1 = 0 

1987 self.length2 = 0 

1988 

1989 def __str__(self): 

1990 """Emulate GNU diff's format. 

1991 Header: @@ -382,8 +481,9 @@ 

1992 Indices are printed as 1-based, not 0-based. 

1993 

1994 Returns: 

1995 The GNU diff string. 

1996 """ 

1997 if self.length1 == 0: 

1998 coords1 = str(self.start1) + ",0" 

1999 elif self.length1 == 1: 

2000 coords1 = str(self.start1 + 1) 

2001 else: 

2002 coords1 = str(self.start1 + 1) + "," + str(self.length1) 

2003 if self.length2 == 0: 

2004 coords2 = str(self.start2) + ",0" 

2005 elif self.length2 == 1: 

2006 coords2 = str(self.start2 + 1) 

2007 else: 

2008 coords2 = str(self.start2 + 1) + "," + str(self.length2) 

2009 text = ["@@ -", coords1, " +", coords2, " @@\n"] 

2010 # Escape the body of the patch with %xx notation. 

2011 for (op, data) in self.diffs: 

2012 if op == diff_match_patch.DIFF_INSERT: 

2013 text.append("+") 

2014 elif op == diff_match_patch.DIFF_DELETE: 

2015 text.append("-") 

2016 elif op == diff_match_patch.DIFF_EQUAL: 

2017 text.append(" ") 

2018 # High ascii will raise UnicodeDecodeError. Use Unicode instead. 

2019 data = data.encode("utf-8") 

2020 text.append(urllib.parse.quote(data, "!~*'();/?:@&=+$,# ") + "\n") 

2021 return "".join(text)