Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/diff_match_patch/diff_match

1#!/usr/bin/python3

3"""Diff Match and Patch

5https://github.com/google/diff-match-patch

7Licensed under the Apache License, Version 2.0 (the "License");

8you may not use this file except in compliance with the License.

9You may obtain a copy of the License at

11 http://www.apache.org/licenses/LICENSE-2.0

13Unless required by applicable law or agreed to in writing, software

14distributed under the License is distributed on an "AS IS" BASIS,

15WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

16See the License for the specific language governing permissions and

17limitations under the License.

18"""

20"""Functions for diff, match and patch.

22Computes the difference between two texts to create a patch.

23Applies the patch onto another text, allowing for errors.

24"""

26__author__ = "fraser@google.com (Neil Fraser)"

28import re

29import sys

30import time

31import urllib.parse

34class diff_match_patch:

35 """Class containing the diff, match and patch methods.

37 Also contains the behaviour settings.

38 """

40 def __init__(self):

41 """Inits a diff_match_patch object with default settings.

42 Redefine these in your program to override the defaults.

43 """

45 # Number of seconds to map a diff before giving up (0 for infinity).

46 self.Diff_Timeout = 1.0

47 # Cost of an empty edit operation in terms of edit characters.

48 self.Diff_EditCost = 4

49 # At what point is no match declared (0.0 = perfection, 1.0 = very loose).

50 self.Match_Threshold = 0.5

51 # How far to search for a match (0 = exact location, 1000+ = broad match).

52 # A match this many characters away from the expected location will add

53 # 1.0 to the score (0.0 is a perfect match).

54 self.Match_Distance = 1000

55 # When deleting a large block of text (over ~64 characters), how close do

56 # the contents have to be to match the expected contents. (0.0 = perfection,

57 # 1.0 = very loose). Note that Match_Threshold controls how closely the

58 # end points of a delete need to match.

59 self.Patch_DeleteThreshold = 0.5

60 # Chunk size for context length.

61 self.Patch_Margin = 4

63 # The number of bits in an int.

64 # Python has no maximum, thus to disable patch splitting set to 0.

65 # However to avoid long patches in certain pathological cases, use 32.

66 # Multiple short patches (using native ints) are much faster than long ones.

67 self.Match_MaxBits = 32

69 # DIFF FUNCTIONS

71 # The data structure representing a diff is an array of tuples:

72 # [(DIFF_DELETE, "Hello"), (DIFF_INSERT, "Goodbye"), (DIFF_EQUAL, " world.")]

73 # which means: delete "Hello", add "Goodbye" and keep " world."

74 DIFF_DELETE = -1

75 DIFF_INSERT = 1

76 DIFF_EQUAL = 0

78 def diff_main(self, text1, text2, checklines=True, deadline=None):

79 """Find the differences between two texts. Simplifies the problem by

80 stripping any common prefix or suffix off the texts before diffing.

82 Args:

83 text1: Old string to be diffed.

84 text2: New string to be diffed.

85 checklines: Optional speedup flag. If present and false, then don't run

86 a line-level diff first to identify the changed areas.

87 Defaults to true, which does a faster, slightly less optimal diff.

88 deadline: Optional time when the diff should be complete by. Used

89 internally for recursive calls. Users should set DiffTimeout instead.

91 Returns:

92 Array of changes.

93 """

94 # Set a deadline by which time the diff must be complete.

95 if deadline == None:

96 # Unlike in most languages, Python counts time in seconds.

97 if self.Diff_Timeout <= 0:

98 deadline = sys.maxsize

99 else:

100 deadline = time.time() + self.Diff_Timeout

101

102 # Check for null inputs.

103 if text1 == None or text2 == None:

104 raise ValueError("Null inputs. (diff_main)")

105

106 # Check for equality (speedup).

107 if text1 == text2:

108 if text1:

109 return [(self.DIFF_EQUAL, text1)]

110 return []

111

112 # Trim off common prefix (speedup).

113 commonlength = self.diff_commonPrefix(text1, text2)

114 commonprefix = text1[:commonlength]

115 text1 = text1[commonlength:]

116 text2 = text2[commonlength:]

117

118 # Trim off common suffix (speedup).

119 commonlength = self.diff_commonSuffix(text1, text2)

120 if commonlength == 0:

121 commonsuffix = ""

122 else:

123 commonsuffix = text1[-commonlength:]

124 text1 = text1[:-commonlength]

125 text2 = text2[:-commonlength]

126

127 # Compute the diff on the middle block.

128 diffs = self.diff_compute(text1, text2, checklines, deadline)

129

130 # Restore the prefix and suffix.

131 if commonprefix:

132 diffs[:0] = [(self.DIFF_EQUAL, commonprefix)]

133 if commonsuffix:

134 diffs.append((self.DIFF_EQUAL, commonsuffix))

135 self.diff_cleanupMerge(diffs)

136 return diffs

137

138 def diff_compute(self, text1, text2, checklines, deadline):

139 """Find the differences between two texts. Assumes that the texts do not

140 have any common prefix or suffix.

141

142 Args:

143 text1: Old string to be diffed.

144 text2: New string to be diffed.

145 checklines: Speedup flag. If false, then don't run a line-level diff

146 first to identify the changed areas.

147 If true, then run a faster, slightly less optimal diff.

148 deadline: Time when the diff should be complete by.

149

150 Returns:

151 Array of changes.

152 """

153 if not text1:

154 # Just add some text (speedup).

155 return [(self.DIFF_INSERT, text2)]

156

157 if not text2:

158 # Just delete some text (speedup).

159 return [(self.DIFF_DELETE, text1)]

160

161 if len(text1) > len(text2):

162 (longtext, shorttext) = (text1, text2)

163 else:

164 (shorttext, longtext) = (text1, text2)

165 i = longtext.find(shorttext)

166 if i != -1:

167 # Shorter text is inside the longer text (speedup).

168 diffs = [

169 (self.DIFF_INSERT, longtext[:i]),

170 (self.DIFF_EQUAL, shorttext),

171 (self.DIFF_INSERT, longtext[i + len(shorttext) :]),

172 ]

173 # Swap insertions for deletions if diff is reversed.

174 if len(text1) > len(text2):

175 diffs[0] = (self.DIFF_DELETE, diffs[0][1])

176 diffs[2] = (self.DIFF_DELETE, diffs[2][1])

177 return diffs

178

179 if len(shorttext) == 1:

180 # Single character string.

181 # After the previous speedup, the character can't be an equality.

182 return [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)]

183

184 # Check to see if the problem can be split in two.

185 hm = self.diff_halfMatch(text1, text2)

186 if hm:

187 # A half-match was found, sort out the return data.

188 (text1_a, text1_b, text2_a, text2_b, mid_common) = hm

189 # Send both pairs off for separate processing.

190 diffs_a = self.diff_main(text1_a, text2_a, checklines, deadline)

191 diffs_b = self.diff_main(text1_b, text2_b, checklines, deadline)

192 # Merge the results.

193 return diffs_a + [(self.DIFF_EQUAL, mid_common)] + diffs_b

194

195 if checklines and len(text1) > 100 and len(text2) > 100:

196 return self.diff_lineMode(text1, text2, deadline)

197

198 return self.diff_bisect(text1, text2, deadline)

199

200 def diff_lineMode(self, text1, text2, deadline):

201 """Do a quick line-level diff on both strings, then rediff the parts for

202 greater accuracy.

203 This speedup can produce non-minimal diffs.

204

205 Args:

206 text1: Old string to be diffed.

207 text2: New string to be diffed.

208 deadline: Time when the diff should be complete by.

209

210 Returns:

211 Array of changes.

212 """

213

214 # Scan the text on a line-by-line basis first.

215 (text1, text2, linearray) = self.diff_linesToChars(text1, text2)

216

217 diffs = self.diff_main(text1, text2, False, deadline)

218

219 # Convert the diff back to original text.

220 self.diff_charsToLines(diffs, linearray)

221 # Eliminate freak matches (e.g. blank lines)

222 self.diff_cleanupSemantic(diffs)

223

224 # Rediff any replacement blocks, this time character-by-character.

225 # Add a dummy entry at the end.

226 diffs.append((self.DIFF_EQUAL, ""))

227 pointer = 0

228 count_delete = 0

229 count_insert = 0

230 text_delete = ""

231 text_insert = ""

232 while pointer < len(diffs):

233 if diffs[pointer][0] == self.DIFF_INSERT:

234 count_insert += 1

235 text_insert += diffs[pointer][1]

236 elif diffs[pointer][0] == self.DIFF_DELETE:

237 count_delete += 1

238 text_delete += diffs[pointer][1]

239 elif diffs[pointer][0] == self.DIFF_EQUAL:

240 # Upon reaching an equality, check for prior redundancies.

241 if count_delete >= 1 and count_insert >= 1:

242 # Delete the offending records and add the merged ones.

243 subDiff = self.diff_main(text_delete, text_insert, False, deadline)

244 diffs[pointer - count_delete - count_insert : pointer] = subDiff

245 pointer = pointer - count_delete - count_insert + len(subDiff)

246 count_insert = 0

247 count_delete = 0

248 text_delete = ""

249 text_insert = ""

250

251 pointer += 1

252

253 diffs.pop() # Remove the dummy entry at the end.

254

255 return diffs

256

257 def diff_bisect(self, text1, text2, deadline):

258 """Find the 'middle snake' of a diff, split the problem in two

259 and return the recursively constructed diff.

260 See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations.

261

262 Args:

263 text1: Old string to be diffed.

264 text2: New string to be diffed.

265 deadline: Time at which to bail if not yet complete.

266

267 Returns:

268 Array of diff tuples.

269 """

270

271 # Cache the text lengths to prevent multiple calls.

272 text1_length = len(text1)

273 text2_length = len(text2)

274 max_d = (text1_length + text2_length + 1) // 2

275 v_offset = max_d

276 v_length = 2 * max_d

277 v1 = [-1] * v_length

278 v1[v_offset + 1] = 0

279 v2 = v1[:]

280 delta = text1_length - text2_length

281 # If the total number of characters is odd, then the front path will

282 # collide with the reverse path.

283 front = delta % 2 != 0

284 # Offsets for start and end of k loop.

285 # Prevents mapping of space beyond the grid.

286 k1start = 0

287 k1end = 0

288 k2start = 0

289 k2end = 0

290 for d in range(max_d):

291 # Bail out if deadline is reached.

292 if time.time() > deadline:

293 break

294

295 # Walk the front path one step.

296 for k1 in range(-d + k1start, d + 1 - k1end, 2):

297 k1_offset = v_offset + k1

298 if k1 == -d or (k1 != d and v1[k1_offset - 1] < v1[k1_offset + 1]):

299 x1 = v1[k1_offset + 1]

300 else:

301 x1 = v1[k1_offset - 1] + 1

302 y1 = x1 - k1

303 while (

304 x1 < text1_length and y1 < text2_length and text1[x1] == text2[y1]

305 ):

306 x1 += 1

307 y1 += 1

308 v1[k1_offset] = x1

309 if x1 > text1_length:

310 # Ran off the right of the graph.

311 k1end += 2

312 elif y1 > text2_length:

313 # Ran off the bottom of the graph.

314 k1start += 2

315 elif front:

316 k2_offset = v_offset + delta - k1

317 if k2_offset >= 0 and k2_offset < v_length and v2[k2_offset] != -1:

318 # Mirror x2 onto top-left coordinate system.

319 x2 = text1_length - v2[k2_offset]

320 if x1 >= x2:

321 # Overlap detected.

322 return self.diff_bisectSplit(text1, text2, x1, y1, deadline)

323

324 # Walk the reverse path one step.

325 for k2 in range(-d + k2start, d + 1 - k2end, 2):

326 k2_offset = v_offset + k2

327 if k2 == -d or (k2 != d and v2[k2_offset - 1] < v2[k2_offset + 1]):

328 x2 = v2[k2_offset + 1]

329 else:

330 x2 = v2[k2_offset - 1] + 1

331 y2 = x2 - k2

332 while (

333 x2 < text1_length

334 and y2 < text2_length

335 and text1[-x2 - 1] == text2[-y2 - 1]

336 ):

337 x2 += 1

338 y2 += 1

339 v2[k2_offset] = x2

340 if x2 > text1_length:

341 # Ran off the left of the graph.

342 k2end += 2

343 elif y2 > text2_length:

344 # Ran off the top of the graph.

345 k2start += 2

346 elif not front:

347 k1_offset = v_offset + delta - k2

348 if k1_offset >= 0 and k1_offset < v_length and v1[k1_offset] != -1:

349 x1 = v1[k1_offset]

350 y1 = v_offset + x1 - k1_offset

351 # Mirror x2 onto top-left coordinate system.

352 x2 = text1_length - x2

353 if x1 >= x2:

354 # Overlap detected.

355 return self.diff_bisectSplit(text1, text2, x1, y1, deadline)

356

357 # Diff took too long and hit the deadline or

358 # number of diffs equals number of characters, no commonality at all.

359 return [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)]

360

361 def diff_bisectSplit(self, text1, text2, x, y, deadline):

362 """Given the location of the 'middle snake', split the diff in two parts

363 and recurse.

364

365 Args:

366 text1: Old string to be diffed.

367 text2: New string to be diffed.

368 x: Index of split point in text1.

369 y: Index of split point in text2.

370 deadline: Time at which to bail if not yet complete.

371

372 Returns:

373 Array of diff tuples.

374 """

375 text1a = text1[:x]

376 text2a = text2[:y]

377 text1b = text1[x:]

378 text2b = text2[y:]

379

380 # Compute both diffs serially.

381 diffs = self.diff_main(text1a, text2a, False, deadline)

382 diffsb = self.diff_main(text1b, text2b, False, deadline)

383

384 return diffs + diffsb

385

386 def diff_linesToChars(self, text1, text2):

387 """Split two texts into an array of strings. Reduce the texts to a string

388 of hashes where each Unicode character represents one line.

389

390 Args:

391 text1: First string.

392 text2: Second string.

393

394 Returns:

395 Three element tuple, containing the encoded text1, the encoded text2 and

396 the array of unique strings. The zeroth element of the array of unique

397 strings is intentionally blank.

398 """

399 lineArray = [] # e.g. lineArray[4] == "Hello\n"

400 lineHash = {} # e.g. lineHash["Hello\n"] == 4

401

402 # "\x00" is a valid character, but various debuggers don't like it.

403 # So we'll insert a junk entry to avoid generating a null character.

404 lineArray.append("")

405

406 def diff_linesToCharsMunge(text):

407 """Split a text into an array of strings. Reduce the texts to a string

408 of hashes where each Unicode character represents one line.

409 Modifies linearray and linehash through being a closure.

410

411 Args:

412 text: String to encode.

413

414 Returns:

415 Encoded string.

416 """

417 chars = []

418 # Walk the text, pulling out a substring for each line.

419 # text.split('\n') would would temporarily double our memory footprint.

420 # Modifying text would create many large strings to garbage collect.

421 lineStart = 0

422 lineEnd = -1

423 while lineEnd < len(text) - 1:

424 lineEnd = text.find("\n", lineStart)

425 if lineEnd == -1:

426 lineEnd = len(text) - 1

427 line = text[lineStart : lineEnd + 1]

428

429 if line in lineHash:

430 chars.append(chr(lineHash[line]))

431 else:

432 if len(lineArray) == maxLines:

433 # Bail out at 1114111 because chr(1114112) throws.

434 line = text[lineStart:]

435 lineEnd = len(text)

436 lineArray.append(line)

437 lineHash[line] = len(lineArray) - 1

438 chars.append(chr(len(lineArray) - 1))

439 lineStart = lineEnd + 1

440 return "".join(chars)

441

442 # Allocate 2/3rds of the space for text1, the rest for text2.

443 maxLines = 666666

444 chars1 = diff_linesToCharsMunge(text1)

445 maxLines = 1114111

446 chars2 = diff_linesToCharsMunge(text2)

447 return (chars1, chars2, lineArray)

448

449 def diff_charsToLines(self, diffs, lineArray):

450 """Rehydrate the text in a diff from a string of line hashes to real lines

451 of text.

452

453 Args:

454 diffs: Array of diff tuples.

455 lineArray: Array of unique strings.

456 """

457 for i in range(len(diffs)):

458 text = []

459 for char in diffs[i][1]:

460 text.append(lineArray[ord(char)])

461 diffs[i] = (diffs[i][0], "".join(text))

462

463 def diff_commonPrefix(self, text1, text2):

464 """Determine the common prefix of two strings.

465

466 Args:

467 text1: First string.

468 text2: Second string.

469

470 Returns:

471 The number of characters common to the start of each string.

472 """

473 # Quick check for common null cases.

474 if not text1 or not text2 or text1[0] != text2[0]:

475 return 0

476 # Binary search.

477 # Performance analysis: https://neil.fraser.name/news/2007/10/09/

478 pointermin = 0

479 pointermax = min(len(text1), len(text2))

480 pointermid = pointermax

481 pointerstart = 0

482 while pointermin < pointermid:

483 if text1[pointerstart:pointermid] == text2[pointerstart:pointermid]:

484 pointermin = pointermid

485 pointerstart = pointermin

486 else:

487 pointermax = pointermid

488 pointermid = (pointermax - pointermin) // 2 + pointermin

489 return pointermid

490

491 def diff_commonSuffix(self, text1, text2):

492 """Determine the common suffix of two strings.

493

494 Args:

495 text1: First string.

496 text2: Second string.

497

498 Returns:

499 The number of characters common to the end of each string.

500 """

501 # Quick check for common null cases.

502 if not text1 or not text2 or text1[-1] != text2[-1]:

503 return 0

504 # Binary search.

505 # Performance analysis: https://neil.fraser.name/news/2007/10/09/

506 pointermin = 0

507 pointermax = min(len(text1), len(text2))

508 pointermid = pointermax

509 pointerend = 0

510 while pointermin < pointermid:

511 if (

512 text1[-pointermid : len(text1) - pointerend]

513 == text2[-pointermid : len(text2) - pointerend]

514 ):

515 pointermin = pointermid

516 pointerend = pointermin

517 else:

518 pointermax = pointermid

519 pointermid = (pointermax - pointermin) // 2 + pointermin

520 return pointermid

521

522 def diff_commonOverlap(self, text1, text2):

523 """Determine if the suffix of one string is the prefix of another.

524

525 Args:

526 text1 First string.

527 text2 Second string.

528

529 Returns:

530 The number of characters common to the end of the first

531 string and the start of the second string.

532 """

533 # Cache the text lengths to prevent multiple calls.

534 text1_length = len(text1)

535 text2_length = len(text2)

536 # Eliminate the null case.

537 if text1_length == 0 or text2_length == 0:

538 return 0

539 # Truncate the longer string.

540 if text1_length > text2_length:

541 text1 = text1[-text2_length:]

542 elif text1_length < text2_length:

543 text2 = text2[:text1_length]

544 text_length = min(text1_length, text2_length)

545 # Quick check for the worst case.

546 if text1 == text2:

547 return text_length

548

549 # Start by looking for a single character match

550 # and increase length until no match is found.

551 # Performance analysis: https://neil.fraser.name/news/2010/11/04/

552 best = 0

553 length = 1

554 while True:

555 pattern = text1[-length:]

556 found = text2.find(pattern)

557 if found == -1:

558 return best

559 length += found

560 if found == 0 or text1[-length:] == text2[:length]:

561 best = length

562 length += 1

563

564 def diff_halfMatch(self, text1, text2):

565 """Do the two texts share a substring which is at least half the length of

566 the longer text?

567 This speedup can produce non-minimal diffs.

568

569 Args:

570 text1: First string.

571 text2: Second string.

572

573 Returns:

574 Five element Array, containing the prefix of text1, the suffix of text1,

575 the prefix of text2, the suffix of text2 and the common middle. Or None

576 if there was no match.

577 """

578 if self.Diff_Timeout <= 0:

579 # Don't risk returning a non-optimal diff if we have unlimited time.

580 return None

581 if len(text1) > len(text2):

582 (longtext, shorttext) = (text1, text2)

583 else:

584 (shorttext, longtext) = (text1, text2)

585 if len(longtext) < 4 or len(shorttext) * 2 < len(longtext):

586 return None # Pointless.

587

588 def diff_halfMatchI(longtext, shorttext, i):

589 """Does a substring of shorttext exist within longtext such that the

590 substring is at least half the length of longtext?

591 Closure, but does not reference any external variables.

592

593 Args:

594 longtext: Longer string.

595 shorttext: Shorter string.

596 i: Start index of quarter length substring within longtext.

597

598 Returns:

599 Five element Array, containing the prefix of longtext, the suffix of

600 longtext, the prefix of shorttext, the suffix of shorttext and the

601 common middle. Or None if there was no match.

602 """

603 seed = longtext[i : i + len(longtext) // 4]

604 best_common = ""

605 j = shorttext.find(seed)

606 while j != -1:

607 prefixLength = self.diff_commonPrefix(longtext[i:], shorttext[j:])

608 suffixLength = self.diff_commonSuffix(longtext[:i], shorttext[:j])

609 if len(best_common) < suffixLength + prefixLength:

610 best_common = (

611 shorttext[j - suffixLength : j]

612 + shorttext[j : j + prefixLength]

613 )

614 best_longtext_a = longtext[: i - suffixLength]

615 best_longtext_b = longtext[i + prefixLength :]

616 best_shorttext_a = shorttext[: j - suffixLength]

617 best_shorttext_b = shorttext[j + prefixLength :]

618 j = shorttext.find(seed, j + 1)

619

620 if len(best_common) * 2 >= len(longtext):

621 return (

622 best_longtext_a,

623 best_longtext_b,

624 best_shorttext_a,

625 best_shorttext_b,

626 best_common,

627 )

628 else:

629 return None

630

631 # First check if the second quarter is the seed for a half-match.

632 hm1 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 3) // 4)

633 # Check again based on the third quarter.

634 hm2 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 1) // 2)

635 if not hm1 and not hm2:

636 return None

637 elif not hm2:

638 hm = hm1

639 elif not hm1:

640 hm = hm2

641 else:

642 # Both matched. Select the longest.

643 if len(hm1[4]) > len(hm2[4]):

644 hm = hm1

645 else:

646 hm = hm2

647

648 # A half-match was found, sort out the return data.

649 if len(text1) > len(text2):

650 (text1_a, text1_b, text2_a, text2_b, mid_common) = hm

651 else:

652 (text2_a, text2_b, text1_a, text1_b, mid_common) = hm

653 return (text1_a, text1_b, text2_a, text2_b, mid_common)

654

655 def diff_cleanupSemantic(self, diffs):

656 """Reduce the number of edits by eliminating semantically trivial

657 equalities.

658

659 Args:

660 diffs: Array of diff tuples.

661 """

662 changes = False

663 equalities = [] # Stack of indices where equalities are found.

664 lastEquality = None # Always equal to diffs[equalities[-1]][1]

665 pointer = 0 # Index of current position.

666 # Number of chars that changed prior to the equality.

667 length_insertions1, length_deletions1 = 0, 0

668 # Number of chars that changed after the equality.

669 length_insertions2, length_deletions2 = 0, 0

670 while pointer < len(diffs):

671 if diffs[pointer][0] == self.DIFF_EQUAL: # Equality found.

672 equalities.append(pointer)

673 length_insertions1, length_insertions2 = length_insertions2, 0

674 length_deletions1, length_deletions2 = length_deletions2, 0

675 lastEquality = diffs[pointer][1]

676 else: # An insertion or deletion.

677 if diffs[pointer][0] == self.DIFF_INSERT:

678 length_insertions2 += len(diffs[pointer][1])

679 else:

680 length_deletions2 += len(diffs[pointer][1])

681 # Eliminate an equality that is smaller or equal to the edits on both

682 # sides of it.

683 if (

684 lastEquality

685 and (

686 len(lastEquality) <= max(length_insertions1, length_deletions1)

687 )

688 and (

689 len(lastEquality) <= max(length_insertions2, length_deletions2)

690 )

691 ):

692 # Duplicate record.

693 diffs.insert(equalities[-1], (self.DIFF_DELETE, lastEquality))

694 # Change second copy to insert.

695 diffs[equalities[-1] + 1] = (

696 self.DIFF_INSERT,

697 diffs[equalities[-1] + 1][1],

698 )

699 # Throw away the equality we just deleted.

700 equalities.pop()

701 # Throw away the previous equality (it needs to be reevaluated).

702 if len(equalities):

703 equalities.pop()

704 if len(equalities):

705 pointer = equalities[-1]

706 else:

707 pointer = -1

708 # Reset the counters.

709 length_insertions1, length_deletions1 = 0, 0

710 length_insertions2, length_deletions2 = 0, 0

711 lastEquality = None

712 changes = True

713 pointer += 1

714

715 # Normalize the diff.

716 if changes:

717 self.diff_cleanupMerge(diffs)

718 self.diff_cleanupSemanticLossless(diffs)

719

720 # Find any overlaps between deletions and insertions.

721 # e.g: <del>abcxxx</del><ins>xxxdef</ins>

722 # -> <del>abc</del>xxx<ins>def</ins>

723 # e.g: <del>xxxabc</del><ins>defxxx</ins>

724 # -> <ins>def</ins>xxx<del>abc</del>

725 # Only extract an overlap if it is as big as the edit ahead or behind it.

726 pointer = 1

727 while pointer < len(diffs):

728 if (

729 diffs[pointer - 1][0] == self.DIFF_DELETE

730 and diffs[pointer][0] == self.DIFF_INSERT

731 ):

732 deletion = diffs[pointer - 1][1]

733 insertion = diffs[pointer][1]

734 overlap_length1 = self.diff_commonOverlap(deletion, insertion)

735 overlap_length2 = self.diff_commonOverlap(insertion, deletion)

736 if overlap_length1 >= overlap_length2:

737 if (

738 overlap_length1 >= len(deletion) / 2.0

739 or overlap_length1 >= len(insertion) / 2.0

740 ):

741 # Overlap found. Insert an equality and trim the surrounding edits.

742 diffs.insert(

743 pointer, (self.DIFF_EQUAL, insertion[:overlap_length1])

744 )

745 diffs[pointer - 1] = (

746 self.DIFF_DELETE,

747 deletion[: len(deletion) - overlap_length1],

748 )

749 diffs[pointer + 1] = (

750 self.DIFF_INSERT,

751 insertion[overlap_length1:],

752 )

753 pointer += 1

754 else:

755 if (

756 overlap_length2 >= len(deletion) / 2.0

757 or overlap_length2 >= len(insertion) / 2.0

758 ):

759 # Reverse overlap found.

760 # Insert an equality and swap and trim the surrounding edits.

761 diffs.insert(

762 pointer, (self.DIFF_EQUAL, deletion[:overlap_length2])

763 )

764 diffs[pointer - 1] = (

765 self.DIFF_INSERT,

766 insertion[: len(insertion) - overlap_length2],

767 )

768 diffs[pointer + 1] = (

769 self.DIFF_DELETE,

770 deletion[overlap_length2:],

771 )

772 pointer += 1

773 pointer += 1

774 pointer += 1

775

776 def diff_cleanupSemanticLossless(self, diffs):

777 """Look for single edits surrounded on both sides by equalities

778 which can be shifted sideways to align the edit to a word boundary.

779 e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.

780

781 Args:

782 diffs: Array of diff tuples.

783 """

784

785 def diff_cleanupSemanticScore(one, two):

786 """Given two strings, compute a score representing whether the

787 internal boundary falls on logical boundaries.

788 Scores range from 6 (best) to 0 (worst).

789 Closure, but does not reference any external variables.

790

791 Args:

792 one: First string.

793 two: Second string.

794

795 Returns:

796 The score.

797 """

798 if not one or not two:

799 # Edges are the best.

800 return 6

801

802 # Each port of this function behaves slightly differently due to

803 # subtle differences in each language's definition of things like

804 # 'whitespace'. Since this function's purpose is largely cosmetic,

805 # the choice has been made to use each language's native features

806 # rather than force total conformity.

807 char1 = one[-1]

808 char2 = two[0]

809 nonAlphaNumeric1 = not char1.isalnum()

810 nonAlphaNumeric2 = not char2.isalnum()

811 whitespace1 = nonAlphaNumeric1 and char1.isspace()

812 whitespace2 = nonAlphaNumeric2 and char2.isspace()

813 lineBreak1 = whitespace1 and (char1 == "\r" or char1 == "\n")

814 lineBreak2 = whitespace2 and (char2 == "\r" or char2 == "\n")

815 blankLine1 = lineBreak1 and self.BLANKLINEEND.search(one)

816 blankLine2 = lineBreak2 and self.BLANKLINESTART.match(two)

817

818 if blankLine1 or blankLine2:

819 # Five points for blank lines.

820 return 5

821 elif lineBreak1 or lineBreak2:

822 # Four points for line breaks.

823 return 4

824 elif nonAlphaNumeric1 and not whitespace1 and whitespace2:

825 # Three points for end of sentences.

826 return 3

827 elif whitespace1 or whitespace2:

828 # Two points for whitespace.

829 return 2

830 elif nonAlphaNumeric1 or nonAlphaNumeric2:

831 # One point for non-alphanumeric.

832 return 1

833 return 0

834

835 pointer = 1

836 # Intentionally ignore the first and last element (don't need checking).

837 while pointer < len(diffs) - 1:

838 if (

839 diffs[pointer - 1][0] == self.DIFF_EQUAL

840 and diffs[pointer + 1][0] == self.DIFF_EQUAL

841 ):

842 # This is a single edit surrounded by equalities.

843 equality1 = diffs[pointer - 1][1]

844 edit = diffs[pointer][1]

845 equality2 = diffs[pointer + 1][1]

846

847 # First, shift the edit as far left as possible.

848 commonOffset = self.diff_commonSuffix(equality1, edit)

849 if commonOffset:

850 commonString = edit[-commonOffset:]

851 equality1 = equality1[:-commonOffset]

852 edit = commonString + edit[:-commonOffset]

853 equality2 = commonString + equality2

854

855 # Second, step character by character right, looking for the best fit.

856 bestEquality1 = equality1

857 bestEdit = edit

858 bestEquality2 = equality2

859 bestScore = diff_cleanupSemanticScore(

860 equality1, edit

861 ) + diff_cleanupSemanticScore(edit, equality2)

862 while edit and equality2 and edit[0] == equality2[0]:

863 equality1 += edit[0]

864 edit = edit[1:] + equality2[0]

865 equality2 = equality2[1:]

866 score = diff_cleanupSemanticScore(

867 equality1, edit

868 ) + diff_cleanupSemanticScore(edit, equality2)

869 # The >= encourages trailing rather than leading whitespace on edits.

870 if score >= bestScore:

871 bestScore = score

872 bestEquality1 = equality1

873 bestEdit = edit

874 bestEquality2 = equality2

875

876 if diffs[pointer - 1][1] != bestEquality1:

877 # We have an improvement, save it back to the diff.

878 if bestEquality1:

879 diffs[pointer - 1] = (diffs[pointer - 1][0], bestEquality1)

880 else:

881 del diffs[pointer - 1]

882 pointer -= 1

883 diffs[pointer] = (diffs[pointer][0], bestEdit)

884 if bestEquality2:

885 diffs[pointer + 1] = (diffs[pointer + 1][0], bestEquality2)

886 else:

887 del diffs[pointer + 1]

888 pointer -= 1

889 pointer += 1

890

891 # Define some regex patterns for matching boundaries.

892 BLANKLINEEND = re.compile(r"\n\r?\n$")

893 BLANKLINESTART = re.compile(r"^\r?\n\r?\n")

894

895 def diff_cleanupEfficiency(self, diffs):

896 """Reduce the number of edits by eliminating operationally trivial

897 equalities.

898

899 Args:

900 diffs: Array of diff tuples.

901 """

902 changes = False

903 equalities = [] # Stack of indices where equalities are found.

904 lastEquality = None # Always equal to diffs[equalities[-1]][1]

905 pointer = 0 # Index of current position.

906 pre_ins = False # Is there an insertion operation before the last equality.

907 pre_del = False # Is there a deletion operation before the last equality.

908 post_ins = False # Is there an insertion operation after the last equality.

909 post_del = False # Is there a deletion operation after the last equality.

910 while pointer < len(diffs):

911 if diffs[pointer][0] == self.DIFF_EQUAL: # Equality found.

912 if len(diffs[pointer][1]) < self.Diff_EditCost and (

913 post_ins or post_del

914 ):

915 # Candidate found.

916 equalities.append(pointer)

917 pre_ins = post_ins

918 pre_del = post_del

919 lastEquality = diffs[pointer][1]

920 else:

921 # Not a candidate, and can never become one.

922 equalities = []

923 lastEquality = None

924

925 post_ins = post_del = False

926 else: # An insertion or deletion.

927 if diffs[pointer][0] == self.DIFF_DELETE:

928 post_del = True

929 else:

930 post_ins = True

931

932 # Five types to be split:

933 # <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del>

934 # <ins>A</ins>X<ins>C</ins><del>D</del>

935 # <ins>A</ins><del>B</del>X<ins>C</ins>

936 # <ins>A</del>X<ins>C</ins><del>D</del>

937 # <ins>A</ins><del>B</del>X<del>C</del>

938

939 if lastEquality and (

940 (pre_ins and pre_del and post_ins and post_del)

941 or (

942 (len(lastEquality) < self.Diff_EditCost / 2)

943 and (pre_ins + pre_del + post_ins + post_del) == 3

944 )

945 ):

946 # Duplicate record.

947 diffs.insert(equalities[-1], (self.DIFF_DELETE, lastEquality))

948 # Change second copy to insert.

949 diffs[equalities[-1] + 1] = (

950 self.DIFF_INSERT,

951 diffs[equalities[-1] + 1][1],

952 )

953 equalities.pop() # Throw away the equality we just deleted.

954 lastEquality = None

955 if pre_ins and pre_del:

956 # No changes made which could affect previous entry, keep going.

957 post_ins = post_del = True

958 equalities = []

959 else:

960 if len(equalities):

961 equalities.pop() # Throw away the previous equality.

962 if len(equalities):

963 pointer = equalities[-1]

964 else:

965 pointer = -1

966 post_ins = post_del = False

967 changes = True

968 pointer += 1

969

970 if changes:

971 self.diff_cleanupMerge(diffs)

972

973 def diff_cleanupMerge(self, diffs):

974 """Reorder and merge like edit sections. Merge equalities.

975 Any edit section can move as long as it doesn't cross an equality.

976

977 Args:

978 diffs: Array of diff tuples.

979 """

980 diffs.append((self.DIFF_EQUAL, "")) # Add a dummy entry at the end.

981 pointer = 0

982 count_delete = 0

983 count_insert = 0

984 text_delete = ""

985 text_insert = ""

986 while pointer < len(diffs):

987 if diffs[pointer][0] == self.DIFF_INSERT:

988 count_insert += 1

989 text_insert += diffs[pointer][1]

990 pointer += 1

991 elif diffs[pointer][0] == self.DIFF_DELETE:

992 count_delete += 1

993 text_delete += diffs[pointer][1]

994 pointer += 1

995 elif diffs[pointer][0] == self.DIFF_EQUAL:

996 # Upon reaching an equality, check for prior redundancies.

997 if count_delete + count_insert > 1:

998 if count_delete != 0 and count_insert != 0:

999 # Factor out any common prefixies.

1000 commonlength = self.diff_commonPrefix(text_insert, text_delete)

1001 if commonlength != 0:

1002 x = pointer - count_delete - count_insert - 1

1003 if x >= 0 and diffs[x][0] == self.DIFF_EQUAL:

1004 diffs[x] = (

1005 diffs[x][0],

1006 diffs[x][1] + text_insert[:commonlength],

1007 )

1008 else:

1009 diffs.insert(

1010 0, (self.DIFF_EQUAL, text_insert[:commonlength])

1011 )

1012 pointer += 1

1013 text_insert = text_insert[commonlength:]

1014 text_delete = text_delete[commonlength:]

1015 # Factor out any common suffixies.

1016 commonlength = self.diff_commonSuffix(text_insert, text_delete)

1017 if commonlength != 0:

1018 diffs[pointer] = (

1019 diffs[pointer][0],

1020 text_insert[-commonlength:] + diffs[pointer][1],

1021 )

1022 text_insert = text_insert[:-commonlength]

1023 text_delete = text_delete[:-commonlength]

1024 # Delete the offending records and add the merged ones.

1025 new_ops = []

1026 if len(text_delete) != 0:

1027 new_ops.append((self.DIFF_DELETE, text_delete))

1028 if len(text_insert) != 0:

1029 new_ops.append((self.DIFF_INSERT, text_insert))

1030 pointer -= count_delete + count_insert

1031 diffs[pointer : pointer + count_delete + count_insert] = new_ops

1032 pointer += len(new_ops) + 1

1033 elif pointer != 0 and diffs[pointer - 1][0] == self.DIFF_EQUAL:

1034 # Merge this equality with the previous one.

1035 diffs[pointer - 1] = (

1036 diffs[pointer - 1][0],

1037 diffs[pointer - 1][1] + diffs[pointer][1],

1038 )

1039 del diffs[pointer]

1040 else:

1041 pointer += 1

1042

1043 count_insert = 0

1044 count_delete = 0

1045 text_delete = ""

1046 text_insert = ""

1047

1048 if diffs[-1][1] == "":

1049 diffs.pop() # Remove the dummy entry at the end.

1050

1051 # Second pass: look for single edits surrounded on both sides by equalities

1052 # which can be shifted sideways to eliminate an equality.

1053 # e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC

1054 changes = False

1055 pointer = 1

1056 # Intentionally ignore the first and last element (don't need checking).

1057 while pointer < len(diffs) - 1:

1058 if (

1059 diffs[pointer - 1][0] == self.DIFF_EQUAL

1060 and diffs[pointer + 1][0] == self.DIFF_EQUAL

1061 ):

1062 # This is a single edit surrounded by equalities.

1063 if diffs[pointer][1].endswith(diffs[pointer - 1][1]):

1064 # Shift the edit over the previous equality.

1065 if diffs[pointer - 1][1] != "":

1066 diffs[pointer] = (

1067 diffs[pointer][0],

1068 diffs[pointer - 1][1]

1069 + diffs[pointer][1][: -len(diffs[pointer - 1][1])],

1070 )

1071 diffs[pointer + 1] = (

1072 diffs[pointer + 1][0],

1073 diffs[pointer - 1][1] + diffs[pointer + 1][1],

1074 )

1075 del diffs[pointer - 1]

1076 changes = True

1077 elif diffs[pointer][1].startswith(diffs[pointer + 1][1]):

1078 # Shift the edit over the next equality.

1079 diffs[pointer - 1] = (

1080 diffs[pointer - 1][0],

1081 diffs[pointer - 1][1] + diffs[pointer + 1][1],

1082 )

1083 diffs[pointer] = (

1084 diffs[pointer][0],

1085 diffs[pointer][1][len(diffs[pointer + 1][1]) :]

1086 + diffs[pointer + 1][1],

1087 )

1088 del diffs[pointer + 1]

1089 changes = True

1090 pointer += 1

1091

1092 # If shifts were made, the diff needs reordering and another shift sweep.

1093 if changes:

1094 self.diff_cleanupMerge(diffs)

1095

1096 def diff_xIndex(self, diffs, loc):

1097 """loc is a location in text1, compute and return the equivalent location

1098 in text2. e.g. "The cat" vs "The big cat", 1->1, 5->8

1099

1100 Args:

1101 diffs: Array of diff tuples.

1102 loc: Location within text1.

1103

1104 Returns:

1105 Location within text2.

1106 """

1107 chars1 = 0

1108 chars2 = 0

1109 last_chars1 = 0

1110 last_chars2 = 0

1111 for x in range(len(diffs)):

1112 (op, text) = diffs[x]

1113 if op != self.DIFF_INSERT: # Equality or deletion.

1114 chars1 += len(text)

1115 if op != self.DIFF_DELETE: # Equality or insertion.

1116 chars2 += len(text)

1117 if chars1 > loc: # Overshot the location.

1118 break

1119 last_chars1 = chars1

1120 last_chars2 = chars2

1121

1122 if len(diffs) != x and diffs[x][0] == self.DIFF_DELETE:

1123 # The location was deleted.

1124 return last_chars2

1125 # Add the remaining len(character).

1126 return last_chars2 + (loc - last_chars1)

1127

1128 def diff_prettyHtml(self, diffs):

1129 """Convert a diff array into a pretty HTML report.

1130

1131 Args:

1132 diffs: Array of diff tuples.

1133

1134 Returns:

1135 HTML representation.

1136 """

1137 html = []

1138 for (op, data) in diffs:

1139 text = (

1140 data.replace("&", "&")

1141 .replace("<", "<")

1142 .replace(">", ">")

1143 .replace("\n", "¶<br>")

1144 )

1145 if op == self.DIFF_INSERT:

1146 html.append('<ins style="background:#e6ffe6;">%s</ins>' % text)

1147 elif op == self.DIFF_DELETE:

1148 html.append('<del style="background:#ffe6e6;">%s</del>' % text)

1149 elif op == self.DIFF_EQUAL:

1150 html.append("<span>%s</span>" % text)

1151 return "".join(html)

1152

1153 def diff_text1(self, diffs):

1154 """Compute and return the source text (all equalities and deletions).

1155

1156 Args:

1157 diffs: Array of diff tuples.

1158

1159 Returns:

1160 Source text.

1161 """

1162 text = []

1163 for (op, data) in diffs:

1164 if op != self.DIFF_INSERT:

1165 text.append(data)

1166 return "".join(text)

1167

1168 def diff_text2(self, diffs):

1169 """Compute and return the destination text (all equalities and insertions).

1170

1171 Args:

1172 diffs: Array of diff tuples.

1173

1174 Returns:

1175 Destination text.

1176 """

1177 text = []

1178 for (op, data) in diffs:

1179 if op != self.DIFF_DELETE:

1180 text.append(data)

1181 return "".join(text)

1182

1183 def diff_levenshtein(self, diffs):

1184 """Compute the Levenshtein distance; the number of inserted, deleted or

1185 substituted characters.

1186

1187 Args:

1188 diffs: Array of diff tuples.

1189

1190 Returns:

1191 Number of changes.

1192 """

1193 levenshtein = 0

1194 insertions = 0

1195 deletions = 0

1196 for (op, data) in diffs:

1197 if op == self.DIFF_INSERT:

1198 insertions += len(data)

1199 elif op == self.DIFF_DELETE:

1200 deletions += len(data)

1201 elif op == self.DIFF_EQUAL:

1202 # A deletion and an insertion is one substitution.

1203 levenshtein += max(insertions, deletions)

1204 insertions = 0

1205 deletions = 0

1206 levenshtein += max(insertions, deletions)

1207 return levenshtein

1208

1209 def diff_toDelta(self, diffs):

1210 """Crush the diff into an encoded string which describes the operations

1211 required to transform text1 into text2.

1212 E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'.

1213 Operations are tab-separated. Inserted text is escaped using %xx notation.

1214

1215 Args:

1216 diffs: Array of diff tuples.

1217

1218 Returns:

1219 Delta text.

1220 """

1221 text = []

1222 for (op, data) in diffs:

1223 if op == self.DIFF_INSERT:

1224 # High ascii will raise UnicodeDecodeError. Use Unicode instead.

1225 data = data.encode("utf-8")

1226 text.append("+" + urllib.parse.quote(data, "!~*'();/?:@&=+$,# "))

1227 elif op == self.DIFF_DELETE:

1228 text.append("-%d" % len(data))

1229 elif op == self.DIFF_EQUAL:

1230 text.append("=%d" % len(data))

1231 return "\t".join(text)

1232

1233 def diff_fromDelta(self, text1, delta):

1234 """Given the original text1, and an encoded string which describes the

1235 operations required to transform text1 into text2, compute the full diff.

1236

1237 Args:

1238 text1: Source string for the diff.

1239 delta: Delta text.

1240

1241 Returns:

1242 Array of diff tuples.

1243

1244 Raises:

1245 ValueError: If invalid input.

1246 """

1247 diffs = []

1248 pointer = 0 # Cursor in text1

1249 tokens = delta.split("\t")

1250 for token in tokens:

1251 if token == "":

1252 # Blank tokens are ok (from a trailing \t).

1253 continue

1254 # Each token begins with a one character parameter which specifies the

1255 # operation of this token (delete, insert, equality).

1256 param = token[1:]

1257 if token[0] == "+":

1258 param = urllib.parse.unquote(param)

1259 diffs.append((self.DIFF_INSERT, param))

1260 elif token[0] == "-" or token[0] == "=":

1261 try:

1262 n = int(param)

1263 except ValueError:

1264 raise ValueError("Invalid number in diff_fromDelta: " + param)

1265 if n < 0:

1266 raise ValueError("Negative number in diff_fromDelta: " + param)

1267 text = text1[pointer : pointer + n]

1268 pointer += n

1269 if token[0] == "=":

1270 diffs.append((self.DIFF_EQUAL, text))

1271 else:

1272 diffs.append((self.DIFF_DELETE, text))

1273 else:

1274 # Anything else is an error.

1275 raise ValueError(

1276 "Invalid diff operation in diff_fromDelta: " + token[0]

1277 )

1278 if pointer != len(text1):

1279 raise ValueError(

1280 "Delta length (%d) does not equal source text length (%d)."

1281 % (pointer, len(text1))

1282 )

1283 return diffs

1284

1285 # MATCH FUNCTIONS

1286

1287 def match_main(self, text, pattern, loc):

1288 """Locate the best instance of 'pattern' in 'text' near 'loc'.

1289

1290 Args:

1291 text: The text to search.

1292 pattern: The pattern to search for.

1293 loc: The location to search around.

1294

1295 Returns:

1296 Best match index or -1.

1297 """

1298 # Check for null inputs.

1299 if text == None or pattern == None:

1300 raise ValueError("Null inputs. (match_main)")

1301

1302 loc = max(0, min(loc, len(text)))

1303 if text == pattern:

1304 # Shortcut (potentially not guaranteed by the algorithm)

1305 return 0

1306 elif not text:

1307 # Nothing to match.

1308 return -1

1309 elif text[loc : loc + len(pattern)] == pattern:

1310 # Perfect match at the perfect spot! (Includes case of null pattern)

1311 return loc

1312 else:

1313 # Do a fuzzy compare.

1314 match = self.match_bitap(text, pattern, loc)

1315 return match

1316

1317 def match_bitap(self, text, pattern, loc):

1318 """Locate the best instance of 'pattern' in 'text' near 'loc' using the

1319 Bitap algorithm.

1320

1321 Args:

1322 text: The text to search.

1323 pattern: The pattern to search for.

1324 loc: The location to search around.

1325

1326 Returns:

1327 Best match index or -1.

1328 """

1329 # Python doesn't have a maxint limit, so ignore this check.

1330 # if self.Match_MaxBits != 0 and len(pattern) > self.Match_MaxBits:

1331 # raise ValueError("Pattern too long for this application.")

1332

1333 # Initialise the alphabet.

1334 s = self.match_alphabet(pattern)

1335

1336 def match_bitapScore(e, x):

1337 """Compute and return the score for a match with e errors and x location.

1338 Accesses loc and pattern through being a closure.

1339

1340 Args:

1341 e: Number of errors in match.

1342 x: Location of match.

1343

1344 Returns:

1345 Overall score for match (0.0 = good, 1.0 = bad).

1346 """

1347 accuracy = float(e) / len(pattern)

1348 proximity = abs(loc - x)

1349 if not self.Match_Distance:

1350 # Dodge divide by zero error.

1351 return proximity and 1.0 or accuracy

1352 return accuracy + (proximity / float(self.Match_Distance))

1353

1354 # Highest score beyond which we give up.

1355 score_threshold = self.Match_Threshold

1356 # Is there a nearby exact match? (speedup)

1357 best_loc = text.find(pattern, loc)

1358 if best_loc != -1:

1359 score_threshold = min(match_bitapScore(0, best_loc), score_threshold)

1360 # What about in the other direction? (speedup)

1361 best_loc = text.rfind(pattern, loc + len(pattern))

1362 if best_loc != -1:

1363 score_threshold = min(match_bitapScore(0, best_loc), score_threshold)

1364

1365 # Initialise the bit arrays.

1366 matchmask = 1 << (len(pattern) - 1)

1367 best_loc = -1

1368

1369 bin_max = len(pattern) + len(text)

1370 # Empty initialization added to appease pychecker.

1371 last_rd = None

1372 for d in range(len(pattern)):

1373 # Scan for the best match each iteration allows for one more error.

1374 # Run a binary search to determine how far from 'loc' we can stray at

1375 # this error level.

1376 bin_min = 0

1377 bin_mid = bin_max

1378 while bin_min < bin_mid:

1379 if match_bitapScore(d, loc + bin_mid) <= score_threshold:

1380 bin_min = bin_mid

1381 else:

1382 bin_max = bin_mid

1383 bin_mid = (bin_max - bin_min) // 2 + bin_min

1384

1385 # Use the result from this iteration as the maximum for the next.

1386 bin_max = bin_mid

1387 start = max(1, loc - bin_mid + 1)

1388 finish = min(loc + bin_mid, len(text)) + len(pattern)

1389

1390 rd = [0] * (finish + 2)

1391 rd[finish + 1] = (1 << d) - 1

1392 for j in range(finish, start - 1, -1):

1393 if len(text) <= j - 1:

1394 # Out of range.

1395 charMatch = 0

1396 else:

1397 charMatch = s.get(text[j - 1], 0)

1398 if d == 0: # First pass: exact match.

1399 rd[j] = ((rd[j + 1] << 1) | 1) & charMatch

1400 else: # Subsequent passes: fuzzy match.

1401 rd[j] = (

1402 (((rd[j + 1] << 1) | 1) & charMatch)

1403 | (((last_rd[j + 1] | last_rd[j]) << 1) | 1)

1404 | last_rd[j + 1]

1405 )

1406 if rd[j] & matchmask:

1407 score = match_bitapScore(d, j - 1)

1408 # This match will almost certainly be better than any existing match.

1409 # But check anyway.

1410 if score <= score_threshold:

1411 # Told you so.

1412 score_threshold = score

1413 best_loc = j - 1

1414 if best_loc > loc:

1415 # When passing loc, don't exceed our current distance from loc.

1416 start = max(1, 2 * loc - best_loc)

1417 else:

1418 # Already passed loc, downhill from here on in.

1419 break

1420 # No hope for a (better) match at greater error levels.

1421 if match_bitapScore(d + 1, loc) > score_threshold:

1422 break

1423 last_rd = rd

1424 return best_loc

1425

1426 def match_alphabet(self, pattern):

1427 """Initialise the alphabet for the Bitap algorithm.

1428

1429 Args:

1430 pattern: The text to encode.

1431

1432 Returns:

1433 Hash of character locations.

1434 """

1435 s = {}

1436 for char in pattern:

1437 s[char] = 0

1438 for i in range(len(pattern)):

1439 s[pattern[i]] |= 1 << (len(pattern) - i - 1)

1440 return s

1441

1442 # PATCH FUNCTIONS

1443

1444 def patch_addContext(self, patch, text):

1445 """Increase the context until it is unique,

1446 but don't let the pattern expand beyond Match_MaxBits.

1447

1448 Args:

1449 patch: The patch to grow.

1450 text: Source text.

1451 """

1452 if len(text) == 0:

1453 return

1454 pattern = text[patch.start2 : patch.start2 + patch.length1]

1455 padding = 0

1456

1457 # Look for the first and last matches of pattern in text. If two different

1458 # matches are found, increase the pattern length.

1459 while text.find(pattern) != text.rfind(pattern) and (

1460 self.Match_MaxBits == 0

1461 or len(pattern) < self.Match_MaxBits - self.Patch_Margin - self.Patch_Margin

1462 ):

1463 padding += self.Patch_Margin

1464 pattern = text[

1465 max(0, patch.start2 - padding) : patch.start2 + patch.length1 + padding

1466 ]

1467 # Add one chunk for good luck.

1468 padding += self.Patch_Margin

1469

1470 # Add the prefix.

1471 prefix = text[max(0, patch.start2 - padding) : patch.start2]

1472 if prefix:

1473 patch.diffs[:0] = [(self.DIFF_EQUAL, prefix)]

1474 # Add the suffix.

1475 suffix = text[

1476 patch.start2 + patch.length1 : patch.start2 + patch.length1 + padding

1477 ]

1478 if suffix:

1479 patch.diffs.append((self.DIFF_EQUAL, suffix))

1480

1481 # Roll back the start points.

1482 patch.start1 -= len(prefix)

1483 patch.start2 -= len(prefix)

1484 # Extend lengths.

1485 patch.length1 += len(prefix) + len(suffix)

1486 patch.length2 += len(prefix) + len(suffix)

1487

1488 def patch_make(self, a, b=None, c=None):

1489 """Compute a list of patches to turn text1 into text2.

1490 Use diffs if provided, otherwise compute it ourselves.

1491 There are four ways to call this function, depending on what data is

1492 available to the caller:

1493 Method 1:

1494 a = text1, b = text2

1495 Method 2:

1496 a = diffs

1497 Method 3 (optimal):

1498 a = text1, b = diffs

1499 Method 4 (deprecated, use method 3):

1500 a = text1, b = text2, c = diffs

1501

1502 Args:

1503 a: text1 (methods 1,3,4) or Array of diff tuples for text1 to

1504 text2 (method 2).

1505 b: text2 (methods 1,4) or Array of diff tuples for text1 to

1506 text2 (method 3) or undefined (method 2).

1507 c: Array of diff tuples for text1 to text2 (method 4) or

1508 undefined (methods 1,2,3).

1509

1510 Returns:

1511 Array of Patch objects.

1512 """

1513 text1 = None

1514 diffs = None

1515 if isinstance(a, str) and isinstance(b, str) and c is None:

1516 # Method 1: text1, text2

1517 # Compute diffs from text1 and text2.

1518 text1 = a

1519 diffs = self.diff_main(text1, b, True)

1520 if len(diffs) > 2:

1521 self.diff_cleanupSemantic(diffs)

1522 self.diff_cleanupEfficiency(diffs)

1523 elif isinstance(a, list) and b is None and c is None:

1524 # Method 2: diffs

1525 # Compute text1 from diffs.

1526 diffs = a

1527 text1 = self.diff_text1(diffs)

1528 elif isinstance(a, str) and isinstance(b, list) and c is None:

1529 # Method 3: text1, diffs

1530 text1 = a

1531 diffs = b

1532 elif isinstance(a, str) and isinstance(b, str) and isinstance(c, list):

1533 # Method 4: text1, text2, diffs

1534 # text2 is not used.

1535 text1 = a

1536 diffs = c

1537 else:

1538 raise ValueError("Unknown call format to patch_make.")

1539

1540 if not diffs:

1541 return [] # Get rid of the None case.

1542 patches = []

1543 patch = patch_obj()

1544 char_count1 = 0 # Number of characters into the text1 string.

1545 char_count2 = 0 # Number of characters into the text2 string.

1546 prepatch_text = text1 # Recreate the patches to determine context info.

1547 postpatch_text = text1

1548 for x in range(len(diffs)):

1549 (diff_type, diff_text) = diffs[x]

1550 if len(patch.diffs) == 0 and diff_type != self.DIFF_EQUAL:

1551 # A new patch starts here.

1552 patch.start1 = char_count1

1553 patch.start2 = char_count2

1554 if diff_type == self.DIFF_INSERT:

1555 # Insertion

1556 patch.diffs.append(diffs[x])

1557 patch.length2 += len(diff_text)

1558 postpatch_text = (

1559 postpatch_text[:char_count2]

1560 + diff_text

1561 + postpatch_text[char_count2:]

1562 )

1563 elif diff_type == self.DIFF_DELETE:

1564 # Deletion.

1565 patch.length1 += len(diff_text)

1566 patch.diffs.append(diffs[x])

1567 postpatch_text = (

1568 postpatch_text[:char_count2]

1569 + postpatch_text[char_count2 + len(diff_text) :]

1570 )

1571 elif (

1572 diff_type == self.DIFF_EQUAL

1573 and len(diff_text) <= 2 * self.Patch_Margin

1574 and len(patch.diffs) != 0

1575 and len(diffs) != x + 1

1576 ):

1577 # Small equality inside a patch.

1578 patch.diffs.append(diffs[x])

1579 patch.length1 += len(diff_text)

1580 patch.length2 += len(diff_text)

1581

1582 if diff_type == self.DIFF_EQUAL and len(diff_text) >= 2 * self.Patch_Margin:

1583 # Time for a new patch.

1584 if len(patch.diffs) != 0:

1585 self.patch_addContext(patch, prepatch_text)

1586 patches.append(patch)

1587 patch = patch_obj()

1588 # Unlike Unidiff, our patch lists have a rolling context.

1589 # https://github.com/google/diff-match-patch/wiki/Unidiff

1590 # Update prepatch text & pos to reflect the application of the

1591 # just completed patch.

1592 prepatch_text = postpatch_text

1593 char_count1 = char_count2

1594

1595 # Update the current character count.

1596 if diff_type != self.DIFF_INSERT:

1597 char_count1 += len(diff_text)

1598 if diff_type != self.DIFF_DELETE:

1599 char_count2 += len(diff_text)

1600

1601 # Pick up the leftover patch if not empty.

1602 if len(patch.diffs) != 0:

1603 self.patch_addContext(patch, prepatch_text)

1604 patches.append(patch)

1605 return patches

1606

1607 def patch_deepCopy(self, patches):

1608 """Given an array of patches, return another array that is identical.

1609

1610 Args:

1611 patches: Array of Patch objects.

1612

1613 Returns:

1614 Array of Patch objects.

1615 """

1616 patchesCopy = []

1617 for patch in patches:

1618 patchCopy = patch_obj()

1619 # No need to deep copy the tuples since they are immutable.

1620 patchCopy.diffs = patch.diffs[:]

1621 patchCopy.start1 = patch.start1

1622 patchCopy.start2 = patch.start2

1623 patchCopy.length1 = patch.length1

1624 patchCopy.length2 = patch.length2

1625 patchesCopy.append(patchCopy)

1626 return patchesCopy

1627

1628 def patch_apply(self, patches, text):

1629 """Merge a set of patches onto the text. Return a patched text, as well

1630 as a list of true/false values indicating which patches were applied.

1631

1632 Args:

1633 patches: Array of Patch objects.

1634 text: Old text.

1635

1636 Returns:

1637 Two element Array, containing the new text and an array of boolean values.

1638 """

1639 if not patches:

1640 return (text, [])

1641

1642 # Deep copy the patches so that no changes are made to originals.

1643 patches = self.patch_deepCopy(patches)

1644

1645 nullPadding = self.patch_addPadding(patches)

1646 text = nullPadding + text + nullPadding

1647 self.patch_splitMax(patches)

1648

1649 # delta keeps track of the offset between the expected and actual location

1650 # of the previous patch. If there are patches expected at positions 10 and

1651 # 20, but the first patch was found at 12, delta is 2 and the second patch

1652 # has an effective expected position of 22.

1653 delta = 0

1654 results = []

1655 for patch in patches:

1656 expected_loc = patch.start2 + delta

1657 text1 = self.diff_text1(patch.diffs)

1658 end_loc = -1

1659 if len(text1) > self.Match_MaxBits:

1660 # patch_splitMax will only provide an oversized pattern in the case of

1661 # a monster delete.

1662 start_loc = self.match_main(

1663 text, text1[: self.Match_MaxBits], expected_loc

1664 )

1665 if start_loc != -1:

1666 end_loc = self.match_main(

1667 text,

1668 text1[-self.Match_MaxBits :],

1669 expected_loc + len(text1) - self.Match_MaxBits,

1670 )

1671 if end_loc == -1 or start_loc >= end_loc:

1672 # Can't find valid trailing context. Drop this patch.

1673 start_loc = -1

1674 else:

1675 start_loc = self.match_main(text, text1, expected_loc)

1676 if start_loc == -1:

1677 # No match found. :(

1678 results.append(False)

1679 # Subtract the delta for this failed patch from subsequent patches.

1680 delta -= patch.length2 - patch.length1

1681 else:

1682 # Found a match. :)

1683 results.append(True)

1684 delta = start_loc - expected_loc

1685 if end_loc == -1:

1686 text2 = text[start_loc : start_loc + len(text1)]

1687 else:

1688 text2 = text[start_loc : end_loc + self.Match_MaxBits]

1689 if text1 == text2:

1690 # Perfect match, just shove the replacement text in.

1691 text = (

1692 text[:start_loc]

1693 + self.diff_text2(patch.diffs)

1694 + text[start_loc + len(text1) :]

1695 )

1696 else:

1697 # Imperfect match.

1698 # Run a diff to get a framework of equivalent indices.

1699 diffs = self.diff_main(text1, text2, False)

1700 if (

1701 len(text1) > self.Match_MaxBits

1702 and self.diff_levenshtein(diffs) / float(len(text1))

1703 > self.Patch_DeleteThreshold

1704 ):

1705 # The end points match, but the content is unacceptably bad.

1706 results[-1] = False

1707 else:

1708 self.diff_cleanupSemanticLossless(diffs)

1709 index1 = 0

1710 for (op, data) in patch.diffs:

1711 if op != self.DIFF_EQUAL:

1712 index2 = self.diff_xIndex(diffs, index1)

1713 if op == self.DIFF_INSERT: # Insertion

1714 text = (

1715 text[: start_loc + index2]

1716 + data

1717 + text[start_loc + index2 :]

1718 )

1719 elif op == self.DIFF_DELETE: # Deletion

1720 text = (

1721 text[: start_loc + index2]

1722 + text[

1723 start_loc

1724 + self.diff_xIndex(diffs, index1 + len(data)) :

1725 ]

1726 )

1727 if op != self.DIFF_DELETE:

1728 index1 += len(data)

1729 # Strip the padding off.

1730 text = text[len(nullPadding) : -len(nullPadding)]

1731 return (text, results)

1732

1733 def patch_addPadding(self, patches):

1734 """Add some padding on text start and end so that edges can match

1735 something. Intended to be called only from within patch_apply.

1736

1737 Args:

1738 patches: Array of Patch objects.

1739

1740 Returns:

1741 The padding string added to each side.

1742 """

1743 paddingLength = self.Patch_Margin

1744 nullPadding = ""

1745 for x in range(1, paddingLength + 1):

1746 nullPadding += chr(x)

1747

1748 # Bump all the patches forward.

1749 for patch in patches:

1750 patch.start1 += paddingLength

1751 patch.start2 += paddingLength

1752

1753 # Add some padding on start of first diff.

1754 patch = patches[0]

1755 diffs = patch.diffs

1756 if not diffs or diffs[0][0] != self.DIFF_EQUAL:

1757 # Add nullPadding equality.

1758 diffs.insert(0, (self.DIFF_EQUAL, nullPadding))

1759 patch.start1 -= paddingLength # Should be 0.

1760 patch.start2 -= paddingLength # Should be 0.

1761 patch.length1 += paddingLength

1762 patch.length2 += paddingLength

1763 elif paddingLength > len(diffs[0][1]):

1764 # Grow first equality.

1765 extraLength = paddingLength - len(diffs[0][1])

1766 newText = nullPadding[len(diffs[0][1]) :] + diffs[0][1]

1767 diffs[0] = (diffs[0][0], newText)

1768 patch.start1 -= extraLength

1769 patch.start2 -= extraLength

1770 patch.length1 += extraLength

1771 patch.length2 += extraLength

1772

1773 # Add some padding on end of last diff.

1774 patch = patches[-1]

1775 diffs = patch.diffs

1776 if not diffs or diffs[-1][0] != self.DIFF_EQUAL:

1777 # Add nullPadding equality.

1778 diffs.append((self.DIFF_EQUAL, nullPadding))

1779 patch.length1 += paddingLength

1780 patch.length2 += paddingLength

1781 elif paddingLength > len(diffs[-1][1]):

1782 # Grow last equality.

1783 extraLength = paddingLength - len(diffs[-1][1])

1784 newText = diffs[-1][1] + nullPadding[:extraLength]

1785 diffs[-1] = (diffs[-1][0], newText)

1786 patch.length1 += extraLength

1787 patch.length2 += extraLength

1788

1789 return nullPadding

1790

1791 def patch_splitMax(self, patches):

1792 """Look through the patches and break up any which are longer than the

1793 maximum limit of the match algorithm.

1794 Intended to be called only from within patch_apply.

1795

1796 Args:

1797 patches: Array of Patch objects.

1798 """

1799 patch_size = self.Match_MaxBits

1800 if patch_size == 0:

1801 # Python has the option of not splitting strings due to its ability

1802 # to handle integers of arbitrary precision.

1803 return

1804 for x in range(len(patches)):

1805 if patches[x].length1 <= patch_size:

1806 continue

1807 bigpatch = patches[x]

1808 # Remove the big old patch.

1809 del patches[x]

1810 x -= 1

1811 start1 = bigpatch.start1

1812 start2 = bigpatch.start2

1813 precontext = ""

1814 while len(bigpatch.diffs) != 0:

1815 # Create one of several smaller patches.

1816 patch = patch_obj()

1817 empty = True

1818 patch.start1 = start1 - len(precontext)

1819 patch.start2 = start2 - len(precontext)

1820 if precontext:

1821 patch.length1 = patch.length2 = len(precontext)

1822 patch.diffs.append((self.DIFF_EQUAL, precontext))

1823

1824 while (

1825 len(bigpatch.diffs) != 0

1826 and patch.length1 < patch_size - self.Patch_Margin

1827 ):

1828 (diff_type, diff_text) = bigpatch.diffs[0]

1829 if diff_type == self.DIFF_INSERT:

1830 # Insertions are harmless.

1831 patch.length2 += len(diff_text)

1832 start2 += len(diff_text)

1833 patch.diffs.append(bigpatch.diffs.pop(0))

1834 empty = False

1835 elif (

1836 diff_type == self.DIFF_DELETE

1837 and len(patch.diffs) == 1

1838 and patch.diffs[0][0] == self.DIFF_EQUAL

1839 and len(diff_text) > 2 * patch_size

1840 ):

1841 # This is a large deletion. Let it pass in one chunk.

1842 patch.length1 += len(diff_text)

1843 start1 += len(diff_text)

1844 empty = False

1845 patch.diffs.append((diff_type, diff_text))

1846 del bigpatch.diffs[0]

1847 else:

1848 # Deletion or equality. Only take as much as we can stomach.

1849 diff_text = diff_text[

1850 : patch_size - patch.length1 - self.Patch_Margin

1851 ]

1852 patch.length1 += len(diff_text)

1853 start1 += len(diff_text)

1854 if diff_type == self.DIFF_EQUAL:

1855 patch.length2 += len(diff_text)

1856 start2 += len(diff_text)

1857 else:

1858 empty = False

1859

1860 patch.diffs.append((diff_type, diff_text))

1861 if diff_text == bigpatch.diffs[0][1]:

1862 del bigpatch.diffs[0]

1863 else:

1864 bigpatch.diffs[0] = (

1865 bigpatch.diffs[0][0],

1866 bigpatch.diffs[0][1][len(diff_text) :],

1867 )

1868

1869 # Compute the head context for the next patch.

1870 precontext = self.diff_text2(patch.diffs)

1871 precontext = precontext[-self.Patch_Margin :]

1872 # Append the end context for this patch.

1873 postcontext = self.diff_text1(bigpatch.diffs)[: self.Patch_Margin]

1874 if postcontext:

1875 patch.length1 += len(postcontext)

1876 patch.length2 += len(postcontext)

1877 if len(patch.diffs) != 0 and patch.diffs[-1][0] == self.DIFF_EQUAL:

1878 patch.diffs[-1] = (

1879 self.DIFF_EQUAL,

1880 patch.diffs[-1][1] + postcontext,

1881 )

1882 else:

1883 patch.diffs.append((self.DIFF_EQUAL, postcontext))

1884

1885 if not empty:

1886 x += 1

1887 patches.insert(x, patch)

1888

1889 def patch_toText(self, patches):

1890 """Take a list of patches and return a textual representation.

1891

1892 Args:

1893 patches: Array of Patch objects.

1894

1895 Returns:

1896 Text representation of patches.

1897 """

1898 text = []

1899 for patch in patches:

1900 text.append(str(patch))

1901 return "".join(text)

1902

1903 def patch_fromText(self, textline):

1904 """Parse a textual representation of patches and return a list of patch

1905 objects.

1906

1907 Args:

1908 textline: Text representation of patches.

1909

1910 Returns:

1911 Array of Patch objects.

1912

1913 Raises:

1914 ValueError: If invalid input.

1915 """

1916 patches = []

1917 if not textline:

1918 return patches

1919 text = textline.split("\n")

1920 while len(text) != 0:

1921 m = re.match(r"^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$", text[0])

1922 if not m:

1923 raise ValueError("Invalid patch string: " + text[0])

1924 patch = patch_obj()

1925 patches.append(patch)

1926 patch.start1 = int(m.group(1))

1927 if m.group(2) == "":

1928 patch.start1 -= 1

1929 patch.length1 = 1

1930 elif m.group(2) == "0":

1931 patch.length1 = 0

1932 else:

1933 patch.start1 -= 1

1934 patch.length1 = int(m.group(2))

1935

1936 patch.start2 = int(m.group(3))

1937 if m.group(4) == "":

1938 patch.start2 -= 1

1939 patch.length2 = 1

1940 elif m.group(4) == "0":

1941 patch.length2 = 0

1942 else:

1943 patch.start2 -= 1

1944 patch.length2 = int(m.group(4))

1945

1946 del text[0]

1947

1948 while len(text) != 0:

1949 if text[0]:

1950 sign = text[0][0]

1951 else:

1952 sign = ""

1953 line = urllib.parse.unquote(text[0][1:])

1954 if sign == "+":

1955 # Insertion.

1956 patch.diffs.append((self.DIFF_INSERT, line))

1957 elif sign == "-":

1958 # Deletion.

1959 patch.diffs.append((self.DIFF_DELETE, line))

1960 elif sign == " ":

1961 # Minor equality.

1962 patch.diffs.append((self.DIFF_EQUAL, line))

1963 elif sign == "@":

1964 # Start of next patch.

1965 break

1966 elif sign == "":

1967 # Blank line? Whatever.

1968 pass

1969 else:

1970 # WTF?

1971 raise ValueError("Invalid patch mode: '%s'\n%s" % (sign, line))

1972 del text[0]

1973 return patches

1974

1975

1976class patch_obj:

1977 """Class representing one patch operation.

1978 """

1979

1980 def __init__(self):

1981 """Initializes with an empty list of diffs.

1982 """

1983 self.diffs = []

1984 self.start1 = None

1985 self.start2 = None

1986 self.length1 = 0

1987 self.length2 = 0

1988

1989 def __str__(self):

1990 """Emulate GNU diff's format.

1991 Header: @@ -382,8 +481,9 @@

1992 Indices are printed as 1-based, not 0-based.

1993

1994 Returns:

1995 The GNU diff string.

1996 """

1997 if self.length1 == 0:

1998 coords1 = str(self.start1) + ",0"

1999 elif self.length1 == 1:

2000 coords1 = str(self.start1 + 1)

2001 else:

2002 coords1 = str(self.start1 + 1) + "," + str(self.length1)

2003 if self.length2 == 0:

2004 coords2 = str(self.start2) + ",0"

2005 elif self.length2 == 1:

2006 coords2 = str(self.start2 + 1)

2007 else:

2008 coords2 = str(self.start2 + 1) + "," + str(self.length2)

2009 text = ["@@ -", coords1, " +", coords2, " @@\n"]

2010 # Escape the body of the patch with %xx notation.

2011 for (op, data) in self.diffs:

2012 if op == diff_match_patch.DIFF_INSERT:

2013 text.append("+")

2014 elif op == diff_match_patch.DIFF_DELETE:

2015 text.append("-")

2016 elif op == diff_match_patch.DIFF_EQUAL:

2017 text.append(" ")

2018 # High ascii will raise UnicodeDecodeError. Use Unicode instead.

2019 data = data.encode("utf-8")

2020 text.append(urllib.parse.quote(data, "!~*'();/?:@&=+$,# ") + "\n")

2021 return "".join(text)

Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/diff_match_patch/diff_match_patch.py: 4%

972 statements