Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/git/index/base.py: 12%
492 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1# index.py
2# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
3#
4# This module is part of GitPython and is released under
5# the BSD License: http://www.opensource.org/licenses/bsd-license.php
7import glob
8from io import BytesIO
9import os
10from stat import S_ISLNK
11import subprocess
12import tempfile
14from git.compat import (
15 force_bytes,
16 defenc,
17)
18from git.exc import GitCommandError, CheckoutError, GitError, InvalidGitRepositoryError
19from git.objects import (
20 Blob,
21 Submodule,
22 Tree,
23 Object,
24 Commit,
25)
26from git.objects.util import Serializable
27from git.util import (
28 LazyMixin,
29 LockedFD,
30 join_path_native,
31 file_contents_ro,
32 to_native_path_linux,
33 unbare_repo,
34 to_bin_sha,
35)
36from gitdb.base import IStream
37from gitdb.db import MemoryDB
39import git.diff as git_diff
40import os.path as osp
42from .fun import (
43 entry_key,
44 write_cache,
45 read_cache,
46 aggressive_tree_merge,
47 write_tree_from_cache,
48 stat_mode_to_index_mode,
49 S_IFGITLINK,
50 run_commit_hook,
51)
52from .typ import (
53 BaseIndexEntry,
54 IndexEntry,
55 StageType,
56)
57from .util import TemporaryFileSwap, post_clear_cache, default_index, git_working_dir
59# typing -----------------------------------------------------------------------------
61from typing import (
62 Any,
63 BinaryIO,
64 Callable,
65 Dict,
66 IO,
67 Iterable,
68 Iterator,
69 List,
70 NoReturn,
71 Sequence,
72 TYPE_CHECKING,
73 Tuple,
74 Type,
75 Union,
76)
78from git.types import Commit_ish, PathLike
80if TYPE_CHECKING: 80 ↛ 81line 80 didn't jump to line 81, because the condition on line 80 was never true
81 from subprocess import Popen
82 from git.repo import Repo
83 from git.refs.reference import Reference
84 from git.util import Actor
87Treeish = Union[Tree, Commit, str, bytes]
89# ------------------------------------------------------------------------------------
92__all__ = ("IndexFile", "CheckoutError", "StageType")
95class IndexFile(LazyMixin, git_diff.Diffable, Serializable):
97 """
98 Implements an Index that can be manipulated using a native implementation in
99 order to save git command function calls wherever possible.
101 It provides custom merging facilities allowing to merge without actually changing
102 your index or your working tree. This way you can perform own test-merges based
103 on the index only without having to deal with the working copy. This is useful
104 in case of partial working trees.
106 ``Entries``
108 The index contains an entries dict whose keys are tuples of type IndexEntry
109 to facilitate access.
111 You may read the entries dict or manipulate it using IndexEntry instance, i.e.::
113 index.entries[index.entry_key(index_entry_instance)] = index_entry_instance
115 Make sure you use index.write() once you are done manipulating the index directly
116 before operating on it using the git command"""
118 __slots__ = ("repo", "version", "entries", "_extension_data", "_file_path")
119 _VERSION = 2 # latest version we support
120 S_IFGITLINK = S_IFGITLINK # a submodule
    def __init__(self, repo: "Repo", file_path: Union[PathLike, None] = None) -> None:
        """Initialize this Index instance, optionally from the given ``file_path``.
        If no file_path is given, we will be created from the current index file.

        If a stream is not given, the stream will be initialized from the current
        repository's index on demand.

        :param repo: repository this index belongs to
        :param file_path: path of the index file to represent, or None to use the
            repository's default index path (``<git_dir>/index``)"""
        self.repo = repo
        self.version = self._VERSION  # index format version we (de)serialize
        self._extension_data = b""  # raw extension blob (e.g. TREE) read from disk
        self._file_path: PathLike = file_path or self._index_path()
    def _set_cache_(self, attr: str) -> None:
        # Lazy-attribute hook (LazyMixin): 'entries' is populated by reading the
        # index file from disk; every other attribute is delegated to the base.
        if attr == "entries":
            try:
                fd = os.open(self._file_path, os.O_RDONLY)
            except OSError:
                # in new repositories, there may be no index, which means we are empty
                self.entries: Dict[Tuple[PathLike, StageType], IndexEntry] = {}
                return None
            # END exception handling

            try:
                # mmap-backed read-only view of the file contents when possible
                stream = file_contents_ro(fd, stream=True, allow_mmap=True)
            finally:
                # the stream holds its own view of the data; the fd can be closed
                os.close(fd)

            self._deserialize(stream)
        else:
            super(IndexFile, self)._set_cache_(attr)
152 def _index_path(self) -> PathLike:
153 if self.repo.git_dir:
154 return join_path_native(self.repo.git_dir, "index")
155 else:
156 raise GitCommandError("No git directory given to join index path")
158 @property
159 def path(self) -> PathLike:
160 """:return: Path to the index file we are representing"""
161 return self._file_path
163 def _delete_entries_cache(self) -> None:
164 """Safely clear the entries cache so it can be recreated"""
165 try:
166 del self.entries
167 except AttributeError:
168 # fails in python 2.6.5 with this exception
169 pass
170 # END exception handling
172 # { Serializable Interface
174 def _deserialize(self, stream: IO) -> "IndexFile":
175 """Initialize this instance with index values read from the given stream"""
176 self.version, self.entries, self._extension_data, _conten_sha = read_cache(stream)
177 return self
179 def _entries_sorted(self) -> List[IndexEntry]:
180 """:return: list of entries, in a sorted fashion, first by path, then by stage"""
181 return sorted(self.entries.values(), key=lambda e: (e.path, e.stage))
183 def _serialize(self, stream: IO, ignore_extension_data: bool = False) -> "IndexFile":
184 entries = self._entries_sorted()
185 extension_data = self._extension_data # type: Union[None, bytes]
186 if ignore_extension_data:
187 extension_data = None
188 write_cache(entries, stream, extension_data)
189 return self
191 # } END serializable interface
    def write(
        self,
        file_path: Union[None, PathLike] = None,
        ignore_extension_data: bool = False,
    ) -> None:
        """Write the current state to our file path or to the given one

        :param file_path:
            If None, we will write to our stored file path from which we have
            been initialized. Otherwise we write to the given file path.
            Please note that this will change the file_path of this index to
            the one you gave.

        :param ignore_extension_data:
            If True, the TREE type extension data read in the index will not
            be written to disk. NOTE that no extension data is actually written.
            Use this if you have altered the index and
            would like to use git-write-tree afterwards to create a tree
            representing your written changes.
            If this data is present in the written index, git-write-tree
            will instead write the stored/cached tree.
            Alternatively, use IndexFile.write_tree() to handle this case
            automatically

        :return: None"""
        # make sure we have our entries read before getting a write lock
        # else it would be done when streaming. This can happen
        # if one doesn't change the index, but writes it right away
        self.entries
        lfd = LockedFD(file_path or self._file_path)
        stream = lfd.open(write=True, stream=True)

        ok = False
        try:
            self._serialize(stream, ignore_extension_data)
            ok = True
        finally:
            if not ok:
                # a failed serialization must not clobber the original index:
                # discard the lock file instead of committing it
                lfd.rollback()

        lfd.commit()

        # make sure we represent what we have written
        if file_path is not None:
            self._file_path = file_path
    @post_clear_cache
    @default_index
    def merge_tree(self, rhs: Treeish, base: Union[None, Treeish] = None) -> "IndexFile":
        """Merge the given rhs treeish into the current index, possibly taking
        a common base treeish into account.

        As opposed to the :func:`IndexFile.from_tree` method, this allows you to use an already
        existing tree as the left side of the merge

        :param rhs:
            treeish reference pointing to the 'other' side of the merge.

        :param base:
            optional treeish reference pointing to the common base of 'rhs' and
            this index which equals lhs

        :return:
            self ( containing the merge and possibly unmerged entries in case of
            conflicts )

        :raise GitCommandError:
            If there is a merge conflict. The error will
            be raised at the first conflicting path. If you want to have proper
            merge resolution to be done by yourself, you have to commit the changed
            index ( or make a valid tree from it ) and retry with a three-way
            index.from_tree call."""
        # -i : ignore working tree status
        # --aggressive : handle more merge cases
        # -m : do an actual merge
        args: List[Union[Treeish, str]] = ["--aggressive", "-i", "-m"]
        if base is not None:
            args.append(base)
        args.append(rhs)

        # git-read-tree updates the index file on disk; @post_clear_cache then
        # drops our cached entries so they are re-read, and @default_index
        # guarantees we operate on the repository's real index file.
        self.repo.git.read_tree(args)
        return self
276 @classmethod
277 def new(cls, repo: "Repo", *tree_sha: Union[str, Tree]) -> "IndexFile":
278 """Merge the given treeish revisions into a new index which is returned.
279 This method behaves like git-read-tree --aggressive when doing the merge.
281 :param repo: The repository treeish are located in.
283 :param tree_sha:
284 20 byte or 40 byte tree sha or tree objects
286 :return:
287 New IndexFile instance. Its path will be undefined.
288 If you intend to write such a merged Index, supply an alternate file_path
289 to its 'write' method."""
290 tree_sha_bytes: List[bytes] = [to_bin_sha(str(t)) for t in tree_sha]
291 base_entries = aggressive_tree_merge(repo.odb, tree_sha_bytes)
293 inst = cls(repo)
294 # convert to entries dict
295 entries: Dict[Tuple[PathLike, int], IndexEntry] = dict(
296 zip(
297 ((e.path, e.stage) for e in base_entries),
298 (IndexEntry.from_base(e) for e in base_entries),
299 )
300 )
302 inst.entries = entries
303 return inst
    @classmethod
    def from_tree(cls, repo: "Repo", *treeish: Treeish, **kwargs: Any) -> "IndexFile":
        """Merge the given treeish revisions into a new index which is returned.
        The original index will remain unaltered

        :param repo:
            The repository treeish are located in.

        :param treeish:
            One, two or three Tree Objects, Commits or 40 byte hexshas. The result
            changes according to the amount of trees.
            If 1 Tree is given, it will just be read into a new index
            If 2 Trees are given, they will be merged into a new index using a
            two way merge algorithm. Tree 1 is the 'current' tree, tree 2 is the 'other'
            one. It behaves like a fast-forward.
            If 3 Trees are given, a 3-way merge will be performed with the first tree
            being the common ancestor of tree 2 and tree 3. Tree 2 is the 'current' tree,
            tree 3 is the 'other' one

        :param kwargs:
            Additional arguments passed to git-read-tree

        :return:
            New IndexFile instance. It will point to a temporary index location which
            does not exist anymore. If you intend to write such a merged Index, supply
            an alternate file_path to its 'write' method.

        :note:
            In the three-way merge case, --aggressive will be specified to automatically
            resolve more cases in a commonly correct manner. Specify trivial=True as kwarg
            to override that.

            As the underlying git-read-tree command takes into account the current index,
            it will be temporarily moved out of the way to assure there are no unsuspected
            interferences."""
        if len(treeish) == 0 or len(treeish) > 3:
            raise ValueError("Please specify between 1 and 3 treeish, got %i" % len(treeish))

        arg_list: List[Union[Treeish, str]] = []
        # ignore that working tree and index possibly are out of date
        if len(treeish) > 1:
            # drop unmerged entries when reading our index and merging
            arg_list.append("--reset")
            # handle non-trivial cases the way a real merge does
            arg_list.append("--aggressive")
        # END merge handling

        # tmp file created in git home directory to be sure renaming
        # works - /tmp/ dirs could be on another device
        # NOTE(review): tempfile.mktemp is race-prone (the name may be taken
        # before git creates it) and deprecated; presumably kept because
        # git-read-tree must create the file itself - confirm before changing.
        tmp_index = tempfile.mktemp("", "", repo.git_dir)
        arg_list.append("--index-output=%s" % tmp_index)
        arg_list.extend(treeish)

        # move current index out of the way - otherwise the merge may fail
        # as it considers existing entries. moving it essentially clears the index.
        # Unfortunately there is no 'soft' way to do it.
        # The TemporaryFileSwap assure the original file get put back
        if repo.git_dir:
            index_handler = TemporaryFileSwap(join_path_native(repo.git_dir, "index"))
        try:
            repo.git.read_tree(*arg_list, **kwargs)
            index = cls(repo, tmp_index)
            index.entries  # force it to read the file as we will delete the temp-file
            del index_handler  # release as soon as possible
        finally:
            if osp.exists(tmp_index):
                os.remove(tmp_index)
        # END index merge handling

        return index
376 # UTILITIES
    @unbare_repo
    def _iter_expand_paths(self: "IndexFile", paths: Sequence[PathLike]) -> Iterator[PathLike]:
        """Expand the directories in list of paths to the corresponding paths accordingly,
        yielding paths relative to the working tree.

        Note: git will add items multiple times even if a glob overlapped
        with manually specified paths or if paths where specified multiple
        times - we respect that and do not prune"""

        def raise_exc(e: Exception) -> NoReturn:
            # used as os.walk onerror callback so walk failures are not swallowed
            raise e

        r = str(self.repo.working_tree_dir)
        rs = r + os.sep  # working-tree prefix stripped off every yielded path
        for path in paths:
            abs_path = str(path)
            if not osp.isabs(abs_path):
                abs_path = osp.join(r, path)
            # END make absolute path

            try:
                st = os.lstat(abs_path)  # handles non-symlinks as well
            except OSError:
                # the lstat call may fail as the path may contain globs as well
                pass
            else:
                if S_ISLNK(st.st_mode):
                    # yield symlinks themselves rather than their targets
                    yield abs_path.replace(rs, "")
                    continue
            # end check symlink

            # if the path is not already pointing to an existing file, resolve globs if possible
            if not os.path.exists(abs_path) and ("?" in abs_path or "*" in abs_path or "[" in abs_path):
                resolved_paths = glob.glob(abs_path)
                # not abs_path in resolved_paths:
                # a glob() resolving to the same path we are feeding it with
                # is a glob() that failed to resolve. If we continued calling
                # ourselves we'd endlessly recurse. If the condition below
                # evaluates to true then we are likely dealing with a file
                # whose name contains wildcard characters.
                if abs_path not in resolved_paths:
                    # recurse so that resolved directories are expanded too
                    for f in self._iter_expand_paths(glob.glob(abs_path)):
                        yield str(f).replace(rs, "")
                    continue
            # END glob handling
            try:
                for root, _dirs, files in os.walk(abs_path, onerror=raise_exc):
                    for rela_file in files:
                        # add relative paths only
                        yield osp.join(root.replace(rs, ""), rela_file)
                    # END for each file in subdir
                # END for each subdirectory
            except OSError:
                # was a file or something that could not be iterated
                yield abs_path.replace(rs, "")
            # END path exception handling
        # END for each path
    def _write_path_to_stdin(
        self,
        proc: "Popen",
        filepath: PathLike,
        item: PathLike,
        fmakeexc: Callable[..., GitError],
        fprogress: Callable[[PathLike, bool, PathLike], None],
        read_from_stdout: bool = True,
    ) -> Union[None, str]:
        """Write path to proc.stdin and make sure it processes the item, including progress.

        :return: stdout string
        :param fmakeexc: zero-argument factory producing the exception to raise on a broken pipe
        :param read_from_stdout: if True, proc.stdout will be read after the item
            was sent to stdin. In that case, it will return None
        :note: There is a bug in git-update-index that prevents it from sending
            reports just in time. This is why we have a version that tries to
            read stdout and one which doesn't. In fact, the stdout is not
            important as the piped-in files are processed anyway and just in time
        :note: Newlines are essential here, gits behaviour is somewhat inconsistent
            on this depending on the version, hence we try our best to deal with
            newlines carefully. Usually the last newline will not be sent, instead
            we will close stdin to break the pipe."""

        fprogress(filepath, False, item)
        rval: Union[None, str] = None

        if proc.stdin is not None:
            try:
                proc.stdin.write(("%s\n" % filepath).encode(defenc))
            except IOError as e:
                # pipe broke, usually because some error happened
                raise fmakeexc() from e
            # END write exception handling
            proc.stdin.flush()

        if read_from_stdout and proc.stdout is not None:
            rval = proc.stdout.readline().strip()
        fprogress(filepath, True, item)
        return rval
474 def iter_blobs( 474 ↛ exitline 474 didn't jump to the function exit
475 self, predicate: Callable[[Tuple[StageType, Blob]], bool] = lambda t: True
476 ) -> Iterator[Tuple[StageType, Blob]]:
477 """
478 :return: Iterator yielding tuples of Blob objects and stages, tuple(stage, Blob)
480 :param predicate:
481 Function(t) returning True if tuple(stage, Blob) should be yielded by the
482 iterator. A default filter, the BlobFilter, allows you to yield blobs
483 only if they match a given list of paths."""
484 for entry in self.entries.values():
485 blob = entry.to_blob(self.repo)
486 blob.size = entry.size
487 output = (entry.stage, blob)
488 if predicate(output):
489 yield output
490 # END for each entry
492 def unmerged_blobs(self) -> Dict[PathLike, List[Tuple[StageType, Blob]]]:
493 """
494 :return:
495 Dict(path : list( tuple( stage, Blob, ...))), being
496 a dictionary associating a path in the index with a list containing
497 sorted stage/blob pairs
500 :note:
501 Blobs that have been removed in one side simply do not exist in the
502 given stage. I.e. a file removed on the 'other' branch whose entries
503 are at stage 3 will not have a stage 3 entry.
504 """
505 is_unmerged_blob = lambda t: t[0] != 0
506 path_map: Dict[PathLike, List[Tuple[StageType, Blob]]] = {}
507 for stage, blob in self.iter_blobs(is_unmerged_blob):
508 path_map.setdefault(blob.path, []).append((stage, blob))
509 # END for each unmerged blob
510 for line in path_map.values():
511 line.sort()
513 return path_map
    @classmethod
    def entry_key(cls, *entry: Union[BaseIndexEntry, PathLike, StageType]) -> Tuple[PathLike, StageType]:
        """:return: (path, stage) key usable with the ``entries`` dict, built either
            from a single BaseIndexEntry or from an explicit path and stage.
            Delegates to :func:`git.index.fun.entry_key`."""
        return entry_key(*entry)
519 def resolve_blobs(self, iter_blobs: Iterator[Blob]) -> "IndexFile":
520 """Resolve the blobs given in blob iterator. This will effectively remove the
521 index entries of the respective path at all non-null stages and add the given
522 blob as new stage null blob.
524 For each path there may only be one blob, otherwise a ValueError will be raised
525 claiming the path is already at stage 0.
527 :raise ValueError: if one of the blobs already existed at stage 0
528 :return: self
530 :note:
531 You will have to write the index manually once you are done, i.e.
532 index.resolve_blobs(blobs).write()
533 """
534 for blob in iter_blobs:
535 stage_null_key = (blob.path, 0)
536 if stage_null_key in self.entries:
537 raise ValueError("Path %r already exists at stage 0" % str(blob.path))
538 # END assert blob is not stage 0 already
540 # delete all possible stages
541 for stage in (1, 2, 3):
542 try:
543 del self.entries[(blob.path, stage)]
544 except KeyError:
545 pass
546 # END ignore key errors
547 # END for each possible stage
549 self.entries[stage_null_key] = IndexEntry.from_blob(blob)
550 # END for each blob
552 return self
554 def update(self) -> "IndexFile":
555 """Reread the contents of our index file, discarding all cached information
556 we might have.
558 :note: This is a possibly dangerious operations as it will discard your changes
559 to index.entries
560 :return: self"""
561 self._delete_entries_cache()
562 # allows to lazily reread on demand
563 return self
    def write_tree(self) -> Tree:
        """Writes this index to a corresponding Tree object into the repository's
        object database and return it.

        :return: Tree object representing this index
        :note: The tree will be written even if one or more objects the tree refers to
            does not yet exist in the object database. This could happen if you added
            Entries to the index directly.
        :raise ValueError: if there are no entries in the cache
        :raise UnmergedEntriesError:"""
        # we obtain no lock as we just flush our contents to disk as tree
        # If we are a new index, the entries access will load our data accordingly
        mdb = MemoryDB()
        entries = self._entries_sorted()
        # build the tree objects in memory first; the slice covers all entries
        binsha, tree_items = write_tree_from_cache(entries, mdb, slice(0, len(entries)))

        # copy changed trees only
        mdb.stream_copy(mdb.sha_iter(), self.repo.odb)

        # note: additional deserialization could be saved if write_tree_from_cache
        # would return sorted tree entries
        root_tree = Tree(self.repo, binsha, path="")
        root_tree._cache = tree_items  # pre-fill the cache to avoid re-reading the tree
        return root_tree
590 def _process_diff_args(
591 self, # type: ignore[override]
592 args: List[Union[str, "git_diff.Diffable", Type["git_diff.Diffable.Index"]]],
593 ) -> List[Union[str, "git_diff.Diffable", Type["git_diff.Diffable.Index"]]]:
594 try:
595 args.pop(args.index(self))
596 except IndexError:
597 pass
598 # END remove self
599 return args
601 def _to_relative_path(self, path: PathLike) -> PathLike:
602 """
603 :return: Version of path relative to our git directory or raise ValueError
604 if it is not within our git directory"""
605 if not osp.isabs(path):
606 return path
607 if self.repo.bare:
608 raise InvalidGitRepositoryError("require non-bare repository")
609 if not str(path).startswith(str(self.repo.working_tree_dir)):
610 raise ValueError("Absolute path %r is not in git repository at %r" % (path, self.repo.working_tree_dir))
611 return os.path.relpath(path, self.repo.working_tree_dir)
    def _preprocess_add_items(
        self, items: Sequence[Union[PathLike, Blob, BaseIndexEntry, "Submodule"]]
    ) -> Tuple[List[PathLike], List[BaseIndexEntry]]:
        """Split the items into two lists of path strings and BaseEntries.

        Paths are made repository-relative; Blobs/Submodules are converted to
        BaseIndexEntry; existing BaseIndexEntry items are passed through.

        :raise TypeError: if an item is of an unsupported type"""
        paths: List[PathLike] = []
        entries: List[BaseIndexEntry] = []
        # if it is a string put in list
        if isinstance(items, (str, os.PathLike)):
            items = [items]

        for item in items:
            if isinstance(item, (str, os.PathLike)):
                paths.append(self._to_relative_path(item))
            elif isinstance(item, (Blob, Submodule)):
                entries.append(BaseIndexEntry.from_blob(item))
            elif isinstance(item, BaseIndexEntry):
                entries.append(item)
            else:
                raise TypeError("Invalid Type: %r" % item)
        # END for each item
        return paths, entries
    def _store_path(self, filepath: PathLike, fprogress: Callable) -> BaseIndexEntry:
        """Store file at filepath in the database and return the base index entry
        Needs the git_working_dir decorator active ! This must be assured in the calling code"""
        st = os.lstat(filepath)  # handles non-symlinks as well
        if S_ISLNK(st.st_mode):
            # in PY3, readlink is string, but we need bytes. In PY2, it's just OS encoded bytes, we assume UTF-8
            # symlinks are stored as blobs containing their target path
            open_stream: Callable[[], BinaryIO] = lambda: BytesIO(force_bytes(os.readlink(filepath), encoding=defenc))
        else:
            open_stream = lambda: open(filepath, "rb")
        with open_stream() as stream:
            # fprogress is called once before and once after the object store
            fprogress(filepath, False, filepath)
            istream = self.repo.odb.store(IStream(Blob.type, st.st_size, stream))
            fprogress(filepath, True, filepath)
        return BaseIndexEntry(
            (
                stat_mode_to_index_mode(st.st_mode),
                istream.binsha,
                0,
                to_native_path_linux(filepath),
            )
        )
    @unbare_repo
    @git_working_dir
    def _entries_for_paths(
        self,
        paths: List[str],
        path_rewriter: Callable,
        fprogress: Callable,
        entries: List[BaseIndexEntry],
    ) -> List[BaseIndexEntry]:
        """Convert the given paths into stored index entries.

        If ``path_rewriter`` is set, paths are converted into null-sha entries
        appended to ``entries`` (and ``paths`` is emptied); otherwise each
        expanded path is stored in the object database via ``_store_path``.

        :return: list of entries created from ``paths`` (empty when a
            path_rewriter consumed them)"""
        entries_added: List[BaseIndexEntry] = []
        if path_rewriter:
            for path in paths:
                if osp.isabs(path):
                    abspath = path
                    gitrelative_path = path[len(str(self.repo.working_tree_dir)) + 1 :]
                else:
                    gitrelative_path = path
                    if self.repo.working_tree_dir:
                        abspath = osp.join(self.repo.working_tree_dir, gitrelative_path)
                    # end obtain relative and absolute paths

                blob = Blob(
                    self.repo,
                    Blob.NULL_BIN_SHA,
                    stat_mode_to_index_mode(os.stat(abspath).st_mode),
                    to_native_path_linux(gitrelative_path),
                )
                # TODO: variable undefined
                # NOTE(review): if the path is relative and working_tree_dir is
                # None, `abspath` is unbound here and os.stat above would raise
                # NameError - presumably unreachable thanks to @unbare_repo, but
                # confirm before relying on it.
                entries.append(BaseIndexEntry.from_blob(blob))
            # END for each path
            del paths[:]
        # END rewrite paths

        # HANDLE PATHS
        assert len(entries_added) == 0
        for filepath in self._iter_expand_paths(paths):
            entries_added.append(self._store_path(filepath, fprogress))
        # END for each filepath
        # END path handling
        return entries_added
    def add(
        self,
        items: Sequence[Union[PathLike, Blob, BaseIndexEntry, "Submodule"]],
        force: bool = True,
        fprogress: Callable = lambda *args: None,
        path_rewriter: Union[Callable[..., PathLike], None] = None,
        write: bool = True,
        write_extension_data: bool = False,
    ) -> List[BaseIndexEntry]:
        """Add files from the working tree, specific blobs or BaseIndexEntries
        to the index.

        :param items:
            Multiple types of items are supported, types can be mixed within one call.
            Different types imply a different handling. File paths may generally be
            relative or absolute.

            - path string
                strings denote a relative or absolute path into the repository pointing to
                an existing file, i.e. CHANGES, lib/myfile.ext, '/home/gitrepo/lib/myfile.ext'.

                Absolute paths must start with working tree directory of this index's repository
                to be considered valid. For example, if it was initialized with a non-normalized path, like
                `/root/repo/../repo`, absolute paths to be added must start with `/root/repo/../repo`.

                Paths provided like this must exist. When added, they will be written
                into the object database.

                PathStrings may contain globs, such as 'lib/__init__*' or can be directories
                like 'lib', the latter ones will add all the files within the directory and
                subdirectories.

                This equals a straight git-add.

                They are added at stage 0

            - Blob or Submodule object
                Blobs are added as they are assuming a valid mode is set.
                The file they refer to may or may not exist in the file system, but
                must be a path relative to our repository.

                If their sha is null ( 40*0 ), their path must exist in the file system
                relative to the git repository as an object will be created from
                the data at the path.
                The handling now very much equals the way string paths are processed, except that
                the mode you have set will be kept. This allows you to create symlinks
                by settings the mode respectively and writing the target of the symlink
                directly into the file. This equals a default Linux-Symlink which
                is not dereferenced automatically, except that it can be created on
                filesystems not supporting it as well.

                Please note that globs or directories are not allowed in Blob objects.

                They are added at stage 0

            - BaseIndexEntry or type
                Handling equals the one of Blob objects, but the stage may be
                explicitly set. Please note that Index Entries require binary sha's.

        :param force:
            **CURRENTLY INEFFECTIVE**
            If True, otherwise ignored or excluded files will be
            added anyway.
            As opposed to the git-add command, we enable this flag by default
            as the API user usually wants the item to be added even though
            they might be excluded.

        :param fprogress:
            Function with signature f(path, done=False, item=item) called for each
            path to be added, one time once it is about to be added where done==False
            and once after it was added where done=True.
            item is set to the actual item we handle, either a Path or a BaseIndexEntry
            Please note that the processed path is not guaranteed to be present
            in the index already as the index is currently being processed.

        :param path_rewriter:
            Function with signature (string) func(BaseIndexEntry) function returning a path
            for each passed entry which is the path to be actually recorded for the
            object created from entry.path. This allows you to write an index which
            is not identical to the layout of the actual files on your hard-disk.
            If not None and ``items`` contain plain paths, these paths will be
            converted to Entries beforehand and passed to the path_rewriter.
            Please note that entry.path is relative to the git repository.

        :param write:
            If True, the index will be written once it was altered. Otherwise
            the changes only exist in memory and are not available to git commands.

        :param write_extension_data:
            If True, extension data will be written back to the index. This can lead to issues in case
            it is containing the 'TREE' extension, which will cause the `git commit` command to write an
            old tree, instead of a new one representing the now changed index.
            This doesn't matter if you use `IndexFile.commit()`, which ignores the `TREE` extension altogether.
            You should set it to True if you intend to use `IndexFile.commit()` exclusively while maintaining
            support for third-party extensions. Besides that, you can usually safely ignore the built-in
            extensions when using GitPython on repositories that are not handled manually at all.
            All current built-in extensions are listed here:
            http://opensource.apple.com/source/Git/Git-26/src/git-htmldocs/technical/index-format.txt

        :return:
            List(BaseIndexEntries) representing the entries just actually added.

        :raise OSError:
            if a supplied Path did not exist. Please note that BaseIndexEntry
            Objects that do not have a null sha will be added even if their paths
            do not exist.
        """
        # sort the entries into strings and Entries, Blobs are converted to entries
        # automatically
        # paths can be git-added, for everything else we use git-update-index
        paths, entries = self._preprocess_add_items(items)
        entries_added: List[BaseIndexEntry] = []
        # This code needs a working tree, therefore we try not to run it unless required.
        # That way, we are OK on a bare repository as well.
        # If there are no paths, the rewriter has nothing to do either
        if paths:
            entries_added.extend(self._entries_for_paths(paths, path_rewriter, fprogress, entries))

        # HANDLE ENTRIES
        if entries:
            null_mode_entries = [e for e in entries if e.mode == 0]
            if null_mode_entries:
                raise ValueError(
                    "At least one Entry has a null-mode - please use index.remove to remove files for clarity"
                )
            # END null mode should be remove

            # HANDLE ENTRY OBJECT CREATION
            # create objects if required, otherwise go with the existing shas
            null_entries_indices = [i for i, e in enumerate(entries) if e.binsha == Object.NULL_BIN_SHA]
            if null_entries_indices:
                # null-sha entries point at working-tree files whose content
                # still needs to be stored in the object database
                @git_working_dir
                def handle_null_entries(self: "IndexFile") -> None:
                    for ei in null_entries_indices:
                        null_entry = entries[ei]
                        new_entry = self._store_path(null_entry.path, fprogress)

                        # update null entry
                        entries[ei] = BaseIndexEntry(
                            (
                                null_entry.mode,
                                new_entry.binsha,
                                null_entry.stage,
                                null_entry.path,
                            )
                        )
                    # END for each entry index

                # end closure
                handle_null_entries(self)
            # END null_entry handling

            # REWRITE PATHS
            # If we have to rewrite the entries, do so now, after we have generated
            # all object sha's
            if path_rewriter:
                for i, e in enumerate(entries):
                    entries[i] = BaseIndexEntry((e.mode, e.binsha, e.stage, path_rewriter(e)))
                # END for each entry
            # END handle path rewriting

            # just go through the remaining entries and provide progress info
            for i, entry in enumerate(entries):
                progress_sent = i in null_entries_indices
                if not progress_sent:
                    fprogress(entry.path, False, entry)
                    fprogress(entry.path, True, entry)
                # END handle progress
            # END for each entry
            entries_added.extend(entries)
        # END if there are base entries

        # FINALIZE
        # add the new entries to this instance
        for entry in entries_added:
            self.entries[(entry.path, 0)] = IndexEntry.from_base(entry)

        if write:
            self.write(ignore_extension_data=not write_extension_data)
        # END handle write

        return entries_added
882 def _items_to_rela_paths(
883 self,
884 items: Union[PathLike, Sequence[Union[PathLike, BaseIndexEntry, Blob, Submodule]]],
885 ) -> List[PathLike]:
886 """Returns a list of repo-relative paths from the given items which
887 may be absolute or relative paths, entries or blobs"""
888 paths = []
889 # if string put in list
890 if isinstance(items, (str, os.PathLike)):
891 items = [items]
893 for item in items:
894 if isinstance(item, (BaseIndexEntry, (Blob, Submodule))):
895 paths.append(self._to_relative_path(item.path))
896 elif isinstance(item, str):
897 paths.append(self._to_relative_path(item))
898 else:
899 raise TypeError("Invalid item type: %r" % item)
900 # END for each item
901 return paths
903 @post_clear_cache
904 @default_index
905 def remove(
906 self,
907 items: Sequence[Union[PathLike, Blob, BaseIndexEntry, "Submodule"]],
908 working_tree: bool = False,
909 **kwargs: Any,
910 ) -> List[str]:
911 """Remove the given items from the index and optionally from
912 the working tree as well.
914 :param items:
915 Multiple types of items are supported which may be be freely mixed.
917 - path string
918 Remove the given path at all stages. If it is a directory, you must
919 specify the r=True keyword argument to remove all file entries
920 below it. If absolute paths are given, they will be converted
921 to a path relative to the git repository directory containing
922 the working tree
924 The path string may include globs, such as \\*.c.
926 - Blob Object
927 Only the path portion is used in this case.
929 - BaseIndexEntry or compatible type
930 The only relevant information here Yis the path. The stage is ignored.
932 :param working_tree:
933 If True, the entry will also be removed from the working tree, physically
934 removing the respective file. This may fail if there are uncommitted changes
935 in it.
937 :param kwargs:
938 Additional keyword arguments to be passed to git-rm, such
939 as 'r' to allow recursive removal of
941 :return:
942 List(path_string, ...) list of repository relative paths that have
943 been removed effectively.
944 This is interesting to know in case you have provided a directory or
945 globs. Paths are relative to the repository."""
946 args = []
947 if not working_tree:
948 args.append("--cached")
949 args.append("--")
951 # preprocess paths
952 paths = self._items_to_rela_paths(items)
953 removed_paths = self.repo.git.rm(args, paths, **kwargs).splitlines()
955 # process output to gain proper paths
956 # rm 'path'
957 return [p[4:-1] for p in removed_paths]
959 @post_clear_cache
960 @default_index
961 def move(
962 self,
963 items: Sequence[Union[PathLike, Blob, BaseIndexEntry, "Submodule"]],
964 skip_errors: bool = False,
965 **kwargs: Any,
966 ) -> List[Tuple[str, str]]:
967 """Rename/move the items, whereas the last item is considered the destination of
968 the move operation. If the destination is a file, the first item ( of two )
969 must be a file as well. If the destination is a directory, it may be preceded
970 by one or more directories or files.
972 The working tree will be affected in non-bare repositories.
974 :parma items:
975 Multiple types of items are supported, please see the 'remove' method
976 for reference.
977 :param skip_errors:
978 If True, errors such as ones resulting from missing source files will
979 be skipped.
980 :param kwargs:
981 Additional arguments you would like to pass to git-mv, such as dry_run
982 or force.
984 :return:List(tuple(source_path_string, destination_path_string), ...)
985 A list of pairs, containing the source file moved as well as its
986 actual destination. Relative to the repository root.
988 :raise ValueError: If only one item was given
989 GitCommandError: If git could not handle your request"""
990 args = []
991 if skip_errors:
992 args.append("-k")
994 paths = self._items_to_rela_paths(items)
995 if len(paths) < 2:
996 raise ValueError("Please provide at least one source and one destination of the move operation")
998 was_dry_run = kwargs.pop("dry_run", kwargs.pop("n", None))
999 kwargs["dry_run"] = True
1001 # first execute rename in dryrun so the command tells us what it actually does
1002 # ( for later output )
1003 out = []
1004 mvlines = self.repo.git.mv(args, paths, **kwargs).splitlines()
1006 # parse result - first 0:n/2 lines are 'checking ', the remaining ones
1007 # are the 'renaming' ones which we parse
1008 for ln in range(int(len(mvlines) / 2), len(mvlines)):
1009 tokens = mvlines[ln].split(" to ")
1010 assert len(tokens) == 2, "Too many tokens in %s" % mvlines[ln]
1012 # [0] = Renaming x
1013 # [1] = y
1014 out.append((tokens[0][9:], tokens[1]))
1015 # END for each line to parse
1017 # either prepare for the real run, or output the dry-run result
1018 if was_dry_run:
1019 return out
1020 # END handle dryrun
1022 # now apply the actual operation
1023 kwargs.pop("dry_run")
1024 self.repo.git.mv(args, paths, **kwargs)
1026 return out
1028 def commit(
1029 self,
1030 message: str,
1031 parent_commits: Union[Commit_ish, None] = None,
1032 head: bool = True,
1033 author: Union[None, "Actor"] = None,
1034 committer: Union[None, "Actor"] = None,
1035 author_date: Union[str, None] = None,
1036 commit_date: Union[str, None] = None,
1037 skip_hooks: bool = False,
1038 ) -> Commit:
1039 """Commit the current default index file, creating a commit object.
1040 For more information on the arguments, see Commit.create_from_tree().
1042 :note: If you have manually altered the .entries member of this instance,
1043 don't forget to write() your changes to disk beforehand.
1044 Passing skip_hooks=True is the equivalent of using `-n`
1045 or `--no-verify` on the command line.
1046 :return: Commit object representing the new commit"""
1047 if not skip_hooks:
1048 run_commit_hook("pre-commit", self)
1050 self._write_commit_editmsg(message)
1051 run_commit_hook("commit-msg", self, self._commit_editmsg_filepath())
1052 message = self._read_commit_editmsg()
1053 self._remove_commit_editmsg()
1054 tree = self.write_tree()
1055 rval = Commit.create_from_tree(
1056 self.repo,
1057 tree,
1058 message,
1059 parent_commits,
1060 head,
1061 author=author,
1062 committer=committer,
1063 author_date=author_date,
1064 commit_date=commit_date,
1065 )
1066 if not skip_hooks:
1067 run_commit_hook("post-commit", self)
1068 return rval
1070 def _write_commit_editmsg(self, message: str) -> None:
1071 with open(self._commit_editmsg_filepath(), "wb") as commit_editmsg_file:
1072 commit_editmsg_file.write(message.encode(defenc))
1074 def _remove_commit_editmsg(self) -> None:
1075 os.remove(self._commit_editmsg_filepath())
1077 def _read_commit_editmsg(self) -> str:
1078 with open(self._commit_editmsg_filepath(), "rb") as commit_editmsg_file:
1079 return commit_editmsg_file.read().decode(defenc)
1081 def _commit_editmsg_filepath(self) -> str:
1082 return osp.join(self.repo.common_dir, "COMMIT_EDITMSG")
1084 def _flush_stdin_and_wait(cls, proc: "Popen[bytes]", ignore_stdout: bool = False) -> bytes:
1085 stdin_IO = proc.stdin
1086 if stdin_IO:
1087 stdin_IO.flush()
1088 stdin_IO.close()
1090 stdout = b""
1091 if not ignore_stdout and proc.stdout:
1092 stdout = proc.stdout.read()
1094 if proc.stdout:
1095 proc.stdout.close()
1096 proc.wait()
1097 return stdout
    @default_index
    def checkout(
        self,
        paths: Union[None, Iterable[PathLike]] = None,
        force: bool = False,
        fprogress: Callable = lambda *args: None,
        **kwargs: Any,
    ) -> Union[None, Iterator[PathLike], Sequence[PathLike]]:
        """Checkout the given paths or all files from the version known to the index into
        the working tree.

        :note: Be sure you have written pending changes using the ``write`` method
            in case you have altered the entries dictionary directly

        :param paths:
            If None, all paths in the index will be checked out. Otherwise an iterable
            of relative or absolute paths or a single path pointing to files or directories
            in the index is expected.

        :param force:
            If True, existing files will be overwritten even if they contain local modifications.
            If False, these will trigger a CheckoutError.

        :param fprogress:
            see :func:`IndexFile.add` for signature and explanation.
            The provided progress information will contain None as path and item if no
            explicit paths are given. Otherwise progress information will be send
            prior and after a file has been checked out

        :param kwargs:
            Additional arguments to be passed to git-checkout-index

        :return:
            iterable yielding paths to files which have been checked out and are
            guaranteed to match the version stored in the index

        :raise exc.CheckoutError:
            If at least one file failed to be checked out. This is a summary,
            hence it will checkout as many files as it can anyway.
            If one of files or directories do not exist in the index
            ( as opposed to the original git command who ignores them ).
            Raise GitCommandError if error lines could not be parsed - this truly is
            an exceptional state

        .. note:: The checkout is limited to checking out the files in the
            index. Files which are not in the index anymore and exist in
            the working tree will not be deleted. This behaviour is fundamentally
            different to *head.checkout*, i.e. if you want git-checkout like behaviour,
            use head.checkout instead of index.checkout.
        """
        args = ["--index"]
        if force:
            args.append("--force")

        # accumulated across both execution modes; handle_stderr below closes
        # over these lists and fills them while parsing git's stderr
        failed_files = []
        failed_reasons = []
        unknown_lines = []

        def handle_stderr(proc: "Popen[bytes]", iter_checked_out_files: Iterable[PathLike]) -> None:
            # Parse git-checkout-index stderr: classify each line into a failed
            # file + reason, collect unparseable lines, then raise
            # GitCommandError (unknown output) or CheckoutError (known failures).
            stderr_IO = proc.stderr
            if not stderr_IO:
                return None  # return early if stderr empty
            else:
                stderr_bytes = stderr_IO.read()
            # line contents:
            stderr = stderr_bytes.decode(defenc)
            # git-checkout-index: this already exists
            # known suffixes of "git checkout-index: <path><ending>" lines
            endings = (
                " already exists",
                " is not in the cache",
                " does not exist at stage",
                " is unmerged",
            )
            for line in stderr.splitlines():
                if not line.startswith("git checkout-index: ") and not line.startswith("git-checkout-index: "):
                    # lines without the command prefix carry other failure shapes
                    is_a_dir = " is a directory"
                    unlink_issue = "unable to unlink old '"
                    already_exists_issue = " already exists, no checkout"  # created by entry.c:checkout_entry(...)
                    if line.endswith(is_a_dir):
                        failed_files.append(line[: -len(is_a_dir)])
                        failed_reasons.append(is_a_dir)
                    elif line.startswith(unlink_issue):
                        # path is quoted: take everything up to the closing quote
                        failed_files.append(line[len(unlink_issue) : line.rfind("'")])
                        failed_reasons.append(unlink_issue)
                    elif line.endswith(already_exists_issue):
                        failed_files.append(line[: -len(already_exists_issue)])
                        failed_reasons.append(already_exists_issue)
                    else:
                        unknown_lines.append(line)
                    continue
                # END special lines parsing

                for e in endings:
                    if line.endswith(e):
                        # strip the 20-char "git checkout-index: " prefix and the ending
                        failed_files.append(line[20 : -len(e)])
                        failed_reasons.append(e)
                        break
                    # END if ending matches
                # END for each possible ending
            # END for each line
            if unknown_lines:
                raise GitCommandError(("git-checkout-index",), 128, stderr)
            if failed_files:
                valid_files = list(set(iter_checked_out_files) - set(failed_files))
                raise CheckoutError(
                    "Some files could not be checked out from the index due to local modifications",
                    failed_files,
                    valid_files,
                    failed_reasons,
                )

        # END stderr handler

        if paths is None:
            # checkout everything in one shot
            args.append("--all")
            kwargs["as_process"] = 1
            fprogress(None, False, None)
            proc = self.repo.git.checkout_index(*args, **kwargs)
            proc.wait()
            fprogress(None, True, None)
            rval_iter = (e.path for e in self.entries.values())
            handle_stderr(proc, rval_iter)
            return rval_iter
        else:
            if isinstance(paths, str):
                paths = [paths]

            # make sure we have our entries loaded before we start checkout_index
            # which will hold a lock on it. We try to get the lock as well during
            # our entries initialization
            self.entries

            # feed paths to checkout-index via stdin, one per line
            args.append("--stdin")
            kwargs["as_process"] = True
            kwargs["istream"] = subprocess.PIPE
            proc = self.repo.git.checkout_index(args, **kwargs)
            # FIXME: Reading from GIL!
            make_exc = lambda: GitCommandError(("git-checkout-index",) + tuple(args), 128, proc.stderr.read())
            checked_out_files: List[PathLike] = []

            for path in paths:
                co_path = to_native_path_linux(self._to_relative_path(path))
                # if the item is not in the index, it could be a directory
                path_is_directory = False

                try:
                    self.entries[(co_path, 0)]
                except KeyError:
                    # not a direct index entry - treat it as a directory prefix
                    # and feed every entry below it
                    folder = str(co_path)
                    if not folder.endswith("/"):
                        folder += "/"
                    for entry in self.entries.values():
                        if str(entry.path).startswith(folder):
                            p = entry.path
                            self._write_path_to_stdin(proc, p, p, make_exc, fprogress, read_from_stdout=False)
                            checked_out_files.append(p)
                            path_is_directory = True
                        # END if entry is in directory
                    # END for each entry
                # END path exception handling

                if not path_is_directory:
                    self._write_path_to_stdin(proc, co_path, path, make_exc, fprogress, read_from_stdout=False)
                    checked_out_files.append(co_path)
                # END path is a file
            # END for each path
            try:
                self._flush_stdin_and_wait(proc, ignore_stdout=True)
            except GitCommandError:
                # Without parsing stdout we don't know what failed.
                raise CheckoutError(
                    "Some files could not be checked out from the index, probably because they didn't exist.",
                    failed_files,
                    [],
                    failed_reasons,
                )

            handle_stderr(proc, checked_out_files)
            return checked_out_files
        # END paths handling
    @default_index
    def reset(
        self,
        commit: Union[Commit, "Reference", str] = "HEAD",
        working_tree: bool = False,
        paths: Union[None, Iterable[PathLike]] = None,
        head: bool = False,
        **kwargs: Any,
    ) -> "IndexFile":
        """Reset the index to reflect the tree at the given commit. This will not
        adjust our HEAD reference as opposed to HEAD.reset by default.

        :param commit:
            Revision, Reference or Commit specifying the commit we should represent.
            If you want to specify a tree only, use IndexFile.from_tree and overwrite
            the default index.

        :param working_tree:
            If True, the files in the working tree will reflect the changed index.
            If False, the working tree will not be touched
            Please note that changes to the working copy will be discarded without
            warning !

        :param head:
            If True, the head will be set to the given commit. This is False by default,
            but if True, this method behaves like HEAD.reset.

        :param paths: if given as an iterable of absolute or repository-relative paths,
            only these will be reset to their state at the given commit'ish.
            The paths need to exist at the commit, otherwise an exception will be
            raised.

        :param kwargs:
            Additional keyword arguments passed to git-reset

        .. note:: IndexFile.reset, as opposed to HEAD.reset, will not delete any files
            in order to maintain a consistent working tree. Instead, it will just
            checkout the files according to their state in the index.
            If you want git-reset like behaviour, use *HEAD.reset* instead.

        :return: self"""
        # NOTE(review): **kwargs is accepted but never forwarded in this body -
        # despite the docstring, nothing here invokes git-reset; confirm intent.
        # what we actually want to do is to merge the tree into our existing
        # index, which is what git-read-tree does
        new_inst = type(self).from_tree(self.repo, commit)
        if not paths:
            self.entries = new_inst.entries
        else:
            nie = new_inst.entries
            for path in paths:
                path = self._to_relative_path(path)
                try:
                    key = entry_key(path, 0)
                    self.entries[key] = nie[key]
                except KeyError:
                    # if key is not in theirs, it mustn't be in ours
                    try:
                        del self.entries[key]
                    except KeyError:
                        pass
                    # END handle deletion keyerror
                # END handle keyerror
            # END for each path
        # END handle paths
        self.write()

        if working_tree:
            # force-checkout so the working tree matches the (new) index state
            self.checkout(paths=paths, force=True)
        # END handle working tree

        if head:
            self.repo.head.set_commit(self.repo.commit(commit), logmsg="%s: Updating HEAD" % commit)
        # END handle head change

        return self
1356 # @ default_index, breaks typing for some reason, copied into function
1357 def diff(
1358 self, # type: ignore[override]
1359 other: Union[Type["git_diff.Diffable.Index"], "Tree", "Commit", str, None] = git_diff.Diffable.Index,
1360 paths: Union[PathLike, List[PathLike], Tuple[PathLike, ...], None] = None,
1361 create_patch: bool = False,
1362 **kwargs: Any,
1363 ) -> git_diff.DiffIndex:
1364 """Diff this index against the working copy or a Tree or Commit object
1366 For a documentation of the parameters and return values, see,
1367 Diffable.diff
1369 :note:
1370 Will only work with indices that represent the default git index as
1371 they have not been initialized with a stream.
1372 """
1374 # only run if we are the default repository index
1375 if self._file_path != self._index_path():
1376 raise AssertionError("Cannot call %r on indices that do not represent the default git index" % self.diff())
1377 # index against index is always empty
1378 if other is self.Index:
1379 return git_diff.DiffIndex()
1381 # index against anything but None is a reverse diff with the respective
1382 # item. Handle existing -R flags properly. Transform strings to the object
1383 # so that we can call diff on it
1384 if isinstance(other, str):
1385 other = self.repo.rev_parse(other)
1386 # END object conversion
1388 if isinstance(other, Object): # for Tree or Commit
1389 # invert the existing R flag
1390 cur_val = kwargs.get("R", False)
1391 kwargs["R"] = not cur_val
1392 return other.diff(self.Index, paths, create_patch, **kwargs)
1393 # END diff against other item handling
1395 # if other is not None here, something is wrong
1396 if other is not None:
1397 raise ValueError("other must be None, Diffable.Index, a Tree or Commit, was %r" % other)
1399 # diff against working copy - can be handled by superclass natively
1400 return super(IndexFile, self).diff(other, paths, create_patch, **kwargs)