Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/gitdb/db/pack.py: 20%

88 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors 

2# 

3# This module is part of GitDB and is released under 

4# the New BSD License: http://www.opensource.org/licenses/bsd-license.php 

5"""Module containing a database to deal with packs""" 

6from gitdb.db.base import ( 

7 FileDBBase, 

8 ObjectDBR, 

9 CachingDB 

10) 

11 

12from gitdb.util import LazyMixin 

13 

14from gitdb.exc import ( 

15 BadObject, 

16 UnsupportedOperation, 

17 AmbiguousObjectName 

18) 

19 

20from gitdb.pack import PackEntity 

21 

22from functools import reduce 

23 

24import os 

25import glob 

26 

27__all__ = ('PackedDB', ) 

28 

29#{ Utilities 

30 

31 

class PackedDB(FileDBBase, ObjectDBR, CachingDB, LazyMixin):

    """A database operating on a set of object packs"""

    # Re-sort the priority list every N queries.
    # Performance tests don't show a measurable effect, but keeping
    # frequently-hit packs at the front should reduce average lookup cost.
    _sort_interval = 500

    def __init__(self, root_path):
        """Initialize the database on the given directory of pack files.

        :param root_path: directory expected to contain ``pack-*.pack`` files"""
        super().__init__(root_path)
        # self._entities is created lazily (see _set_cache_) as a list of
        # three-item lists:
        # * hits - number of times the pack was hit with a request
        # * entity - PackEntity instance
        # * sha_to_index - PackIndexFile.sha_to_index method for direct cache query
        self._hit_count = 0     # total amount of successful queries
        self._st_mtime = 0      # last modification time of our root path

    def _set_cache_(self, attr):
        # LazyMixin hook: build the entity list on first attribute access
        if attr == '_entities':
            self._entities = list()
            self.update_cache(force=True)
        # END handle entities initialization

    def _sort_entities(self):
        """Sort our entity list so the most frequently hit packs come first."""
        self._entities.sort(key=lambda l: l[0], reverse=True)

    def _pack_info(self, sha):
        """:return: tuple(entity, index) for an item at the given sha
        :param sha: 20 or 40 byte sha
        :raise BadObject:
        **Note:** This method is not thread-safe, but may be hit in multi-threaded
        operation. The worst thing that can happen though is a counter that
        was not incremented, or the list being in the wrong order. So we save
        the time for locking here, lets see how that goes"""
        # presort ?
        if self._hit_count % self._sort_interval == 0:
            self._sort_entities()
        # END update sorting

        for item in self._entities:
            index = item[2](sha)
            if index is not None:
                item[0] += 1            # one hit for you
                self._hit_count += 1    # general hit count
                return (item[1], index)
            # END index found in pack
        # END for each item

        # no hit, see whether we have to update packs
        # NOTE: considering packs don't change very often, we save this call
        # and leave it to the super-caller to trigger that
        raise BadObject(sha)

    #{ Object DB Read

    def has_object(self, sha):
        """:return: True if an object with the given sha exists in any pack"""
        try:
            self._pack_info(sha)
            return True
        except BadObject:
            return False
        # END exception handling

    def info(self, sha):
        """:return: object information for the object at the given sha
        :raise BadObject: if no pack contains the sha"""
        entity, index = self._pack_info(sha)
        return entity.info_at_index(index)

    def stream(self, sha):
        """:return: object stream for the object at the given sha
        :raise BadObject: if no pack contains the sha"""
        entity, index = self._pack_info(sha)
        return entity.stream_at_index(index)

    def sha_iter(self):
        """Iterate over the binary shas of all objects in all our packs."""
        for entity in self.entities():
            index_file = entity.index()
            sha_by_index = index_file.sha
            # NOTE: don't shadow the index file with the loop counter
            for pos in range(index_file.size()):
                yield sha_by_index(pos)
            # END for each index
        # END for each entity

    def size(self):
        """:return: total number of objects stored across all our packs"""
        return sum(item[1].index().size() for item in self._entities)

    #} END object db read

    #{ object db write

    def store(self, istream):
        """Storing individual objects is not feasible as a pack is designed to
        hold multiple objects. Writing or rewriting packs for single objects is
        inefficient"""
        raise UnsupportedOperation()

    #} END object db write

    #{ Interface

    def update_cache(self, force=False):
        """
        Update our cache with the actually existing packs on disk. Add new ones,
        and remove deleted ones. We keep the unchanged ones

        :param force: If True, the cache will be updated even though the directory
            does not appear to have changed according to its modification timestamp.
        :return: True if the packs have been updated so there is new information,
            False if there was no change to the pack database"""
        stat = os.stat(self.root_path())
        if not force and stat.st_mtime <= self._st_mtime:
            return False
        # END abort early on no change
        self._st_mtime = stat.st_mtime

        # packs are supposed to be prefixed with pack- by git-convention
        # get all pack files, figure out what changed
        pack_files = set(glob.glob(os.path.join(self.root_path(), "pack-*.pack")))
        our_pack_files = {item[1].pack().path() for item in self._entities}

        # new packs
        for pack_file in (pack_files - our_pack_files):
            # init the hit-counter/priority with the size, a good measure for hit-
            # probability. Its implemented so that only 12 bytes will be read
            entity = PackEntity(pack_file)
            self._entities.append([entity.pack().size(), entity, entity.index().sha_to_index])
        # END for each new packfile

        # removed packs - every path in this set must be present in our
        # entity list, so next() cannot legitimately be exhausted here
        for pack_file in (our_pack_files - pack_files):
            del_index = next(i for i, item in enumerate(self._entities)
                             if item[1].pack().path() == pack_file)
            del self._entities[del_index]
        # END for each removed pack

        # reinitialize priorities
        self._sort_entities()
        return True

    def entities(self):
        """:return: list of pack entities operated upon by this database"""
        return [item[1] for item in self._entities]

    def partial_to_complete_sha(self, partial_binsha, canonical_length):
        """:return: 20 byte sha as inferred by the given partial binary sha
        :param partial_binsha: binary sha with less than 20 bytes
        :param canonical_length: length of the corresponding canonical representation.
            It is required as binary sha's cannot display whether the original hex sha
            had an odd or even number of characters
        :raise AmbiguousObjectName: if the prefix matches more than one full sha
        :raise BadObject: if no pack contains a matching sha"""
        candidate = None
        for item in self._entities:
            item_index = item[1].index().partial_sha_to_index(partial_binsha, canonical_length)
            if item_index is not None:
                sha = item[1].index().sha(item_index)
                # two different full shas matching the prefix is ambiguous
                if candidate and candidate != sha:
                    raise AmbiguousObjectName(partial_binsha)
                candidate = sha
            # END handle full sha could be found
        # END for each entity

        if candidate:
            return candidate

        # still not found ?
        raise BadObject(partial_binsha)

    #} END interface