Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/smmap/mman.py: 23%

243 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1"""Module containing a memory manager which provides a sliding window on a number of memory mapped files"""

2from .util import ( 

3 MapWindow, 

4 MapRegion, 

5 MapRegionList, 

6 is_64_bit, 

7) 

8 

9import sys 

10from functools import reduce 

11 

12__all__ = ["StaticWindowMapManager", "SlidingWindowMapManager", "WindowCursor"] 

13#{ Utilities 

14 

15#}END utilities 

16 

17 

class WindowCursor:

    """
    Pointer into the mapped region of the memory manager, keeping the map
    alive until it is destroyed and no other client uses it.

    Cursors should not be created manually, but are instead returned by the
    ``make_cursor()`` method of a window map manager.

    A cursor can be used as a context manager; leaving the ``with`` block
    releases the region it currently uses.

    **Note:** The current implementation serves both the static and the sliding
    window manager, which means it has to suit the somewhat different sliding
    manager as well. It could be improved, but there is no real need to do so."""
    __slots__ = (
        '_manager',  # the manager keeping all file regions
        '_rlist',    # a regions list with regions for our file
        '_region',   # our current class:`MapRegion` or None
        '_ofs',      # relative offset from the actually mapped area to our start area
        '_size'      # maximum size we should provide
    )

    def __init__(self, manager=None, regions=None):
        """Initialize a cursor without a current region.

        :param manager: the manager that owns the regions, or None
        :param regions: the regions list of the file this cursor refers to, or None
            for a cursor which is not associated with a file yet"""
        self._manager = manager
        self._rlist = regions
        self._region = None
        self._ofs = 0
        self._size = 0

    def __del__(self):
        self._destroy()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self._destroy()

    def _destroy(self):
        """Destruction code to decrement counters.

        Releases the current region and, if the regions list of our file is
        empty, removes that list from the manager's file dictionary."""
        self.unuse_region()

        if self._rlist is not None:
            try:
                if len(self._rlist) == 0:
                    # Free all resources associated with the mapped file
                    self._manager._fdict.pop(self._rlist.path_or_fd())
                # END remove regions list from manager
            except (TypeError, KeyError):
                # KeyError: the entry is already gone, e.g. because _destroy() ran
                # before (__exit__ followed by __del__ both end up here).
                # TypeError: tolerated as well - the original implementation
                # attributes it to objects vanishing during interpreter shutdown.
                pass
            # END exception handling
        # END handle regions

    def _copy_from(self, rhs):
        """Copy all data from rhs into this instance, handles usage count.

        :param rhs: the cursor to copy from. It may be unassociated, in which
            case this instance ends up unassociated too."""
        self._manager = rhs._manager
        if rhs._rlist is None:
            # An unassociated cursor has no regions list. type(None)(None) would
            # raise a TypeError, so there is nothing to duplicate here.
            self._rlist = None
        else:
            self._rlist = type(rhs._rlist)(rhs._rlist)
        # END handle missing regions list
        self._region = rhs._region
        self._ofs = rhs._ofs
        self._size = rhs._size

        if self._rlist is not None:
            for region in self._rlist:
                region.increment_client_count()
            # END for each region
        # END handle regions list

        if self._region is not None:
            self._region.increment_client_count()
        # END handle regions

    def __copy__(self):
        """copy module interface"""
        cpy = type(self)()
        cpy._copy_from(self)
        return cpy

    #{ Interface
    def assign(self, rhs):
        """Assign rhs to this instance. This is required in order to get a real copy.
        Alternatively, you can copy an existing instance using the copy module.

        :param rhs: the cursor whose state should be taken over"""
        if rhs is self:
            # Destroying our own state first would leave nothing to copy from
            return
        # END handle self-assignment
        self._destroy()
        self._copy_from(rhs)

    def use_region(self, offset=0, size=0, flags=0):
        """Assure we point to a window which allows access to the given offset into the file

        :param offset: absolute offset in bytes into the file
        :param size: amount of bytes to map. If 0, all available bytes will be mapped
        :param flags: additional flags to be given to os.open in case a file handle is initially opened
            for mapping. Has no effect if a region can actually be reused.
        :return: this instance - it should be queried for whether it points to a valid memory region.
            This is not the case if the mapping failed because we reached the end of the file

        **Note:** The size actually mapped may be smaller than the given size. If that is the case,
        either the file has reached its end, or the map was created between two existing regions"""
        need_region = True
        man = self._manager
        fsize = self._rlist.file_size()
        size = min(size or fsize, man.window_size() or fsize)    # clamp size to window size

        if self._region is not None:
            if self._region.includes_ofs(offset):
                # the region we already hold covers the offset - keep it
                need_region = False
            else:
                self.unuse_region()
            # END handle existing region
        # END check existing region

        # offset too large ?
        if offset >= fsize:
            return self
        # END handle offset

        if need_region:
            self._region = man._obtain_region(self._rlist, offset, size, flags, False)
            self._region.increment_client_count()
        # END need region handling

        self._ofs = offset - self._region._b
        self._size = min(size, self._region.ofs_end() - offset)

        return self

    def unuse_region(self):
        """Unuse the current region. Does nothing if we have no current region

        **Note:** the cursor unuses the region automatically upon destruction. It is recommended
        to un-use the region once you are done reading from it in persistent cursors as it
        helps to free up resource more quickly"""
        if self._region is not None:
            self._region.increment_client_count(-1)
        self._region = None
        # note: should reset ofs and size, but we spare that for performance. Its not
        # allowed to query information if we are not valid !

    def buffer(self):
        """Return a buffer object which allows access to our memory region from our offset
        to the window size. Please note that it might be smaller than you requested when calling use_region()

        **Note:** You can only obtain a buffer if this instance is_valid() !

        **Note:** buffers should not be cached past the duration of your access as it will
        prevent resources from being freed even though they might not be accounted for anymore !"""
        return memoryview(self._region.buffer())[self._ofs:self._ofs + self._size]

    def map(self):
        """
        :return: the underlying raw memory map. Please note that the offset and size is likely to be different
            to what you set as offset and size. Use it only if you are sure about the region it maps, which is the whole
            file in case of StaticWindowMapManager"""
        return self._region.map()

    def is_valid(self):
        """:return: True if we have a valid and usable region"""
        return self._region is not None

    def is_associated(self):
        """:return: True if we are associated with a specific file already"""
        return self._rlist is not None

    def ofs_begin(self):
        """:return: offset to the first byte pointed to by our cursor

        **Note:** only if is_valid() is True"""
        return self._region._b + self._ofs

    def ofs_end(self):
        """:return: offset to one past the last available byte

        **Note:** only if is_valid() is True"""
        # unroll method calls for performance !
        return self._region._b + self._ofs + self._size

    def size(self):
        """:return: amount of bytes we point to"""
        return self._size

    def region(self):
        """:return: our mapped region, or None if nothing is mapped yet"""
        return self._region

    def includes_ofs(self, ofs):
        """:return: True if the given absolute offset is contained in the cursors
            current region

        **Note:** cursor must be valid for this to work"""
        # unroll methods
        return (self._region._b + self._ofs) <= ofs < (self._region._b + self._ofs + self._size)

    def file_size(self):
        """:return: size of the underlying file"""
        return self._rlist.file_size()

    def path_or_fd(self):
        """:return: path or file descriptor of the underlying mapped file"""
        return self._rlist.path_or_fd()

    def path(self):
        """:return: path of the underlying mapped file
        :raise ValueError: if attached path is not a path"""
        if isinstance(self._rlist.path_or_fd(), int):
            raise ValueError("Path queried although mapping was applied to a file descriptor")
        # END handle type
        return self._rlist.path_or_fd()

    def fd(self):
        """:return: file descriptor used to create the underlying mapping.

        **Note:** it is not required to be valid anymore
        :raise ValueError: if the mapping was not created by a file descriptor"""
        if isinstance(self._rlist.path_or_fd(), str):
            raise ValueError("File descriptor queried although mapping was generated from path")
        # END handle type
        return self._rlist.path_or_fd()

    #} END interface

234 

235 

class StaticWindowMapManager:

    """Provides a manager which will produce single size cursors that are allowed
    to always map the whole file.

    Clients must be written to specifically know that they are accessing their data
    through a StaticWindowMapManager, as they otherwise have to deal with their window size.

    These clients would have to use a SlidingWindowMapBuffer to hide this fact.

    This type will always use a maximum window size, and optimize certain methods to
    accommodate this fact"""

    __slots__ = [
        '_fdict',               # mapping of path or file descriptor -> regions list
        '_window_size',         # maximum size of a window
        '_max_memory_size',     # maximum amount of memory we may allocate
        '_max_handle_count',    # maximum amount of handles to keep open
        '_memory_size',         # currently allocated memory size
        '_handle_count',        # amount of currently allocated file handles
    ]

    #{ Configuration
    MapRegionListCls = MapRegionList
    MapWindowCls = MapWindow
    MapRegionCls = MapRegion
    WindowCursorCls = WindowCursor
    #} END configuration

    _MB_in_bytes = 1024 * 1024

    def __init__(self, window_size=0, max_memory_size=0, max_open_handles=sys.maxsize):
        """initialize the manager with the given parameters.

        :param window_size: if negative (e.g. -1), a default window size will be chosen depending on
            the operating system's architecture. It will internally be quantified to a multiple of the page size
            If 0, the window may have any size, which basically results in mapping the whole file at once
        :param max_memory_size: maximum amount of memory we may map at once before releasing mapped regions.
            If 0, a viable default will be set depending on the system's architecture.
            It is a soft limit that is tried to be kept, but nothing bad happens if we have to over-allocate
        :param max_open_handles: if not maxint, limit the amount of open file handles to the given number.
            Otherwise the amount is only limited by the system itself. If a system or soft limit is hit,
            the manager will free as many handles as possible"""
        self._fdict = dict()
        self._window_size = window_size
        self._max_memory_size = max_memory_size
        self._max_handle_count = max_open_handles
        self._memory_size = 0
        self._handle_count = 0

        if window_size < 0:
            # default window: 64 MB, or 1024 MB on 64 bit systems
            coeff = 64
            if is_64_bit():
                coeff = 1024
            # END handle arch
            self._window_size = coeff * self._MB_in_bytes
        # END handle max window size

        if max_memory_size == 0:
            # default limit: 1024 MB, or 8192 MB on 64 bit systems
            coeff = 1024
            if is_64_bit():
                coeff = 8192
            # END handle arch
            self._max_memory_size = coeff * self._MB_in_bytes
        # END handle max memory size

    #{ Internal Methods

    def _collect_lru_region(self, size):
        """Unmap the region which was least-recently used and has no client

        :param size: size of the region we want to map next (assuming its not already mapped partially or full
            if 0, we try to free any available region
        :return: Amount of freed regions

        .. Note::
            We don't raise exceptions anymore, in order to keep the system working, allowing temporary overallocation.
            If the system runs out of memory, it will tell.

        .. TODO::
            implement a case where all unused regions are discarded efficiently.
            Currently its only brute force
        """
        num_found = 0
        while (size == 0) or (self._memory_size + size > self._max_memory_size):
            lru_region = None
            lru_list = None
            for regions in self._fdict.values():
                for region in regions:
                    # check client count - if it's 1, it's just us
                    if (region.client_count() == 1 and
                            (lru_region is None or region._uc < lru_region._uc)):
                        lru_region = region
                        lru_list = regions
                    # END update lru_region
                # END for each region
            # END for each regions list

            if lru_region is None:
                # nothing left that could be released
                break
            # END handle region not found

            num_found += 1
            lru_list.remove(lru_region)
            lru_region.increment_client_count(-1)
            self._memory_size -= lru_region.size()
            self._handle_count -= 1
        # END while there is more memory to free
        return num_found

    def _obtain_region(self, a, offset, size, flags, is_recursive):
        """Utility to create a new region - for more information on the parameters,
        see WindowCursor.use_region.

        :param a: A regions (a)rray
        :param is_recursive: True if this call is the retry after resources were freed;
            a failure to create the region is propagated in that case
        :return: The newly created region, or the existing one if the file is mapped already"""
        if self._memory_size + size > self._max_memory_size:
            self._collect_lru_region(size)
        # END handle collection

        r = None
        if a:
            # the whole file is mapped by a single region
            assert len(a) == 1
            r = a[0]
        else:
            try:
                r = self.MapRegionCls(a.path_or_fd(), 0, sys.maxsize, flags)
            except Exception:
                # apparently we are out of system resources or hit a limit
                # As many more operations are likely to fail in that condition (
                # like reading a file from disk, etc) we free up as much as possible
                # As this invalidates our insert position, we have to recurse here
                if is_recursive:
                    # we already tried this, and still have no success in obtaining
                    # a mapping. This is an exception, so we propagate it
                    raise
                # END handle existing recursion
                self._collect_lru_region(0)
                return self._obtain_region(a, offset, size, flags, True)
            # END handle exceptions

            self._handle_count += 1
            self._memory_size += r.size()
            a.append(r)
        # END handle array

        assert r.includes_ofs(offset)
        return r

    #}END internal methods

    #{ Interface
    def make_cursor(self, path_or_fd):
        """
        :return: a cursor pointing to the given path or file descriptor.
            It can be used to map new regions of the file into memory

        **Note:** if a file descriptor is given, it is assumed to be open and valid,
        but may be closed afterwards. To refer to the same file, you may reuse
        your existing file descriptor, but keep in mind that new windows can only
        be mapped as long as it stays valid. This is why using actual file paths
        is preferred unless you plan to keep the file descriptor open.

        **Note:** file descriptors are problematic as they are not necessarily unique, as two
        different files opened and closed in succession might have the same file descriptor id.

        **Note:** Using file descriptors directly is faster once new windows are mapped as it
        prevents the file to be opened again just for the purpose of mapping it."""
        regions = self._fdict.get(path_or_fd)
        if regions is None:
            regions = self.MapRegionListCls(path_or_fd)
            self._fdict[path_or_fd] = regions
        # END obtain region for path
        return self.WindowCursorCls(self, regions)

    def collect(self):
        """Collect all available free-to-collect mapped regions

        :return: Amount of freed handles"""
        return self._collect_lru_region(0)

    def num_file_handles(self):
        """:return: amount of file handles in use. Each mapped region uses one file handle"""
        return self._handle_count

    def num_open_files(self):
        """:return: amount of files which currently have at least one mapped region"""
        return sum(1 for rlist in self._fdict.values() if len(rlist) > 0)

    def window_size(self):
        """:return: size of each window when allocating new regions"""
        return self._window_size

    def mapped_memory_size(self):
        """:return: amount of bytes currently mapped in total"""
        return self._memory_size

    def max_file_handles(self):
        """:return: maximum amount of handles we may have opened"""
        return self._max_handle_count

    def max_mapped_memory_size(self):
        """:return: maximum amount of memory we may allocate"""
        return self._max_memory_size

    #} END interface

    #{ Special Purpose Interface

    def force_map_handle_removal_win(self, base_path):
        """ONLY AVAILABLE ON WINDOWS
        On windows removing files is not allowed if anybody still has it opened.
        If this process is ourselves, and if the whole process uses this memory
        manager (as far as the parent framework is concerned) we can enforce
        closing all memory maps whose path matches the given base path to
        allow the respective operation after all.
        The respective system must NOT access the closed memory regions anymore !
        This really may only be used if you know that the items which keep
        the cursors alive will not be using it anymore. They need to be recreated !

        :param base_path: path prefix; regions of all files whose path starts with it are released
        :return: Amount of closed handles, which is 0 on non-windows platforms

        **Note:** does nothing on non-windows platforms"""
        if sys.platform != 'win32':
            return 0
        # END early bailout

        num_closed = 0
        for path, rlist in self._fdict.items():
            # keys may also be integer file descriptors (see make_cursor), which
            # cannot match a path prefix
            if isinstance(path, str) and path.startswith(base_path):
                for region in rlist:
                    region.release()
                    num_closed += 1
                # END for each region
            # END path matches
        # END for each path
        return num_closed
    #} END special purpose interface

468 

469 

class SlidingWindowMapManager(StaticWindowMapManager):

    """Maintains a list of ranges of mapped memory regions in one or more files and allows to easily
    obtain additional regions assuring there is no overlap.
    Once a certain memory limit is reached globally, or if there cannot be more open file handles
    which result from each mmap call, the least recently used, and currently unused mapped regions
    are unloaded automatically.

    **Note:** currently not thread-safe !

    **Note:** in the current implementation, we will automatically unload windows if we either cannot
    create more memory maps (as the open file handles limit is hit) or if we have allocated more than
    a safe amount of memory already, which would possibly cause memory allocations to fail as our address
    space is full."""

    __slots__ = tuple()

    def __init__(self, window_size=-1, max_memory_size=0, max_open_handles=sys.maxsize):
        """Adjusts the default window size to -1"""
        super().__init__(window_size, max_memory_size, max_open_handles)

    def _obtain_region(self, a, offset, size, flags, is_recursive):
        """Return a region of the regions list `a` which includes `offset`, mapping a new one if needed.

        :param a: A regions (a)rray, assumed to be sorted by the start offset of its regions
        :param offset: absolute offset in bytes into the file
        :param size: amount of bytes requested at the offset
        :param flags: additional flags passed on when the region is created
        :param is_recursive: True if this call is the retry after resources were freed;
            a failure to create the region is propagated in that case
        :return: the existing or newly created region"""
        # bisect to find an existing region. The c++ implementation cannot
        # do that as it uses a linked list for regions.
        r = None
        lo = 0
        hi = len(a)
        while lo < hi:
            pivot = (lo + hi) // 2
            ofs = a[pivot]._b
            if ofs <= offset:
                if a[pivot].includes_ofs(offset):
                    r = a[pivot]
                    break
                # END have region
                lo = pivot + 1
            else:
                hi = pivot
            # END handle position
        # END while bisecting

        if r is None:
            window_size = self._window_size
            left = self.MapWindowCls(0, 0)
            mid = self.MapWindowCls(offset, size)
            right = self.MapWindowCls(a.file_size(), 0)

            # we want to honor the max memory size, and assure we have enough
            # memory available
            # Save calls !
            if self._memory_size + window_size > self._max_memory_size:
                self._collect_lru_region(window_size)
            # END handle collection

            # we assume the list remains sorted by offset
            # The position is determined only now, as the collection above may
            # have removed regions from the list.
            len_regions = len(a)
            insert_pos = next(
                (i for i, region in enumerate(a) if region._b > offset),
                len_regions,
            )
            # END obtain insert pos

            # adjust the actual offset and size values to create the largest
            # possible mapping between the neighbouring regions
            if insert_pos < len_regions:
                right = self.MapWindowCls.from_region(a[insert_pos])
            # END adjust right window
            if insert_pos > 0:
                left = self.MapWindowCls.from_region(a[insert_pos - 1])
            # END adjust left window

            mid.extend_left_to(left, window_size)
            mid.extend_right_to(right, window_size)
            mid.align()

            # it can happen that we align beyond the end of the file
            if mid.ofs_end() > right.ofs:
                mid.size = right.ofs - mid.ofs
            # END readjust size

            # insert new region at the right offset to keep the order
            try:
                if self._handle_count >= self._max_handle_count:
                    # handled below like any other resource exhaustion
                    raise Exception(
                        f"Reached the limit of {self._max_handle_count} open file handles"
                    )
                # END assert own imposed max file handles
                r = self.MapRegionCls(a.path_or_fd(), mid.ofs, mid.size, flags)
            except Exception:
                # apparently we are out of system resources or hit a limit
                # As many more operations are likely to fail in that condition (
                # like reading a file from disk, etc) we free up as much as possible
                # As this invalidates our insert position, we have to recurse here
                if is_recursive:
                    # we already tried this, and still have no success in obtaining
                    # a mapping. This is an exception, so we propagate it
                    raise
                # END handle existing recursion
                self._collect_lru_region(0)
                return self._obtain_region(a, offset, size, flags, True)
            # END handle exceptions

            self._handle_count += 1
            self._memory_size += r.size()
            a.insert(insert_pos, r)
        # END create new region
        return r