Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/smmap/mman.py: 23%
243 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1"""Module containing a memory manager which provides a sliding window on a number of memory mapped files"""
2from .util import (
3 MapWindow,
4 MapRegion,
5 MapRegionList,
6 is_64_bit,
7)
9import sys
10from functools import reduce
12__all__ = ["StaticWindowMapManager", "SlidingWindowMapManager", "WindowCursor"]
13#{ Utilities
15#}END utilities
class WindowCursor:
    """
    Pointer into the mapped region of the memory manager, keeping the map
    alive until it is destroyed and no other client uses it.

    Cursors should not be created manually, but are instead returned by the
    manager's ``make_cursor`` method.

    **Note:**: The current implementation is shared by the static and the sliding window
    manager, which means it must also suit the somewhat different sliding manager.
    It could be improved, but there is no real need to do so."""
    __slots__ = (
        '_manager',  # the manager keeping all file regions
        '_rlist',    # a regions list with regions for our file
        '_region',   # our current class:`MapRegion` or None
        '_ofs',      # relative offset from the actually mapped area to our start area
        '_size'      # maximum size we should provide
    )

    def __init__(self, manager=None, regions=None):
        """Create a cursor bound to ``manager`` and its ``regions`` list.

        :param manager: the manager which hands out regions, or None for an unassociated cursor
        :param regions: the regions list of the file this cursor reads from, or None"""
        self._manager = manager
        self._rlist = regions
        self._region = None
        self._ofs = 0
        self._size = 0

    def __del__(self):
        self._destroy()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self._destroy()

    def _destroy(self):
        """Release the current region and, if our regions list holds no region anymore,
        remove that list from the manager's file dictionary.

        May be called more than once (e.g. by ``__exit__`` and later by ``__del__``)."""
        self.unuse_region()

        if self._rlist is not None:
            try:
                if len(self._rlist) == 0:
                    # Free all resources associated with the mapped file
                    self._manager._fdict.pop(self._rlist.path_or_fd())
                # END remove regions list from manager
            except (TypeError, KeyError):
                # KeyError: the entry was removed already, for instance by an earlier
                # call of this method or by another cursor sharing the regions list.
                # TypeError: kept from the original handler, which reported it to
                # happen during interpreter shutdown; it is deliberately ignored.
                pass
            # END exception handling
        # END handle regions

    def _copy_from(self, rhs):
        """Copy all data from rhs into this instance, handles usage count

        An unassociated ``rhs`` (one without a regions list) yields an unassociated copy."""
        self._manager = rhs._manager
        # type(None)(None) raises TypeError, so an absent regions list is carried over as-is
        if rhs._rlist is None:
            self._rlist = None
        else:
            self._rlist = type(rhs._rlist)(rhs._rlist)
        # END handle missing regions list
        self._region = rhs._region
        self._ofs = rhs._ofs
        self._size = rhs._size

        if self._rlist is not None:
            for region in self._rlist:
                region.increment_client_count()
            # END for each region
        # END handle regions list

        if self._region is not None:
            self._region.increment_client_count()
        # END handle regions

    def __copy__(self):
        """copy module interface"""
        cpy = type(self)()
        cpy._copy_from(self)
        return cpy

    #{ Interface
    def assign(self, rhs):
        """Assign rhs to this instance. This is required in order to get a real copy.
        Alternatively, you can copy an existing instance using the copy module"""
        self._destroy()
        self._copy_from(rhs)

    def use_region(self, offset=0, size=0, flags=0):
        """Assure we point to a window which allows access to the given offset into the file

        :param offset: absolute offset in bytes into the file
        :param size: amount of bytes to map. If 0, all available bytes will be mapped
        :param flags: additional flags to be given to os.open in case a file handle is initially opened
            for mapping. Has no effect if a region can actually be reused.
        :return: this instance - it should be queried for whether it points to a valid memory region.
            This is not the case if the mapping failed because we reached the end of the file

        **Note:**: The size actually mapped may be smaller than the given size. If that is the case,
        either the file has reached its end, or the map was created between two existing regions"""
        need_region = True
        man = self._manager
        fsize = self._rlist.file_size()
        size = min(size or fsize, man.window_size() or fsize)     # clamp size to window size

        if self._region is not None:
            if self._region.includes_ofs(offset):
                need_region = False
            else:
                self.unuse_region()
            # END handle existing region
        # END check existing region

        # offset too large ? Then we stay (or became) invalid
        if offset >= fsize:
            return self
        # END handle offset

        if need_region:
            self._region = man._obtain_region(self._rlist, offset, size, flags, False)
            self._region.increment_client_count()
        # END need region handling

        self._ofs = offset - self._region._b
        self._size = min(size, self._region.ofs_end() - offset)

        return self

    def unuse_region(self):
        """Unuse the current region. Does nothing if we have no current region

        **Note:** the cursor unuses the region automatically upon destruction. It is recommended
        to un-use the region once you are done reading from it in persistent cursors as it
        helps to free up resource more quickly"""
        if self._region is not None:
            self._region.increment_client_count(-1)
        self._region = None
        # note: should reset ofs and size, but we spare that for performance. Its not
        # allowed to query information if we are not valid !

    def buffer(self):
        """Return a buffer object which allows access to our memory region from our offset
        to the window size. Please note that it might be smaller than you requested when calling use_region()

        **Note:** You can only obtain a buffer if this instance is_valid() !

        **Note:** buffers should not be cached passed the duration of your access as it will
        prevent resources from being freed even though they might not be accounted for anymore !"""
        return memoryview(self._region.buffer())[self._ofs:self._ofs + self._size]

    def map(self):
        """
        :return: the underlying raw memory map. Please note that the offset and size is likely to be different
            to what you set as offset and size. Use it only if you are sure about the region it maps, which is the whole
            file in case of StaticWindowMapManager"""
        return self._region.map()

    def is_valid(self):
        """:return: True if we have a valid and usable region"""
        return self._region is not None

    def is_associated(self):
        """:return: True if we are associated with a specific file already"""
        return self._rlist is not None

    def ofs_begin(self):
        """:return: offset to the first byte pointed to by our cursor

        **Note:** only if is_valid() is True"""
        return self._region._b + self._ofs

    def ofs_end(self):
        """:return: offset to one past the last available byte

        **Note:** only if is_valid() is True"""
        # unroll method calls for performance !
        return self._region._b + self._ofs + self._size

    def size(self):
        """:return: amount of bytes we point to"""
        return self._size

    def region(self):
        """:return: our mapped region, or None if nothing is mapped yet"""
        return self._region

    def includes_ofs(self, ofs):
        """:return: True if the given absolute offset is contained in the cursors
            current region

        **Note:** cursor must be valid for this to work"""
        # unroll methods
        return (self._region._b + self._ofs) <= ofs < (self._region._b + self._ofs + self._size)

    def file_size(self):
        """:return: size of the underlying file"""
        return self._rlist.file_size()

    def path_or_fd(self):
        """:return: path or file descriptor of the underlying mapped file"""
        return self._rlist.path_or_fd()

    def path(self):
        """:return: path of the underlying mapped file
        :raise ValueError: if attached path is not a path"""
        if isinstance(self._rlist.path_or_fd(), int):
            raise ValueError("Path queried although mapping was applied to a file descriptor")
        # END handle type
        return self._rlist.path_or_fd()

    def fd(self):
        """:return: file descriptor used to create the underlying mapping.

        **Note:** it is not required to be valid anymore
        :raise ValueError: if the mapping was not created by a file descriptor"""
        if isinstance(self._rlist.path_or_fd(), str):
            raise ValueError("File descriptor queried although mapping was generated from path")
        # END handle type
        return self._rlist.path_or_fd()

    #} END interface
class StaticWindowMapManager:
    """Provides a manager which will produce single size cursors that are allowed
    to always map the whole file.

    Clients must be written to specifically know that they are accessing their data
    through a StaticWindowMapManager, as they otherwise have to deal with their window size.

    These clients would have to use a SlidingWindowMapBuffer to hide this fact.

    This type will always use a maximum window size, and optimize certain methods to
    accommodate this fact"""

    __slots__ = [
        '_fdict',           # mapping of path or file descriptor -> regions list
        '_window_size',     # maximum size of a window
        '_max_memory_size',  # maximum amount of memory we may allocate
        '_max_handle_count',        # maximum amount of handles to keep open
        '_memory_size',     # currently allocated memory size
        '_handle_count',        # amount of currently allocated file handles
    ]

    #{ Configuration
    MapRegionListCls = MapRegionList
    MapWindowCls = MapWindow
    MapRegionCls = MapRegion
    WindowCursorCls = WindowCursor
    #} END configuration

    _MB_in_bytes = 1024 * 1024

    def __init__(self, window_size=0, max_memory_size=0, max_open_handles=sys.maxsize):
        """initialize the manager with the given parameters.

        :param window_size: if negative, a default window size will be chosen depending on
            the operating system's architecture.
            If 0, the window may have any size, which basically results in mapping the whole file at once
        :param max_memory_size: maximum amount of memory we may map at once before releasing mapped regions.
            If 0, a viable default will be set depending on the system's architecture.
            It is a soft limit that is tried to be kept, but nothing bad happens if we have to over-allocate
        :param max_open_handles: if not maxint, limit the amount of open file handles to the given number.
            Otherwise the amount is only limited by the system itself. If a system or soft limit is hit,
            the manager will free as many handles as possible"""
        self._fdict = dict()
        self._window_size = window_size
        self._max_memory_size = max_memory_size
        self._max_handle_count = max_open_handles
        self._memory_size = 0
        self._handle_count = 0

        if window_size < 0:
            # default window: 64 MB, or 1024 MB on 64 bit systems
            coeff = 64
            if is_64_bit():
                coeff = 1024
            # END handle arch
            self._window_size = coeff * self._MB_in_bytes
        # END handle max window size

        if max_memory_size == 0:
            # default limit: 1024 MB, or 8192 MB on 64 bit systems
            coeff = 1024
            if is_64_bit():
                coeff = 8192
            # END handle arch
            self._max_memory_size = coeff * self._MB_in_bytes
        # END handle max memory size

    #{ Internal Methods

    def _collect_lru_region(self, size):
        """Unmap the region which was least-recently used and has no client

        :param size: size of the region we want to map next (assuming its not already mapped partially or full
            if 0, we try to free any available region
        :return: Amount of freed regions

        .. Note::
            We don't raise exceptions anymore, in order to keep the system working, allowing temporary overallocation.
            If the system runs out of memory, it will tell.

        .. TODO::
            implement a case where all unused regions are discarded efficiently.
            Currently its only brute force
        """
        num_found = 0
        while (size == 0) or (self._memory_size + size > self._max_memory_size):
            lru_region = None
            lru_list = None
            for regions in self._fdict.values():
                for region in regions:
                    # check client count - if it's 1, it's just us
                    if (region.client_count() == 1 and
                            (lru_region is None or region._uc < lru_region._uc)):
                        lru_region = region
                        lru_list = regions
                    # END update lru_region
                # END for each region
            # END for each regions list

            if lru_region is None:
                # nothing left that could be freed
                break
            # END handle region not found

            num_found += 1
            del lru_list[lru_list.index(lru_region)]
            lru_region.increment_client_count(-1)
            self._memory_size -= lru_region.size()
            self._handle_count -= 1
        # END while there is more memory to free
        return num_found

    def _obtain_region(self, a, offset, size, flags, is_recursive):
        """Utility to create a new region - for more information on the parameters,
        see WindowCursor.use_region.

        :param a: A regions (a)rray
        :param is_recursive: True if this call is the retry after a failed mapping attempt;
            a second failure is propagated to the caller
        :return: The region of ``a``, which is created first if ``a`` is empty"""
        if self._memory_size + size > self._max_memory_size:
            self._collect_lru_region(size)
        # END handle collection

        r = None
        if a:
            assert len(a) == 1
            r = a[0]
        else:
            try:
                r = self.MapRegionCls(a.path_or_fd(), 0, sys.maxsize, flags)
            except Exception:
                # apparently we are out of system resources or hit a limit
                # As many more operations are likely to fail in that condition (
                # like reading a file from disk, etc) we free up as much as possible
                # and try exactly once more
                if is_recursive:
                    # we already tried this, and still have no success in obtaining
                    # a mapping. This is an exception, so we propagate it
                    raise
                # END handle existing recursion
                self._collect_lru_region(0)
                return self._obtain_region(a, offset, size, flags, True)
            # END handle exceptions

            self._handle_count += 1
            self._memory_size += r.size()
            a.append(r)
        # END handle array

        assert r.includes_ofs(offset)
        return r

    #}END internal methods

    #{ Interface
    def make_cursor(self, path_or_fd):
        """
        :return: a cursor pointing to the given path or file descriptor.
            It can be used to map new regions of the file into memory

        **Note:** if a file descriptor is given, it is assumed to be open and valid,
        but may be closed afterwards. To refer to the same file, you may reuse
        your existing file descriptor, but keep in mind that new windows can only
        be mapped as long as it stays valid. This is why using actual file paths
        is preferred unless you plan to keep the file descriptor open.

        **Note:** file descriptors are problematic as they are not necessarily unique, as two
        different files opened and closed in succession might have the same file descriptor id.

        **Note:** Using file descriptors directly is faster once new windows are mapped as it
        prevents the file to be opened again just for the purpose of mapping it."""
        regions = self._fdict.get(path_or_fd)
        if regions is None:
            regions = self.MapRegionListCls(path_or_fd)
            self._fdict[path_or_fd] = regions
        # END obtain region for path
        return self.WindowCursorCls(self, regions)

    def collect(self):
        """Collect all available free-to-collect mapped regions

        :return: Amount of freed handles"""
        return self._collect_lru_region(0)

    def num_file_handles(self):
        """:return: amount of file handles in use. Each mapped region uses one file handle"""
        return self._handle_count

    def num_open_files(self):
        """:return: amount of known files which currently have at least one mapped region"""
        return sum(1 for rlist in self._fdict.values() if len(rlist) > 0)

    def window_size(self):
        """:return: size of each window when allocating new regions"""
        return self._window_size

    def mapped_memory_size(self):
        """:return: amount of bytes currently mapped in total"""
        return self._memory_size

    def max_file_handles(self):
        """:return: maximum amount of handles we may have opened"""
        return self._max_handle_count

    def max_mapped_memory_size(self):
        """:return: maximum amount of memory we may allocate"""
        return self._max_memory_size

    #} END interface

    #{ Special Purpose Interface

    def force_map_handle_removal_win(self, base_path):
        """ONLY AVAILABLE ON WINDOWS
        On windows removing files is not allowed if anybody still has it opened.
        If this process is ourselves, and if the whole process uses this memory
        manager (as far as the parent framework is concerned) we can enforce
        closing all memory maps whose path matches the given base path to
        allow the respective operation after all.
        The respective system must NOT access the closed memory regions anymore !
        This really may only be used if you know that the items which keep
        the cursors alive will not be using it anymore. They need to be recreated !

        :param base_path: prefix which the path of a mapped file must start with
            for its regions to be released
        :return: Amount of closed handles, which is 0 on non-windows platforms

        **Note:** does nothing on non-windows platforms"""
        if sys.platform != 'win32':
            return 0
        # END early bailout

        num_closed = 0
        for path, rlist in self._fdict.items():
            # files registered by file descriptor have an int key, which has no
            # path that could match
            if isinstance(path, int):
                continue
            # END skip file descriptors
            if path.startswith(base_path):
                for region in rlist:
                    region.release()
                    num_closed += 1
            # END path matches
        # END for each path
        return num_closed
    #} END special purpose interface
class SlidingWindowMapManager(StaticWindowMapManager):
    """Maintains a list of ranges of mapped memory regions in one or more files and allows to easily
    obtain additional regions assuring there is no overlap.
    Once a certain memory limit is reached globally, or if there cannot be more open file handles
    which result from each mmap call, the least recently used, and currently unused mapped regions
    are unloaded automatically.

    **Note:** currently not thread-safe !

    **Note:** in the current implementation, we will automatically unload windows if we either cannot
    create more memory maps (as the open file handles limit is hit) or if we have allocated more than
    a safe amount of memory already, which would possibly cause memory allocations to fail as our address
    space is full."""

    __slots__ = tuple()

    def __init__(self, window_size=-1, max_memory_size=0, max_open_handles=sys.maxsize):
        """Adjusts the default window size to -1"""
        super().__init__(window_size, max_memory_size, max_open_handles)

    def _obtain_region(self, a, offset, size, flags, is_recursive):
        """Return a region of ``a`` which includes ``offset``, mapping a new one if none exists.

        :param a: A regions (a)rray, assumed to be sorted by the start offset of its regions
        :param offset: absolute offset into the file which the region must include
        :param size: amount of bytes requested from ``offset`` onwards
        :param flags: passed on to the region type when a new region is created
        :param is_recursive: True if this call is the retry after a failed mapping attempt;
            a second failure is propagated to the caller
        :return: the existing or newly created region"""
        # bisect to find an existing region. The c++ implementation cannot
        # do that as it uses a linked list for regions.
        r = None
        lo = 0
        hi = len(a)
        while lo < hi:
            center = (lo + hi) // 2
            candidate = a[center]
            if candidate._b <= offset:
                if candidate.includes_ofs(offset):
                    r = candidate
                    break
                # END have region
                lo = center + 1
            else:
                hi = center
            # END handle position
        # END while bisecting

        if r is not None:
            return r
        # END reuse existing region

        window_size = self._window_size
        left = self.MapWindowCls(0, 0)
        mid = self.MapWindowCls(offset, size)
        right = self.MapWindowCls(a.file_size(), 0)

        # we want to honor the max memory size, and assure we have enough
        # memory available
        # Save calls !
        if self._memory_size + window_size > self._max_memory_size:
            self._collect_lru_region(window_size)
        # END handle collection

        # The collection above may have removed regions from a, which is why the
        # insert position is determined only now.
        # we assume the list remains sorted by offset
        len_regions = len(a)
        insert_pos = len_regions
        for i, region in enumerate(a):
            if region._b > offset:
                insert_pos = i
                break
            # END if insert position is correct
        # END for each region

        # adjust the actual offset and size values to create the largest
        # possible mapping, bounded by the neighbouring regions (if any)
        if insert_pos < len_regions:
            right = self.MapWindowCls.from_region(a[insert_pos])
        # END adjust right window
        if insert_pos > 0:
            left = self.MapWindowCls.from_region(a[insert_pos - 1])
        # END adjust left window

        mid.extend_left_to(left, window_size)
        mid.extend_right_to(right, window_size)
        mid.align()

        # it can happen that we align beyond the end of the file
        if mid.ofs_end() > right.ofs:
            mid.size = right.ofs - mid.ofs
        # END readjust size

        # insert new region at the right offset to keep the order
        try:
            if self._handle_count >= self._max_handle_count:
                # handled right below like any other failure to map
                raise Exception(
                    "Reached the limit of %i open file handles" % self._max_handle_count)
            # END assert own imposed max file handles
            r = self.MapRegionCls(a.path_or_fd(), mid.ofs, mid.size, flags)
        except Exception:
            # apparently we are out of system resources or hit a limit
            # As many more operations are likely to fail in that condition (
            # like reading a file from disk, etc) we free up as much as possible
            # As this invalidates our insert position, we have to recurse here
            if is_recursive:
                # we already tried this, and still have no success in obtaining
                # a mapping. This is an exception, so we propagate it
                raise
            # END handle existing recursion
            self._collect_lru_region(0)
            return self._obtain_region(a, offset, size, flags, True)
        # END handle exceptions

        self._handle_count += 1
        self._memory_size += r.size()
        a.insert(insert_pos, r)
        return r