Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/numpy/core/memmap.py: 16%
91 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1from contextlib import nullcontext
3import numpy as np
4from .numeric import uint8, ndarray, dtype
5from numpy.compat import os_fspath, is_pathlib_path
6from numpy.core.overrides import set_module
8__all__ = ['memmap']
# Alias for the ``dtype`` constructor; ``memmap.__new__`` has a parameter that
# is itself called ``dtype``, so the constructor is reached via this name.
dtypedescr = dtype

# Short mode strings that open the file for writing.
writeable_filemodes = ["r+", "w+"]

# Every short mode string accepted by ``memmap`` (read-only, copy-on-write,
# then the writeable ones).
valid_filemodes = ["r", "c"] + writeable_filemodes

# Long, descriptive spellings mapped onto the short mode strings above.
mode_equivalents = dict(
    readonly="r",
    copyonwrite="c",
    readwrite="r+",
    write="w+",
)
@set_module('numpy')
class memmap(ndarray):
    """Create a memory-map to an array stored in a *binary* file on disk.

    Memory-mapped files are used for accessing small segments of large files
    on disk, without reading the entire file into memory.  NumPy's
    memmap's are array-like objects.  This differs from Python's ``mmap``
    module, which uses file-like objects.

    This subclass of ndarray has some unpleasant interactions with
    some operations, because it doesn't quite fit properly as a subclass.
    An alternative to using this subclass is to create the ``mmap``
    object yourself, then create an ndarray with ndarray.__new__ directly,
    passing the object created in its 'buffer=' parameter.

    This class may at some point be turned into a factory function
    which returns a view into an mmap buffer.

    Flush the memmap instance to write the changes to the file. Currently there
    is no API to close the underlying ``mmap``. It is tricky to ensure the
    resource is actually closed, since it may be shared between different
    memmap instances.


    Parameters
    ----------
    filename : str, file-like object, or pathlib.Path instance
        The file name or file object to be used as the array data buffer.
    dtype : data-type, optional
        The data-type used to interpret the file contents.
        Default is `uint8`.
    mode : {'r+', 'r', 'w+', 'c'}, optional
        The file is opened in this mode:

        +------+-------------------------------------------------------------+
        | 'r'  | Open existing file for reading only.                        |
        +------+-------------------------------------------------------------+
        | 'r+' | Open existing file for reading and writing.                 |
        +------+-------------------------------------------------------------+
        | 'w+' | Create or overwrite existing file for reading and writing.  |
        +------+-------------------------------------------------------------+
        | 'c'  | Copy-on-write: assignments affect data in memory, but       |
        |      | changes are not saved to disk.  The file on disk is         |
        |      | read-only.                                                  |
        +------+-------------------------------------------------------------+

        Default is 'r+'.
    offset : int, optional
        In the file, array data starts at this offset. Since `offset` is
        measured in bytes, it should normally be a multiple of the byte-size
        of `dtype`. When ``mode != 'r'``, even positive offsets beyond end of
        file are valid; The file will be extended to accommodate the
        additional data. By default, ``memmap`` will start at the beginning of
        the file, even if ``filename`` is a file pointer ``fp`` and
        ``fp.tell() != 0``.
    shape : int or sequence of ints, optional
        The desired shape of the array. If ``mode == 'r'`` and the number
        of remaining bytes after `offset` is not a multiple of the byte-size
        of `dtype`, you must specify `shape`. By default, the returned array
        will be 1-D with the number of elements determined by file size
        and data-type.
    order : {'C', 'F'}, optional
        Specify the order of the ndarray memory layout:
        :term:`row-major`, C-style or :term:`column-major`,
        Fortran-style.  This only has an effect if the shape is
        greater than 1-D.  The default order is 'C'.

    Attributes
    ----------
    filename : str or pathlib.Path instance
        Path to the mapped file.
    offset : int
        Offset position in the file.
    mode : str
        File mode.

    Methods
    -------
    flush
        Flush any changes in memory to file on disk.
        When you delete a memmap object, flush is called first to write
        changes to disk.


    See also
    --------
    lib.format.open_memmap : Create or load a memory-mapped ``.npy`` file.

    Notes
    -----
    The memmap object can be used anywhere an ndarray is accepted.
    Given a memmap ``fp``, ``isinstance(fp, numpy.ndarray)`` returns
    ``True``.

    Memory-mapped files cannot be larger than 2GB on 32-bit systems.

    When a memmap causes a file to be created or extended beyond its
    current size in the filesystem, the contents of the new part are
    unspecified. On systems with POSIX filesystem semantics, the extended
    part will be filled with zero bytes.

    In the writeable modes (``'r+'`` and ``'w+'``) a zero-size memmap is
    still backed by at least one byte of file, because an empty file
    cannot be memory-mapped.

    Examples
    --------
    >>> data = np.arange(12, dtype='float32')
    >>> data.resize((3,4))

    This example uses a temporary file so that doctest doesn't write
    files to your directory. You would use a 'normal' filename.

    >>> from tempfile import mkdtemp
    >>> import os.path as path
    >>> filename = path.join(mkdtemp(), 'newfile.dat')

    Create a memmap with dtype and shape that matches our data:

    >>> fp = np.memmap(filename, dtype='float32', mode='w+', shape=(3,4))
    >>> fp
    memmap([[0., 0., 0., 0.],
            [0., 0., 0., 0.],
            [0., 0., 0., 0.]], dtype=float32)

    Write data to memmap array:

    >>> fp[:] = data[:]
    >>> fp
    memmap([[  0.,   1.,   2.,   3.],
            [  4.,   5.,   6.,   7.],
            [  8.,   9.,  10.,  11.]], dtype=float32)

    >>> fp.filename == path.abspath(filename)
    True

    Flushes memory changes to disk in order to read them back

    >>> fp.flush()

    Load the memmap and verify data was stored:

    >>> newfp = np.memmap(filename, dtype='float32', mode='r', shape=(3,4))
    >>> newfp
    memmap([[  0.,   1.,   2.,   3.],
            [  4.,   5.,   6.,   7.],
            [  8.,   9.,  10.,  11.]], dtype=float32)

    Read-only memmap:

    >>> fpr = np.memmap(filename, dtype='float32', mode='r', shape=(3,4))
    >>> fpr.flags.writeable
    False

    Copy-on-write memmap:

    >>> fpc = np.memmap(filename, dtype='float32', mode='c', shape=(3,4))
    >>> fpc.flags.writeable
    True

    It's possible to assign to copy-on-write array, but values are only
    written into the memory copy of the array, and not written to disk:

    >>> fpc
    memmap([[  0.,   1.,   2.,   3.],
            [  4.,   5.,   6.,   7.],
            [  8.,   9.,  10.,  11.]], dtype=float32)
    >>> fpc[0,:] = 0
    >>> fpc
    memmap([[  0.,   0.,   0.,   0.],
            [  4.,   5.,   6.,   7.],
            [  8.,   9.,  10.,  11.]], dtype=float32)

    File on disk is unchanged:

    >>> fpr
    memmap([[  0.,   1.,   2.,   3.],
            [  4.,   5.,   6.,   7.],
            [  8.,   9.,  10.,  11.]], dtype=float32)

    Offset into a memmap:

    >>> fpo = np.memmap(filename, dtype='float32', mode='r', offset=16)
    >>> fpo
    memmap([  4.,   5.,   6.,   7.,   8.,   9.,  10.,  11.], dtype=float32)

    """

    __array_priority__ = -100.0

    def __new__(subtype, filename, dtype=uint8, mode='r+', offset=0,
                shape=None, order='C'):
        # Import here to minimize 'import numpy' overhead
        import mmap
        import operator
        import os.path

        # Accept the long spellings ("readonly", ...) as well as the short
        # mode strings; anything else is rejected.
        try:
            mode = mode_equivalents[mode]
        except KeyError:
            if mode not in valid_filemodes:
                raise ValueError(
                    "mode must be one of {!r} (got {!r})"
                    .format(valid_filemodes + list(mode_equivalents.keys()), mode)
                ) from None

        if mode == 'w+' and shape is None:
            raise ValueError("shape must be given")

        if hasattr(filename, 'read'):
            # A file-like object was passed in: use it as-is and leave
            # closing it to the caller.
            f_ctx = nullcontext(filename)
        else:
            # Copy-on-write never writes to the file, so open it read-only.
            f_ctx = open(os_fspath(filename), ('r' if mode == 'c' else mode)+'b')

        with f_ctx as fid:
            # Seek to the end to learn the current file length.
            fid.seek(0, 2)
            flen = fid.tell()
            descr = dtypedescr(dtype)
            _dbytes = descr.itemsize

            if shape is None:
                # Infer a 1-D shape from whatever follows `offset`.
                nbytes = flen - offset
                if nbytes % _dbytes:
                    raise ValueError("Size of available data is not a "
                                     "multiple of the data-type size.")
                size = nbytes // _dbytes
                shape = (size,)
            else:
                if not isinstance(shape, (tuple, list)):
                    # A single integer-like value becomes a 1-D shape; any
                    # other object is handed to tuple() below as a sequence.
                    try:
                        shape = [operator.index(shape)]
                    except TypeError:
                        pass
                shape = tuple(shape)
                size = np.intp(1)  # avoid default choice of np.int_, which might overflow
                for k in shape:
                    size *= k

            nbytes = int(offset + size*_dbytes)

            if mode in ('w+', 'r+'):
                # An empty file cannot be mapped, so a zero-size writeable
                # memmap still needs one byte of backing file.
                nbytes = max(nbytes, 1)
                if flen < nbytes:
                    # Grow the file by writing a single byte at its new end.
                    fid.seek(nbytes - 1, 0)
                    fid.write(b'\0')
                    fid.flush()

            if mode == 'c':
                acc = mmap.ACCESS_COPY
            elif mode == 'r':
                acc = mmap.ACCESS_READ
            else:
                acc = mmap.ACCESS_WRITE

            # The mmap offset must be a multiple of ALLOCATIONGRANULARITY:
            # round it down and make up the difference with the array offset.
            start = offset - offset % mmap.ALLOCATIONGRANULARITY
            nbytes -= start
            array_offset = offset - start
            mm = mmap.mmap(fid.fileno(), nbytes, access=acc, offset=start)

            self = ndarray.__new__(subtype, shape, dtype=descr, buffer=mm,
                                   offset=array_offset, order=order)
            self._mmap = mm
            self.offset = offset
            self.mode = mode

            if is_pathlib_path(filename):
                # special case - if we were constructed with a pathlib.path,
                # then filename is a path object, not a string
                self.filename = filename.resolve()
            elif hasattr(fid, "name") and isinstance(fid.name, str):
                # py3 returns int for TemporaryFile().name
                self.filename = os.path.abspath(fid.name)
            # same as memmap copies (e.g. memmap + 1)
            else:
                self.filename = None

        return self

    def __array_finalize__(self, obj):
        """Copy the mmap bookkeeping from `obj` when memory may be shared.

        If `obj` has a ``_mmap`` attribute and ``np.may_share_memory``
        reports possible overlap with `self`, the ``_mmap``, ``filename``,
        ``offset`` and ``mode`` attributes are taken from `obj`; otherwise
        all four are set to ``None``.
        """
        if hasattr(obj, '_mmap') and np.may_share_memory(self, obj):
            self._mmap = obj._mmap
            self.filename = obj.filename
            self.offset = obj.offset
            self.mode = obj.mode
        else:
            self._mmap = None
            self.filename = None
            self.offset = None
            self.mode = None

    def flush(self):
        """
        Write any changes in the array to the file on disk.

        For further information, see `memmap`.

        Parameters
        ----------
        None

        See Also
        --------
        memmap

        """
        # The flush is delegated to ``self.base`` when it offers one.
        if self.base is not None and hasattr(self.base, 'flush'):
            self.base.flush()

    def __array_wrap__(self, arr, context=None):
        """Wrap a ufunc result, demoting fresh results away from memmap."""
        arr = super().__array_wrap__(arr, context)

        # Return a memmap if a memmap was given as the output of the
        # ufunc. Leave the arr class unchanged if self is not a memmap
        # to keep original memmap subclasses behavior
        if self is arr or type(self) is not memmap:
            return arr
        # Return scalar instead of 0d memmap, e.g. for np.sum with
        # axis=None
        if arr.shape == ():
            return arr[()]
        # Return ndarray otherwise
        return arr.view(np.ndarray)

    def __getitem__(self, index):
        """Index the array, returning a plain ndarray for unmapped results."""
        res = super().__getitem__(index)
        # A memmap result whose ``_mmap`` is None is not tied to the mapping
        # (see __array_finalize__), so hand it back as a base ndarray.
        if type(res) is memmap and res._mmap is None:
            return res.view(type=ndarray)
        return res