Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/numpy/core/memmap.py: 16%

91 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1from contextlib import nullcontext 

2 

3import numpy as np 

4from .numeric import uint8, ndarray, dtype 

5from numpy.compat import os_fspath, is_pathlib_path 

6from numpy.core.overrides import set_module 

7 

# Names exported by ``from <this module> import *``.
__all__ = ['memmap']

9 

# ``memmap.__new__`` takes a keyword argument called ``dtype``; this alias
# keeps the dtype constructor reachable inside that method.
dtypedescr = dtype

# Short mode codes accepted by ``memmap``.
valid_filemodes = ["r", "c", "r+", "w+"]

# The subset of the short codes that ends in "+".
writeable_filemodes = [code for code in valid_filemodes if code.endswith("+")]

# Long-form spellings and the short code each one maps to.  Insertion
# order matters: the keys are listed in the "invalid mode" error message.
mode_equivalents = dict(
    readonly="r",
    copyonwrite="c",
    readwrite="r+",
    write="w+",
)

20 

21 

@set_module('numpy')
class memmap(ndarray):
    """Create a memory-map to an array stored in a *binary* file on disk.

    Memory-mapped files are used for accessing small segments of large files
    on disk, without reading the entire file into memory.  NumPy's
    memmap's are array-like objects.  This differs from Python's ``mmap``
    module, which uses file-like objects.

    This subclass of ndarray has some unpleasant interactions with
    some operations, because it doesn't quite fit properly as a subclass.
    An alternative to using this subclass is to create the ``mmap``
    object yourself, then create an ndarray with ndarray.__new__ directly,
    passing the object created in its 'buffer=' parameter.

    This class may at some point be turned into a factory function
    which returns a view into an mmap buffer.

    Flush the memmap instance to write the changes to the file. Currently there
    is no API to close the underlying ``mmap``. It is tricky to ensure the
    resource is actually closed, since it may be shared between different
    memmap instances.


    Parameters
    ----------
    filename : str, file-like object, or pathlib.Path instance
        The file name or file object to be used as the array data buffer.
    dtype : data-type, optional
        The data-type used to interpret the file contents.
        Default is `uint8`.
    mode : {'r+', 'r', 'w+', 'c'}, optional
        The file is opened in this mode:

        +------+-------------------------------------------------------------+
        | 'r'  | Open existing file for reading only.                        |
        +------+-------------------------------------------------------------+
        | 'r+' | Open existing file for reading and writing.                 |
        +------+-------------------------------------------------------------+
        | 'w+' | Create or overwrite existing file for reading and writing.  |
        +------+-------------------------------------------------------------+
        | 'c'  | Copy-on-write: assignments affect data in memory, but       |
        |      | changes are not saved to disk.  The file on disk is         |
        |      | read-only.                                                  |
        +------+-------------------------------------------------------------+

        Default is 'r+'.  The long spellings ``'readonly'``,
        ``'copyonwrite'``, ``'readwrite'`` and ``'write'`` are accepted as
        synonyms for ``'r'``, ``'c'``, ``'r+'`` and ``'w+'`` respectively.
    offset : int, optional
        In the file, array data starts at this offset. Since `offset` is
        measured in bytes, it should normally be a multiple of the byte-size
        of `dtype`. When ``mode != 'r'``, even positive offsets beyond end of
        file are valid; The file will be extended to accommodate the
        additional data. By default, ``memmap`` will start at the beginning of
        the file, even if ``filename`` is a file pointer ``fp`` and
        ``fp.tell() != 0``.
    shape : int or sequence of ints, optional
        The desired shape of the array. If ``mode == 'r'`` and the number
        of remaining bytes after `offset` is not a multiple of the byte-size
        of `dtype`, you must specify `shape`. By default, the returned array
        will be 1-D with the number of elements determined by file size
        and data-type. `shape` is required when ``mode == 'w+'``.
    order : {'C', 'F'}, optional
        Specify the order of the ndarray memory layout:
        :term:`row-major`, C-style or :term:`column-major`,
        Fortran-style.  This only has an effect if the shape is
        greater than 1-D.  The default order is 'C'.

    Attributes
    ----------
    filename : str or pathlib.Path instance
        Path to the mapped file.
    offset : int
        Offset position in the file.
    mode : str
        File mode.

    Methods
    -------
    flush
        Flush any changes in memory to file on disk.
        When you delete a memmap object, flush is called first to write
        changes to disk.

    Raises
    ------
    ValueError
        If `mode` is not one of the accepted mode strings, if
        ``mode == 'w+'`` and `shape` is not given, or if `shape` is not
        given and the number of bytes after `offset` is not a multiple of
        the byte-size of `dtype`.

    See also
    --------
    lib.format.open_memmap : Create or load a memory-mapped ``.npy`` file.

    Notes
    -----
    The memmap object can be used anywhere an ndarray is accepted.
    Given a memmap ``fp``, ``isinstance(fp, numpy.ndarray)`` returns
    ``True``.

    Memory-mapped files cannot be larger than 2GB on 32-bit systems.

    When a memmap causes a file to be created or extended beyond its
    current size in the filesystem, the contents of the new part are
    unspecified. On systems with POSIX filesystem semantics, the extended
    part will be filled with zero bytes.

    Examples
    --------
    >>> data = np.arange(12, dtype='float32')
    >>> data.resize((3,4))

    This example uses a temporary file so that doctest doesn't write
    files to your directory. You would use a 'normal' filename.

    >>> from tempfile import mkdtemp
    >>> import os.path as path
    >>> filename = path.join(mkdtemp(), 'newfile.dat')

    Create a memmap with dtype and shape that matches our data:

    >>> fp = np.memmap(filename, dtype='float32', mode='w+', shape=(3,4))
    >>> fp
    memmap([[0., 0., 0., 0.],
            [0., 0., 0., 0.],
            [0., 0., 0., 0.]], dtype=float32)

    Write data to memmap array:

    >>> fp[:] = data[:]
    >>> fp
    memmap([[  0.,   1.,   2.,   3.],
            [  4.,   5.,   6.,   7.],
            [  8.,   9.,  10.,  11.]], dtype=float32)

    >>> fp.filename == path.abspath(filename)
    True

    Flushes memory changes to disk in order to read them back

    >>> fp.flush()

    Load the memmap and verify data was stored:

    >>> newfp = np.memmap(filename, dtype='float32', mode='r', shape=(3,4))
    >>> newfp
    memmap([[  0.,   1.,   2.,   3.],
            [  4.,   5.,   6.,   7.],
            [  8.,   9.,  10.,  11.]], dtype=float32)

    Read-only memmap:

    >>> fpr = np.memmap(filename, dtype='float32', mode='r', shape=(3,4))
    >>> fpr.flags.writeable
    False

    Copy-on-write memmap:

    >>> fpc = np.memmap(filename, dtype='float32', mode='c', shape=(3,4))
    >>> fpc.flags.writeable
    True

    It's possible to assign to copy-on-write array, but values are only
    written into the memory copy of the array, and not written to disk:

    >>> fpc
    memmap([[  0.,   1.,   2.,   3.],
            [  4.,   5.,   6.,   7.],
            [  8.,   9.,  10.,  11.]], dtype=float32)
    >>> fpc[0,:] = 0
    >>> fpc
    memmap([[  0.,   0.,   0.,   0.],
            [  4.,   5.,   6.,   7.],
            [  8.,   9.,  10.,  11.]], dtype=float32)

    File on disk is unchanged:

    >>> fpr
    memmap([[  0.,   1.,   2.,   3.],
            [  4.,   5.,   6.,   7.],
            [  8.,   9.,  10.,  11.]], dtype=float32)

    Offset into a memmap:

    >>> fpo = np.memmap(filename, dtype='float32', mode='r', offset=16)
    >>> fpo
    memmap([  4.,   5.,   6.,   7.,   8.,   9.,  10.,  11.], dtype=float32)

    """

    __array_priority__ = -100.0

    def __new__(subtype, filename, dtype=uint8, mode='r+', offset=0,
                shape=None, order='C'):
        # Import here to minimize 'import numpy' overhead
        import mmap
        import operator
        import os.path

        # Translate a long-form mode name to its short code; anything that
        # is not a long-form name must already be a valid short code.
        try:
            mode = mode_equivalents[mode]
        except KeyError:
            if mode not in valid_filemodes:
                raise ValueError(
                    "mode must be one of {!r} (got {!r})"
                    .format(valid_filemodes + list(mode_equivalents.keys()), mode)
                ) from None

        if mode == 'w+' and shape is None:
            raise ValueError("shape must be given")

        if hasattr(filename, 'read'):
            # A file object supplied by the caller: use it, but leave it
            # open when the ``with`` block below exits.
            f_ctx = nullcontext(filename)
        else:
            # Copy-on-write mode opens the file read-only; the private
            # writable copy comes from the mmap access flag chosen below.
            f_ctx = open(os_fspath(filename), ('r' if mode == 'c' else mode)+'b')

        with f_ctx as fid:
            # Seek to the end to learn the current file length.
            fid.seek(0, 2)
            flen = fid.tell()
            descr = dtypedescr(dtype)
            _dbytes = descr.itemsize

            if shape is None:
                # Infer a 1-D shape from the data available after `offset`.
                nbytes = flen - offset
                if nbytes % _dbytes:
                    raise ValueError("Size of available data is not a "
                                     "multiple of the data-type size.")
                size = nbytes // _dbytes
                shape = (size,)
            else:
                # Accept a bare integer (or index-like object) as well as
                # any sequence of dimensions, and normalise to a tuple.
                if not isinstance(shape, (tuple, list)):
                    try:
                        shape = [operator.index(shape)]
                    except TypeError:
                        pass
                shape = tuple(shape)
                size = np.intp(1)  # avoid default choice of np.int_, which might overflow
                for k in shape:
                    size *= k

            # Total number of file bytes needed, counted from byte 0.
            nbytes = int(offset + size*_dbytes)

            if mode in ('w+', 'r+') and flen < nbytes:
                # Extend the file by writing one byte at the last needed
                # position.
                fid.seek(nbytes - 1, 0)
                fid.write(b'\0')
                fid.flush()

            if mode == 'c':
                acc = mmap.ACCESS_COPY
            elif mode == 'r':
                acc = mmap.ACCESS_READ
            else:
                acc = mmap.ACCESS_WRITE

            # Round the mapping start down to a multiple of
            # ALLOCATIONGRANULARITY and skip the remainder inside the
            # array via `array_offset`.
            start = offset - offset % mmap.ALLOCATIONGRANULARITY
            nbytes -= start
            array_offset = offset - start
            mm = mmap.mmap(fid.fileno(), nbytes, access=acc, offset=start)

            self = ndarray.__new__(subtype, shape, dtype=descr, buffer=mm,
                                   offset=array_offset, order=order)
            self._mmap = mm
            self.offset = offset
            self.mode = mode

            if is_pathlib_path(filename):
                # special case - if we were constructed with a pathlib.path,
                # then filename is a path object, not a string
                self.filename = filename.resolve()
            elif hasattr(fid, "name") and isinstance(fid.name, str):
                # py3 returns int for TemporaryFile().name
                self.filename = os.path.abspath(fid.name)
            # same as memmap copies (e.g. memmap + 1)
            else:
                self.filename = None

        return self

    def __array_finalize__(self, obj):
        """Copy the mapping attributes from `obj`, or reset them to None.

        The attributes are copied only when `obj` has an ``_mmap``
        attribute and ``np.may_share_memory(self, obj)`` is true.
        """
        if hasattr(obj, '_mmap') and np.may_share_memory(self, obj):
            self._mmap = obj._mmap
            self.filename = obj.filename
            self.offset = obj.offset
            self.mode = obj.mode
        else:
            self._mmap = None
            self.filename = None
            self.offset = None
            self.mode = None

    def flush(self):
        """
        Write any changes in the array to the file on disk.

        For further information, see `memmap`.

        Parameters
        ----------
        None

        See Also
        --------
        memmap

        """
        # Delegates to ``self.base.flush()``; does nothing when there is no
        # base or the base has no ``flush`` method.
        if self.base is not None and hasattr(self.base, 'flush'):
            self.base.flush()

    def __array_wrap__(self, arr, context=None):
        """Wrap a result array, demoting it unless it is `self` itself."""
        arr = super().__array_wrap__(arr, context)

        # Return a memmap if a memmap was given as the output of the
        # ufunc. Leave the arr class unchanged if self is not a memmap
        # to keep original memmap subclasses behavior
        if self is arr or type(self) is not memmap:
            return arr
        # Return scalar instead of 0d memmap, e.g. for np.sum with
        # axis=None
        if arr.shape == ():
            return arr[()]
        # Return ndarray otherwise
        return arr.view(np.ndarray)

    def __getitem__(self, index):
        """Index the array; a memmap result with no mapping becomes ndarray."""
        res = super().__getitem__(index)
        if type(res) is memmap and res._mmap is None:
            return res.view(type=ndarray)
        return res

337 return res