Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/io/pickle.py: 38%

30 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1""" pickle compat """ 

2from __future__ import annotations 

3 

4import pickle 

5from typing import Any 

6import warnings 

7 

8from pandas._typing import ( 

9 CompressionOptions, 

10 FilePath, 

11 ReadPickleBuffer, 

12 StorageOptions, 

13 WriteBuffer, 

14) 

15from pandas.compat import pickle_compat as pc 

16from pandas.util._decorators import doc 

17 

18from pandas.core.shared_docs import _shared_docs 

19 

20from pandas.io.common import get_handle 

21 

22 

@doc(
    storage_options=_shared_docs["storage_options"],
    compression_options=_shared_docs["compression_options"] % "filepath_or_buffer",
)
def to_pickle(
    obj: Any,
    filepath_or_buffer: FilePath | WriteBuffer[bytes],
    compression: CompressionOptions = "infer",
    protocol: int = pickle.HIGHEST_PROTOCOL,
    storage_options: StorageOptions = None,
) -> None:
    """
    Pickle (serialize) object to file.

    Parameters
    ----------
    obj : any object
        Any python object.
    filepath_or_buffer : str, path object, or file-like object
        Destination of the pickle: a string, a path object (implementing
        ``os.PathLike[str]``), or a file-like object implementing a binary
        ``write()`` function.

        .. versionchanged:: 1.0.0
           Accept URL. URL has to be of S3 or GCS.
    {compression_options}

        .. versionchanged:: 1.4.0 Zstandard support.

    protocol : int
        Protocol handed to the pickler, default HIGHEST_PROTOCOL (see [1],
        paragraph 12.1.2). Which values are accepted depends on the version
        of Python: 0, 1 and 2 for Python 2.x, additionally 3 for Python>=3.0
        and 4 for Python >= 3.4. A negative value is equivalent to
        HIGHEST_PROTOCOL.

    {storage_options}

        .. versionadded:: 1.2.0

        .. [1] https://docs.python.org/3/library/pickle.html

    See Also
    --------
    read_pickle : Load pickled pandas object (or any object) from file.
    DataFrame.to_hdf : Write DataFrame to an HDF5 file.
    DataFrame.to_sql : Write DataFrame to a SQL database.
    DataFrame.to_parquet : Write a DataFrame to the binary parquet format.

    Examples
    --------
    >>> original_df = pd.DataFrame({{"foo": range(5), "bar": range(5, 10)}})  # doctest: +SKIP
    >>> original_df  # doctest: +SKIP
       foo  bar
    0    0    5
    1    1    6
    2    2    7
    3    3    8
    4    4    9
    >>> pd.to_pickle(original_df, "./dummy.pkl")  # doctest: +SKIP

    >>> unpickled_df = pd.read_pickle("./dummy.pkl")  # doctest: +SKIP
    >>> unpickled_df  # doctest: +SKIP
       foo  bar
    0    0    5
    1    1    6
    2    2    7
    3    3    8
    4    4    9
    """  # noqa: E501
    # Any negative protocol is shorthand for the newest protocol available.
    protocol = pickle.HIGHEST_PROTOCOL if protocol < 0 else protocol

    with get_handle(
        filepath_or_buffer,
        "wb",
        compression=compression,
        is_text=False,
        storage_options=storage_options,
    ) as handles:
        method = handles.compression["method"]
        serialize_first = method in ("bz2", "xz") and protocol >= 5

        if not serialize_first:
            # Streaming straight into the handle is the memory-efficient path.
            pickle.dump(obj, handles.handle, protocol=protocol)
        else:
            # GH#39002: pickle protocol 5 raises an odd TypeError when dumping
            # directly into bz2/xz handles, so build the complete byte string
            # first and write it in one call. "zip" would need the same
            # treatment if pandas.io.common._BytesZipFile did not already
            # buffer its write calls.
            payload = pickle.dumps(obj, protocol=protocol)
            handles.handle.write(payload)

113 

114 

@doc(
    storage_options=_shared_docs["storage_options"],
    decompression_options=_shared_docs["decompression_options"] % "filepath_or_buffer",
)
def read_pickle(
    filepath_or_buffer: FilePath | ReadPickleBuffer,
    compression: CompressionOptions = "infer",
    storage_options: StorageOptions = None,
):
    """
    Load pickled pandas object (or any object) from file.

    .. warning::

       Loading pickled data received from untrusted sources can be
       unsafe. See `here <https://docs.python.org/3/library/pickle.html>`__.

    Parameters
    ----------
    filepath_or_buffer : str, path object, or file-like object
        Source of the pickle: a string, a path object (implementing
        ``os.PathLike[str]``), or a file-like object implementing a binary
        ``readlines()`` function.

        .. versionchanged:: 1.0.0
           Accept URL. URL is not limited to S3 and GCS.

    {decompression_options}

        .. versionchanged:: 1.4.0 Zstandard support.

    {storage_options}

        .. versionadded:: 1.2.0

    Returns
    -------
    unpickled : same type as object stored in file

    See Also
    --------
    DataFrame.to_pickle : Pickle (serialize) DataFrame object to file.
    Series.to_pickle : Pickle (serialize) Series object to file.
    read_hdf : Read HDF5 file into a DataFrame.
    read_sql : Read SQL query or database table into a DataFrame.
    read_parquet : Load a parquet object, returning a DataFrame.

    Notes
    -----
    read_pickle is only guaranteed to be backwards compatible to pandas 0.20.3
    provided the object was serialized with to_pickle.

    Examples
    --------
    >>> original_df = pd.DataFrame(
    ...     {{"foo": range(5), "bar": range(5, 10)}}
    ... )  # doctest: +SKIP
    >>> original_df  # doctest: +SKIP
       foo  bar
    0    0    5
    1    1    6
    2    2    7
    3    3    8
    4    4    9
    >>> pd.to_pickle(original_df, "./dummy.pkl")  # doctest: +SKIP

    >>> unpickled_df = pd.read_pickle("./dummy.pkl")  # doctest: +SKIP
    >>> unpickled_df  # doctest: +SKIP
       foo  bar
    0    0    5
    1    1    6
    2    2    7
    3    3    8
    4    4    9
    """
    with get_handle(
        filepath_or_buffer,
        "rb",
        compression=compression,
        is_text=False,
        storage_options=storage_options,
    ) as handles:
        stream = handles.handle

        # Loading is attempted in up to three stages:
        #   1) the standard library unpickler
        #   2) pickle_compat, which copes with subclass changes made by older
        #      pandas versions
        #   3) pickle_compat with latin-1 encoding, reached when either of the
        #      previous stages raises UnicodeDecodeError
        try:
            try:
                with warnings.catch_warnings(record=True):
                    # Silence every warning raised while unpickling,
                    # e.g. those about moved modules.
                    warnings.simplefilter("ignore", Warning)
                    return pickle.load(stream)
            except (AttributeError, ImportError, ModuleNotFoundError, TypeError):
                # TypeError covers Cython complaints about object.__new__ vs
                # Tick.__new__; the other errors look like, e.g.
                #  "No module named 'pandas.core.sparse.series'"
                #  "Can't get attribute '__nat_unpickle' on <module 'pandas._libs.tslib"
                return pc.load(stream, encoding=None)
        except UnicodeDecodeError:
            # e.g. can occur for files written in py27; see GH#28645 and GH#31988
            return pc.load(stream, encoding="latin-1")

214 except UnicodeDecodeError: 

215 # e.g. can occur for files written in py27; see GH#28645 and GH#31988 

216 return pc.load(handles.handle, encoding="latin-1")