Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/io/feather_format.py: 29%

32 statements  

coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1""" feather-format compat """ 

2from __future__ import annotations 

3 

4from typing import ( 

5 Hashable, 

6 Sequence, 

7) 

8 

9from pandas._typing import ( 

10 FilePath, 

11 ReadBuffer, 

12 StorageOptions, 

13 WriteBuffer, 

14) 

15from pandas.compat._optional import import_optional_dependency 

16from pandas.util._decorators import doc 

17 

18from pandas.core.api import ( 

19 DataFrame, 

20 Int64Index, 

21 RangeIndex, 

22) 

23from pandas.core.shared_docs import _shared_docs 

24 

25from pandas.io.common import get_handle 

26 

27 

28@doc(storage_options=_shared_docs["storage_options"]) 

29def to_feather( 

30 df: DataFrame, 

31 path: FilePath | WriteBuffer[bytes], 

32 storage_options: StorageOptions = None, 

33 **kwargs, 

34) -> None: 

35 """ 

36 Write a DataFrame to the binary Feather format. 

37 

38 Parameters 

39 ---------- 

40 df : DataFrame 

41 path : str, path object, or file-like object 

42 {storage_options} 

43 

44 .. versionadded:: 1.2.0 

45 

46 **kwargs : 

47 Additional keywords passed to `pyarrow.feather.write_feather`. 

48 

49 .. versionadded:: 1.1.0 

50 """ 

51 import_optional_dependency("pyarrow") 

52 from pyarrow import feather 

53 

54 if not isinstance(df, DataFrame): 

55 raise ValueError("feather only support IO with DataFrames") 

56 

57 valid_types = {"string", "unicode"} 

58 

59 # validate index 

60 # -------------- 

61 

62 # validate that we have only a default index 

63 # raise on anything else as we don't serialize the index 

64 

65 if not isinstance(df.index, (Int64Index, RangeIndex)): 

66 typ = type(df.index) 

67 raise ValueError( 

68 f"feather does not support serializing {typ} " 

69 "for the index; you can .reset_index() to make the index into column(s)" 

70 ) 

71 

72 if not df.index.equals(RangeIndex.from_range(range(len(df)))): 

73 raise ValueError( 

74 "feather does not support serializing a non-default index for the index; " 

75 "you can .reset_index() to make the index into column(s)" 

76 ) 

77 

78 if df.index.name is not None: 

79 raise ValueError( 

80 "feather does not serialize index meta-data on a default index" 

81 ) 

82 

83 # validate columns 

84 # ---------------- 

85 

86 # must have value column names (strings only) 

87 if df.columns.inferred_type not in valid_types: 

88 raise ValueError("feather must have string column names") 

89 

90 with get_handle( 

91 path, "wb", storage_options=storage_options, is_text=False 

92 ) as handles: 

93 feather.write_feather(df, handles.handle, **kwargs) 
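# Illustrative usage sketch (added for context, not part of the pandas source
# shown above). Assuming pyarrow is installed and the DataFrame keeps its
# default RangeIndex, to_feather is reachable either through the DataFrame
# method or by calling this module's function directly; "example.feather" is
# a hypothetical output path.
#
#     import pandas as pd
#     from pandas.io.feather_format import to_feather
#
#     df = pd.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})
#     df.to_feather("example.feather")     # DataFrame method, delegates here
#     to_feather(df, "example.feather")    # direct call, same validation path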

@doc(storage_options=_shared_docs["storage_options"])
def read_feather(
    path: FilePath | ReadBuffer[bytes],
    columns: Sequence[Hashable] | None = None,
    use_threads: bool = True,
    storage_options: StorageOptions = None,
):
    """
    Load a feather-format object from the file path.

    Parameters
    ----------
    path : str, path object, or file-like object
        String, path object (implementing ``os.PathLike[str]``), or file-like
        object implementing a binary ``read()`` function. The string could be a URL.
        Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is
        expected. A local file could be: ``file://localhost/path/to/table.feather``.
    columns : sequence, default None
        If not provided, all columns are read.
    use_threads : bool, default True
        Whether to parallelize reading using multiple threads.
    {storage_options}

        .. versionadded:: 1.2.0

    Returns
    -------
    type of object stored in file
    """
    import_optional_dependency("pyarrow")
    from pyarrow import feather

    with get_handle(
        path, "rb", storage_options=storage_options, is_text=False
    ) as handles:

        return feather.read_feather(
            handles.handle, columns=columns, use_threads=bool(use_threads)
        )
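# Illustrative round-trip sketch (added for context, not part of pandas):
# reading back the hypothetical file written in the earlier example. The
# columns and use_threads arguments mirror the read_feather signature above.
#
#     result = read_feather("example.feather", columns=["a"], use_threads=True)
#     # equivalently: pd.read_feather("example.feather", columns=["a"])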