Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/io/sas/sasreader.py: 36%

51 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1""" 

2Read SAS sas7bdat or xport files. 

3""" 

4from __future__ import annotations 

5 

6from abc import ( 

7 ABCMeta, 

8 abstractmethod, 

9) 

10from typing import ( 

11 TYPE_CHECKING, 

12 Hashable, 

13 overload, 

14) 

15 

16from pandas._typing import ( 

17 CompressionOptions, 

18 FilePath, 

19 ReadBuffer, 

20) 

21from pandas.util._decorators import ( 

22 deprecate_nonkeyword_arguments, 

23 doc, 

24) 

25 

26from pandas.core.shared_docs import _shared_docs 

27 

28from pandas.io.common import stringify_path 

29 

30if TYPE_CHECKING: 30 ↛ 31line 30 didn't jump to line 31, because the condition on line 30 was never true

31 from pandas import DataFrame 

32 

33 

34# TODO(PY38): replace with Protocol in Python 3.8 

class ReaderBase(metaclass=ABCMeta):
    """
    Abstract interface shared by the XportReader and SAS7BDATReader classes.

    Subclasses must provide ``read`` and ``close``. In exchange they can be
    used in a ``with`` statement, which calls ``close`` when the block exits.
    """

    @abstractmethod
    def read(self, nrows: int | None = None) -> DataFrame:
        """Return data as a DataFrame; must be implemented by subclasses."""

    @abstractmethod
    def close(self) -> None:
        """Close the reader; must be implemented by subclasses."""

    def __enter__(self) -> ReaderBase:
        # The context manager yields the reader itself.
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        # Close unconditionally. Returning None means an exception raised
        # inside the ``with`` block is not suppressed.
        self.close()

53 

54 

# Typing overload: an integer ``chunksize`` is annotated as returning a reader.
@overload
def read_sas(
    filepath_or_buffer: FilePath | ReadBuffer[bytes],
    format: str | None = ...,
    index: Hashable | None = ...,
    encoding: str | None = ...,
    chunksize: int = ...,
    iterator: bool = ...,
    compression: CompressionOptions = ...,
) -> ReaderBase:
    ...


# Typing overload: with ``chunksize=None`` the result depends on ``iterator``,
# so it is annotated as either a DataFrame or a reader.
@overload
def read_sas(
    filepath_or_buffer: FilePath | ReadBuffer[bytes],
    format: str | None = ...,
    index: Hashable | None = ...,
    encoding: str | None = ...,
    chunksize: None = ...,
    iterator: bool = ...,
    compression: CompressionOptions = ...,
) -> DataFrame | ReaderBase:
    ...


@deprecate_nonkeyword_arguments(version=None, allowed_args=["filepath_or_buffer"])
@doc(decompression_options=_shared_docs["decompression_options"] % "filepath_or_buffer")
def read_sas(
    filepath_or_buffer: FilePath | ReadBuffer[bytes],
    format: str | None = None,
    index: Hashable | None = None,
    encoding: str | None = None,
    chunksize: int | None = None,
    iterator: bool = False,
    compression: CompressionOptions = "infer",
) -> DataFrame | ReaderBase:
    """
    Read SAS files stored as either XPORT or SAS7BDAT format files.

    Parameters
    ----------
    filepath_or_buffer : str, path object, or file-like object
        String, path object (implementing ``os.PathLike[str]``), or file-like
        object implementing a binary ``read()`` function. The string could be a URL.
        Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is
        expected. A local file could be:
        ``file://localhost/path/to/table.sas``.
    format : str {{'xport', 'sas7bdat'}} or None
        If None, file format is inferred from the file name: whichever of
        ``.xpt`` and ``.sas7bdat`` occurs last in the (lower-cased) path is
        used. If 'xport' or 'sas7bdat', uses the corresponding format
        (matched case-insensitively).
    index : identifier of index column, defaults to None
        Identifier of column that should be used as index of the DataFrame.
    encoding : str, default is None
        Encoding for text data.  If None, text data are stored as raw bytes.
    chunksize : int
        Read file `chunksize` lines at a time, returns iterator.

        .. versionchanged:: 1.2

            The returned reader is a context manager.
    iterator : bool, defaults to False
        If True, returns an iterator for reading the file incrementally.

        .. versionchanged:: 1.2

            The returned reader is a context manager.
    {decompression_options}

    Returns
    -------
    DataFrame if iterator=False and chunksize=None, else SAS7BDATReader
    or XportReader

    Raises
    ------
    ValueError
        If ``format`` is None and ``filepath_or_buffer`` is not a path-like
        string, if the format cannot be inferred from the file name, or if
        ``format`` is neither 'xport' nor 'sas7bdat'.
    """
    if format is None:
        # Inference relies on the file name, so a bare buffer cannot be used.
        filepath_or_buffer = stringify_path(filepath_or_buffer)
        if not isinstance(filepath_or_buffer, str):
            raise ValueError(
                "If this is a buffer object rather "
                "than a string name, you must specify a format string"
            )
        fname = filepath_or_buffer.lower()
        # Substring search (rather than ``endswith``) keeps names such as
        # ``table.xpt.gz`` working. Comparing the *last* occurrence of each
        # extension prevents an earlier path component (e.g. a directory
        # named ``exports.xpt``) from overriding the file's real extension.
        xport_at = fname.rfind(".xpt")
        sas7bdat_at = fname.rfind(".sas7bdat")
        if xport_at < 0 and sas7bdat_at < 0:
            raise ValueError(
                f"unable to infer format of SAS file from filename: {repr(fname)}"
            )
        format = "xport" if xport_at > sas7bdat_at else "sas7bdat"

    format_key = format.lower()

    reader: ReaderBase
    if format_key == "xport":
        # Imported lazily so only the reader that is needed gets loaded.
        from pandas.io.sas.sas_xport import XportReader

        reader = XportReader(
            filepath_or_buffer,
            index=index,
            encoding=encoding,
            chunksize=chunksize,
            compression=compression,
        )
    elif format_key == "sas7bdat":
        from pandas.io.sas.sas7bdat import SAS7BDATReader

        reader = SAS7BDATReader(
            filepath_or_buffer,
            index=index,
            encoding=encoding,
            chunksize=chunksize,
            compression=compression,
        )
    else:
        raise ValueError("unknown SAS format")

    # Incremental reading: hand the open reader to the caller, who is then
    # responsible for closing it (e.g. via a ``with`` statement).
    if iterator or chunksize:
        return reader

    # One-shot read: the context manager closes the reader even if
    # ``read`` raises.
    with reader:
        return reader.read()