Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/io/excel/_pyxlsb.py: 24%

56 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1# pyright: reportMissingImports=false 

2from __future__ import annotations 

3 

4from pandas._typing import ( 

5 FilePath, 

6 ReadBuffer, 

7 Scalar, 

8 StorageOptions, 

9) 

10from pandas.compat._optional import import_optional_dependency 

11from pandas.util._decorators import doc 

12 

13from pandas.core.shared_docs import _shared_docs 

14 

15from pandas.io.excel._base import BaseExcelReader 

16 

17 

18class PyxlsbReader(BaseExcelReader): 

19 @doc(storage_options=_shared_docs["storage_options"]) 

20 def __init__( 

21 self, 

22 filepath_or_buffer: FilePath | ReadBuffer[bytes], 

23 storage_options: StorageOptions = None, 

24 ) -> None: 

25 """ 

26 Reader using pyxlsb engine. 

27 

28 Parameters 

29 ---------- 

30 filepath_or_buffer : str, path object, or Workbook 

31 Object to be parsed. 

32 {storage_options} 

33 """ 

34 import_optional_dependency("pyxlsb") 

35 # This will call load_workbook on the filepath or buffer 

36 # And set the result to the book-attribute 

37 super().__init__(filepath_or_buffer, storage_options=storage_options) 

38 

39 @property 

40 def _workbook_class(self): 

41 from pyxlsb import Workbook 

42 

43 return Workbook 

44 

45 def load_workbook(self, filepath_or_buffer: FilePath | ReadBuffer[bytes]): 

46 from pyxlsb import open_workbook 

47 

48 # TODO: hack in buffer capability 

49 # This might need some modifications to the Pyxlsb library 

50 # Actual work for opening it is in xlsbpackage.py, line 20-ish 

51 

52 return open_workbook(filepath_or_buffer) 

53 

54 @property 

55 def sheet_names(self) -> list[str]: 

56 return self.book.sheets 

57 

58 def get_sheet_by_name(self, name: str): 

59 self.raise_if_bad_sheet_by_name(name) 

60 return self.book.get_sheet(name) 

61 

62 def get_sheet_by_index(self, index: int): 

63 self.raise_if_bad_sheet_by_index(index) 

64 # pyxlsb sheets are indexed from 1 onwards 

65 # There's a fix for this in the source, but the pypi package doesn't have it 

66 return self.book.get_sheet(index + 1) 

67 

68 def _convert_cell(self, cell, convert_float: bool) -> Scalar: 

69 # TODO: there is no way to distinguish between floats and datetimes in pyxlsb 

70 # This means that there is no way to read datetime types from an xlsb file yet 

71 if cell.v is None: 

72 return "" # Prevents non-named columns from not showing up as Unnamed: i 

73 if isinstance(cell.v, float) and convert_float: 

74 val = int(cell.v) 

75 if val == cell.v: 

76 return val 

77 else: 

78 return float(cell.v) 

79 

80 return cell.v 

81 

82 def get_sheet_data( 

83 self, 

84 sheet, 

85 convert_float: bool, 

86 file_rows_needed: int | None = None, 

87 ) -> list[list[Scalar]]: 

88 data: list[list[Scalar]] = [] 

89 prevous_row_number = -1 

90 # When sparse=True the rows can have different lengths and empty rows are 

91 # not returned. The cells are namedtuples of row, col, value (r, c, v). 

92 for row in sheet.rows(sparse=True): 

93 row_number = row[0].r 

94 converted_row = [self._convert_cell(cell, convert_float) for cell in row] 

95 while converted_row and converted_row[-1] == "": 

96 # trim trailing empty elements 

97 converted_row.pop() 

98 if converted_row: 

99 data.extend([[]] * (row_number - prevous_row_number - 1)) 

100 data.append(converted_row) 

101 prevous_row_number = row_number 

102 if file_rows_needed is not None and len(data) >= file_rows_needed: 

103 break 

104 if data: 

105 # extend rows to max_width 

106 max_width = max(len(data_row) for data_row in data) 

107 if min(len(data_row) for data_row in data) < max_width: 

108 empty_cell: list[Scalar] = [""] 

109 data = [ 

110 data_row + (max_width - len(data_row)) * empty_cell 

111 for data_row in data 

112 ] 

113 return data