Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/io/excel/_xlrd.py: 25%

62 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1from __future__ import annotations 

2 

3from datetime import time 

4 

5import numpy as np 

6 

7from pandas._typing import ( 

8 Scalar, 

9 StorageOptions, 

10) 

11from pandas.compat._optional import import_optional_dependency 

12from pandas.util._decorators import doc 

13 

14from pandas.core.shared_docs import _shared_docs 

15 

16from pandas.io.excel._base import BaseExcelReader 

17 

18 

class XlrdReader(BaseExcelReader):
    """Excel reader that delegates workbook access to the ``xlrd`` package."""

    @doc(storage_options=_shared_docs["storage_options"])
    def __init__(
        self, filepath_or_buffer, storage_options: StorageOptions = None
    ) -> None:
        """
        Reader using xlrd engine.

        Parameters
        ----------
        filepath_or_buffer : str, path object or Workbook
            Object to be parsed.
        {storage_options}
        """
        err_msg = "Install xlrd >= 1.0.0 for Excel support"
        # Fail early, with an actionable message, when xlrd is unavailable.
        import_optional_dependency("xlrd", extra=err_msg)
        super().__init__(filepath_or_buffer, storage_options=storage_options)

    @property
    def _workbook_class(self):
        """Return the ``xlrd.Book`` class (imported lazily)."""
        from xlrd import Book

        return Book

    def load_workbook(self, filepath_or_buffer):
        """
        Open a workbook with ``xlrd.open_workbook``.

        Objects exposing a ``read`` attribute are read in full and handed to
        xlrd as ``file_contents``; anything else is passed to
        ``open_workbook`` unchanged.
        """
        from xlrd import open_workbook

        if hasattr(filepath_or_buffer, "read"):
            data = filepath_or_buffer.read()
            return open_workbook(file_contents=data)
        return open_workbook(filepath_or_buffer)

    @property
    def sheet_names(self):
        """Return ``self.book.sheet_names()``."""
        return self.book.sheet_names()

    def get_sheet_by_name(self, name):
        """Return the sheet called ``name`` after validating the name."""
        self.raise_if_bad_sheet_by_name(name)
        return self.book.sheet_by_name(name)

    def get_sheet_by_index(self, index):
        """Return the sheet at position ``index`` after validating the index."""
        self.raise_if_bad_sheet_by_index(index)
        return self.book.sheet_by_index(index)

    def get_sheet_data(
        self, sheet, convert_float: bool, file_rows_needed: int | None = None
    ) -> list[list[Scalar]]:
        """
        Convert the cells of ``sheet`` into a list of rows of Python objects.

        Parameters
        ----------
        sheet : object
            Sheet exposing ``nrows``, ``row_values(i)`` and ``row_types(i)``.
        convert_float : bool
            If True, numeric cells holding an integral value are returned as
            ``int``; other numeric cells are left as they are.
        file_rows_needed : int, optional
            Upper bound on the number of rows to read. ``None`` reads all
            ``sheet.nrows`` rows.

        Returns
        -------
        list of list
            One inner list per row that was read, one element per cell.
        """
        import math

        from xlrd import (
            XL_CELL_BOOLEAN,
            XL_CELL_DATE,
            XL_CELL_ERROR,
            XL_CELL_NUMBER,
            xldate,
        )

        epoch1904 = self.book.datemode

        def _parse_cell(cell_contents, cell_typ):
            """
            converts the contents of the cell into a pandas appropriate object
            """
            if cell_typ == XL_CELL_DATE:
                # Use the newer xlrd datetime handling.
                try:
                    cell_contents = xldate.xldate_as_datetime(cell_contents, epoch1904)
                except OverflowError:
                    # Value cannot be represented as a datetime; keep it raw.
                    return cell_contents

                # Excel doesn't distinguish between dates and time,
                # so we treat dates on the epoch as times only.
                # Also, Excel supports 1900 and 1904 epochs.
                year_month_day = (cell_contents.timetuple())[0:3]
                if (not epoch1904 and year_month_day == (1899, 12, 31)) or (
                    epoch1904 and year_month_day == (1904, 1, 1)
                ):
                    cell_contents = time(
                        cell_contents.hour,
                        cell_contents.minute,
                        cell_contents.second,
                        cell_contents.microsecond,
                    )

            elif cell_typ == XL_CELL_ERROR:
                cell_contents = np.nan
            elif cell_typ == XL_CELL_BOOLEAN:
                cell_contents = bool(cell_contents)
            elif convert_float and cell_typ == XL_CELL_NUMBER:
                # GH5394 - Excel 'numbers' are always floats
                # it's a minimal perf hit and less surprising
                # int() raises on NaN/Inf, so only finite values are
                # candidates for the float -> int narrowing.
                if math.isfinite(cell_contents):
                    val = int(cell_contents)
                    if val == cell_contents:
                        cell_contents = val
            return cell_contents

        nrows = sheet.nrows
        if file_rows_needed is not None:
            nrows = min(nrows, file_rows_needed)

        return [
            [
                _parse_cell(value, typ)
                for value, typ in zip(sheet.row_values(i), sheet.row_types(i))
            ]
            for i in range(nrows)
        ]