Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/openpyxl/worksheet/_reader.py: 21%

306 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1# Copyright (c) 2010-2022 openpyxl 

2 

3"""Reader for a single worksheet.""" 

4from copy import copy 

5from warnings import warn 

6 

7# compatibility imports 

8from openpyxl.xml.functions import iterparse 

9 

10# package imports 

11from openpyxl.cell import Cell, MergedCell 

12from openpyxl.cell.text import Text 

13from openpyxl.worksheet.dimensions import ( 

14 ColumnDimension, 

15 RowDimension, 

16 SheetFormatProperties, 

17) 

18 

19from openpyxl.xml.constants import ( 

20 SHEET_MAIN_NS, 

21 EXT_TYPES, 

22) 

23from openpyxl.formatting.formatting import ConditionalFormatting 

24from openpyxl.formula.translate import Translator 

25from openpyxl.utils import ( 

26 get_column_letter, 

27 coordinate_to_tuple, 

28 ) 

29from openpyxl.utils.datetime import from_excel, from_ISO8601, WINDOWS_EPOCH 

30from openpyxl.descriptors.excel import ExtensionList 

31 

32from .filters import AutoFilter 

33from .header_footer import HeaderFooter 

34from .hyperlink import HyperlinkList 

35from .merge import MergeCells 

36from .page import PageMargins, PrintOptions, PrintPageSetup 

37from .pagebreak import RowBreak, ColBreak 

38from .protection import SheetProtection 

39from .scenario import ScenarioList 

40from .views import SheetViewList 

41from .datavalidation import DataValidationList 

42from .table import TablePartList 

43from .properties import WorksheetProperties 

44from .dimensions import SheetDimension 

45from .related import Related 

46 

47 

48CELL_TAG = '{%s}c' % SHEET_MAIN_NS 

49VALUE_TAG = '{%s}v' % SHEET_MAIN_NS 

50FORMULA_TAG = '{%s}f' % SHEET_MAIN_NS 

51MERGE_TAG = '{%s}mergeCells' % SHEET_MAIN_NS 

52INLINE_STRING = "{%s}is" % SHEET_MAIN_NS 

53COL_TAG = '{%s}col' % SHEET_MAIN_NS 

54ROW_TAG = '{%s}row' % SHEET_MAIN_NS 

55CF_TAG = '{%s}conditionalFormatting' % SHEET_MAIN_NS 

56LEGACY_TAG = '{%s}legacyDrawing' % SHEET_MAIN_NS 

57PROT_TAG = '{%s}sheetProtection' % SHEET_MAIN_NS 

58EXT_TAG = "{%s}extLst" % SHEET_MAIN_NS 

59HYPERLINK_TAG = "{%s}hyperlinks" % SHEET_MAIN_NS 

60TABLE_TAG = "{%s}tableParts" % SHEET_MAIN_NS 

61PRINT_TAG = '{%s}printOptions' % SHEET_MAIN_NS 

62MARGINS_TAG = '{%s}pageMargins' % SHEET_MAIN_NS 

63PAGE_TAG = '{%s}pageSetup' % SHEET_MAIN_NS 

64HEADER_TAG = '{%s}headerFooter' % SHEET_MAIN_NS 

65FILTER_TAG = '{%s}autoFilter' % SHEET_MAIN_NS 

66VALIDATION_TAG = '{%s}dataValidations' % SHEET_MAIN_NS 

67PROPERTIES_TAG = '{%s}sheetPr' % SHEET_MAIN_NS 

68VIEWS_TAG = '{%s}sheetViews' % SHEET_MAIN_NS 

69FORMAT_TAG = '{%s}sheetFormatPr' % SHEET_MAIN_NS 

70ROW_BREAK_TAG = '{%s}rowBreaks' % SHEET_MAIN_NS 

71COL_BREAK_TAG = '{%s}colBreaks' % SHEET_MAIN_NS 

72SCENARIOS_TAG = '{%s}scenarios' % SHEET_MAIN_NS 

73DATA_TAG = '{%s}sheetData' % SHEET_MAIN_NS 

74DIMENSION_TAG = '{%s}dimension' % SHEET_MAIN_NS 

75CUSTOM_VIEWS_TAG = '{%s}customSheetViews' % SHEET_MAIN_NS 

76 

77 

78def _cast_number(value): 

79 "Convert numbers as string to an int or float" 

80 if "." in value or "E" in value or "e" in value: 

81 return float(value) 

82 return int(value) 

83 

84 

85class WorkSheetParser(object): 

86 

87 def __init__(self, src, shared_strings, data_only=False, 

88 epoch=WINDOWS_EPOCH, date_formats=set(), 

89 timedelta_formats=set()): 

90 self.min_row = self.min_col = None 

91 self.epoch = epoch 

92 self.source = src 

93 self.shared_strings = shared_strings 

94 self.data_only = data_only 

95 self.shared_formulae = {} 

96 self.array_formulae = {} 

97 self.row_counter = self.col_counter = 0 

98 self.tables = TablePartList() 

99 self.date_formats = date_formats 

100 self.timedelta_formats = timedelta_formats 

101 self.row_dimensions = {} 

102 self.column_dimensions = {} 

103 self.number_formats = [] 

104 self.keep_vba = False 

105 self.hyperlinks = HyperlinkList() 

106 self.formatting = [] 

107 self.legacy_drawing = None 

108 self.merged_cells = None 

109 self.row_breaks = RowBreak() 

110 self.col_breaks = ColBreak() 

111 

112 

113 def parse(self): 

114 dispatcher = { 

115 COL_TAG: self.parse_column_dimensions, 

116 PROT_TAG: self.parse_sheet_protection, 

117 EXT_TAG: self.parse_extensions, 

118 CF_TAG: self.parse_formatting, 

119 LEGACY_TAG: self.parse_legacy, 

120 ROW_BREAK_TAG: self.parse_row_breaks, 

121 COL_BREAK_TAG: self.parse_col_breaks, 

122 CUSTOM_VIEWS_TAG: self.parse_custom_views, 

123 } 

124 

125 properties = { 

126 PRINT_TAG: ('print_options', PrintOptions), 

127 MARGINS_TAG: ('page_margins', PageMargins), 

128 PAGE_TAG: ('page_setup', PrintPageSetup), 

129 HEADER_TAG: ('HeaderFooter', HeaderFooter), 

130 FILTER_TAG: ('auto_filter', AutoFilter), 

131 VALIDATION_TAG: ('data_validations', DataValidationList), 

132 PROPERTIES_TAG: ('sheet_properties', WorksheetProperties), 

133 VIEWS_TAG: ('views', SheetViewList), 

134 FORMAT_TAG: ('sheet_format', SheetFormatProperties), 

135 SCENARIOS_TAG: ('scenarios', ScenarioList), 

136 TABLE_TAG: ('tables', TablePartList), 

137 HYPERLINK_TAG: ('hyperlinks', HyperlinkList), 

138 MERGE_TAG: ('merged_cells', MergeCells), 

139 

140 } 

141 

142 it = iterparse(self.source) # add a finaliser to close the source when this becomes possible 

143 

144 for _, element in it: 

145 tag_name = element.tag 

146 if tag_name in dispatcher: 

147 dispatcher[tag_name](element) 

148 element.clear() 

149 elif tag_name in properties: 

150 prop = properties[tag_name] 

151 obj = prop[1].from_tree(element) 

152 setattr(self, prop[0], obj) 

153 element.clear() 

154 elif tag_name == ROW_TAG: 

155 row = self.parse_row(element) 

156 element.clear() 

157 yield row 

158 

159 

160 def parse_dimensions(self): 

161 """ 

162 Get worksheet dimensions if they are provided. 

163 """ 

164 it = iterparse(self.source) 

165 

166 for _event, element in it: 

167 if element.tag == DIMENSION_TAG: 

168 dim = SheetDimension.from_tree(element) 

169 return dim.boundaries 

170 

171 elif element.tag == DATA_TAG: 

172 # Dimensions missing 

173 break 

174 element.clear() 

175 

176 

177 def parse_cell(self, element): 

178 data_type = element.get('t', 'n') 

179 coordinate = element.get('r') 

180 style_id = element.get('s', 0) 

181 if style_id: 

182 style_id = int(style_id) 

183 

184 if data_type == "inlineStr": 

185 value = None 

186 else: 

187 value = element.findtext(VALUE_TAG, None) or None 

188 

189 if coordinate: 

190 row, column = coordinate_to_tuple(coordinate) 

191 self.col_counter = column 

192 else: 

193 self.col_counter += 1 

194 row, column = self.row_counter, self.col_counter 

195 

196 if not self.data_only and element.find(FORMULA_TAG) is not None: 

197 data_type = 'f' 

198 value = self.parse_formula(element) 

199 

200 elif value is not None: 

201 if data_type == 'n': 

202 value = _cast_number(value) 

203 if style_id in self.date_formats: 

204 data_type = 'd' 

205 try: 

206 value = from_excel( 

207 value, self.epoch, timedelta=style_id in self.timedelta_formats 

208 ) 

209 except (OverflowError, ValueError): 

210 msg = f"""Cell {coordinate} is marked as a date but the serial value {value} is outside the limits for dates. The cell will be treated as an error.""" 

211 warn(msg) 

212 data_type = "e" 

213 value = "#VALUE!" 

214 elif data_type == 's': 

215 value = self.shared_strings[int(value)] 

216 elif data_type == 'b': 

217 value = bool(int(value)) 

218 elif data_type == "str": 

219 data_type = "s" 

220 elif data_type == 'd': 

221 value = from_ISO8601(value) 

222 

223 elif data_type == 'inlineStr': 

224 child = element.find(INLINE_STRING) 

225 if child is not None: 

226 data_type = 's' 

227 richtext = Text.from_tree(child) 

228 value = richtext.content 

229 

230 return {'row':row, 'column':column, 'value':value, 'data_type':data_type, 'style_id':style_id} 

231 

232 

233 def parse_formula(self, element): 

234 """ 

235 possible formulae types: shared, array, datatable 

236 """ 

237 formula = element.find(FORMULA_TAG) 

238 formula_type = formula.get('t') 

239 coordinate = element.get('r') 

240 value = "=" 

241 if formula.text is not None: 

242 value += formula.text 

243 

244 if formula_type == "array": 

245 self.array_formulae[coordinate] = dict(formula.attrib) 

246 

247 elif formula_type == "shared": 

248 idx = formula.get('si') 

249 if idx in self.shared_formulae: 

250 trans = self.shared_formulae[idx] 

251 value = trans.translate_formula(coordinate) 

252 elif value != "=": 

253 self.shared_formulae[idx] = Translator(value, coordinate) 

254 

255 return value 

256 

257 

258 def parse_column_dimensions(self, col): 

259 attrs = dict(col.attrib) 

260 column = get_column_letter(int(attrs['min'])) 

261 attrs['index'] = column 

262 self.column_dimensions[column] = attrs 

263 

264 

265 def parse_row(self, row): 

266 attrs = dict(row.attrib) 

267 

268 if "r" in attrs: 

269 try: 

270 self.row_counter = int(attrs['r']) 

271 except ValueError: 

272 val = float(attrs['r']) 

273 if val.is_integer(): 

274 self.row_counter = int(val) 

275 else: 

276 raise ValueError(f"{attrs['r']} is not a valid row number") 

277 else: 

278 self.row_counter += 1 

279 self.col_counter = 0 

280 

281 keys = {k for k in attrs if not k.startswith('{')} 

282 if keys - {'r', 'spans'}: 

283 # don't create dimension objects unless they have relevant information 

284 self.row_dimensions[str(self.row_counter)] = attrs 

285 

286 cells = [self.parse_cell(el) for el in row] 

287 return self.row_counter, cells 

288 

289 

290 def parse_formatting(self, element): 

291 try: 

292 cf = ConditionalFormatting.from_tree(element) 

293 self.formatting.append(cf) 

294 except TypeError as e: 

295 msg = f"Failed to load a conditional formatting rule. It will be discarded. Cause: {e}" 

296 warn(msg) 

297 

298 

299 def parse_sheet_protection(self, element): 

300 protection = SheetProtection.from_tree(element) 

301 password = element.get("password") 

302 if password is not None: 

303 protection.set_password(password, True) 

304 self.protection = protection 

305 

306 

307 def parse_extensions(self, element): 

308 extLst = ExtensionList.from_tree(element) 

309 for e in extLst.ext: 

310 ext_type = EXT_TYPES.get(e.uri.upper(), "Unknown") 

311 msg = "{0} extension is not supported and will be removed".format(ext_type) 

312 warn(msg) 

313 

314 

315 def parse_legacy(self, element): 

316 obj = Related.from_tree(element) 

317 self.legacy_drawing = obj.id 

318 

319 

320 def parse_row_breaks(self, element): 

321 brk = RowBreak.from_tree(element) 

322 self.row_breaks = brk 

323 

324 

325 def parse_col_breaks(self, element): 

326 brk = ColBreak.from_tree(element) 

327 self.col_breaks = brk 

328 

329 

330 def parse_custom_views(self, element): 

331 # clear page_breaks to avoid duplication which Excel doesn't like 

332 # basically they're ignored in custom views 

333 self.row_breaks = RowBreak() 

334 self.col_breaks = ColBreak() 

335 

336 

337class WorksheetReader(object): 

338 """ 

339 Create a parser and apply it to a workbook 

340 """ 

341 

342 def __init__(self, ws, xml_source, shared_strings, data_only): 

343 self.ws = ws 

344 self.parser = WorkSheetParser(xml_source, shared_strings, 

345 data_only, ws.parent.epoch, ws.parent._date_formats, 

346 ws.parent._timedelta_formats) 

347 self.tables = [] 

348 

349 

350 def bind_cells(self): 

351 for idx, row in self.parser.parse(): 

352 for cell in row: 

353 style = self.ws.parent._cell_styles[cell['style_id']] 

354 c = Cell(self.ws, row=cell['row'], column=cell['column'], style_array=style) 

355 c._value = cell['value'] 

356 c.data_type = cell['data_type'] 

357 self.ws._cells[(cell['row'], cell['column'])] = c 

358 self.ws.formula_attributes = self.parser.array_formulae 

359 if self.ws._cells: 

360 self.ws._current_row = self.ws.max_row # use cells not row dimensions 

361 

362 

363 def bind_formatting(self): 

364 for cf in self.parser.formatting: 

365 for rule in cf.rules: 

366 if rule.dxfId is not None: 

367 rule.dxf = self.ws.parent._differential_styles[rule.dxfId] 

368 self.ws.conditional_formatting[cf] = rule 

369 

370 

371 def bind_tables(self): 

372 for t in self.parser.tables.tablePart: 

373 rel = self.ws._rels[t.id] 

374 self.tables.append(rel.Target) 

375 

376 

377 def bind_merged_cells(self): 

378 from openpyxl.worksheet.cell_range import MultiCellRange 

379 from openpyxl.worksheet.merge import MergedCellRange 

380 if not self.parser.merged_cells: 

381 return 

382 

383 ranges = [] 

384 for cr in self.parser.merged_cells.mergeCell: 

385 mcr = MergedCellRange(self.ws, cr.ref) 

386 self.ws._clean_merge_range(mcr) 

387 ranges.append(mcr) 

388 self.ws.merged_cells = MultiCellRange(ranges) 

389 

390 

391 def bind_hyperlinks(self): 

392 for link in self.parser.hyperlinks.hyperlink: 

393 if link.id: 

394 rel = self.ws._rels[link.id] 

395 link.target = rel.Target 

396 if ":" in link.ref: 

397 # range of cells 

398 for row in self.ws[link.ref]: 

399 for cell in row: 

400 try: 

401 cell.hyperlink = copy(link) 

402 except AttributeError: 

403 pass 

404 else: 

405 cell = self.ws[link.ref] 

406 if isinstance(cell, MergedCell): 

407 cell = self.normalize_merged_cell_link(cell.coordinate) 

408 cell.hyperlink = link 

409 

410 def normalize_merged_cell_link(self, coord): 

411 """ 

412 Returns the appropriate cell to which a hyperlink, which references a merged cell at the specified coordinates, 

413 should be bound. 

414 """ 

415 for rng in self.ws.merged_cells: 

416 if coord in rng: 

417 return self.ws.cell(*rng.top[0]) 

418 

419 def bind_col_dimensions(self): 

420 for col, cd in self.parser.column_dimensions.items(): 

421 if 'style' in cd: 

422 key = int(cd['style']) 

423 cd['style'] = self.ws.parent._cell_styles[key] 

424 self.ws.column_dimensions[col] = ColumnDimension(self.ws, **cd) 

425 

426 

427 def bind_row_dimensions(self): 

428 for row, rd in self.parser.row_dimensions.items(): 

429 if 's' in rd: 

430 key = int(rd['s']) 

431 rd['s'] = self.ws.parent._cell_styles[key] 

432 self.ws.row_dimensions[int(row)] = RowDimension(self.ws, **rd) 

433 

434 

435 def bind_properties(self): 

436 for k in ('print_options', 'page_margins', 'page_setup', 

437 'HeaderFooter', 'auto_filter', 'data_validations', 

438 'sheet_properties', 'views', 'sheet_format', 

439 'row_breaks', 'col_breaks', 'scenarios', 'legacy_drawing', 

440 'protection', 

441 ): 

442 v = getattr(self.parser, k, None) 

443 if v is not None: 

444 setattr(self.ws, k, v) 

445 

446 

447 def bind_all(self): 

448 self.bind_cells() 

449 self.bind_merged_cells() 

450 self.bind_hyperlinks() 

451 self.bind_formatting() 

452 self.bind_col_dimensions() 

453 self.bind_row_dimensions() 

454 self.bind_tables() 

455 self.bind_properties()