Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/openpyxl/worksheet/_reader.py: 21%
306 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1# Copyright (c) 2010-2022 openpyxl
3"""Reader for a single worksheet."""
4from copy import copy
5from warnings import warn
7# compatibility imports
8from openpyxl.xml.functions import iterparse
10# package imports
11from openpyxl.cell import Cell, MergedCell
12from openpyxl.cell.text import Text
13from openpyxl.worksheet.dimensions import (
14 ColumnDimension,
15 RowDimension,
16 SheetFormatProperties,
17)
19from openpyxl.xml.constants import (
20 SHEET_MAIN_NS,
21 EXT_TYPES,
22)
23from openpyxl.formatting.formatting import ConditionalFormatting
24from openpyxl.formula.translate import Translator
25from openpyxl.utils import (
26 get_column_letter,
27 coordinate_to_tuple,
28 )
29from openpyxl.utils.datetime import from_excel, from_ISO8601, WINDOWS_EPOCH
30from openpyxl.descriptors.excel import ExtensionList
32from .filters import AutoFilter
33from .header_footer import HeaderFooter
34from .hyperlink import HyperlinkList
35from .merge import MergeCells
36from .page import PageMargins, PrintOptions, PrintPageSetup
37from .pagebreak import RowBreak, ColBreak
38from .protection import SheetProtection
39from .scenario import ScenarioList
40from .views import SheetViewList
41from .datavalidation import DataValidationList
42from .table import TablePartList
43from .properties import WorksheetProperties
44from .dimensions import SheetDimension
45from .related import Related
48CELL_TAG = '{%s}c' % SHEET_MAIN_NS
49VALUE_TAG = '{%s}v' % SHEET_MAIN_NS
50FORMULA_TAG = '{%s}f' % SHEET_MAIN_NS
51MERGE_TAG = '{%s}mergeCells' % SHEET_MAIN_NS
52INLINE_STRING = "{%s}is" % SHEET_MAIN_NS
53COL_TAG = '{%s}col' % SHEET_MAIN_NS
54ROW_TAG = '{%s}row' % SHEET_MAIN_NS
55CF_TAG = '{%s}conditionalFormatting' % SHEET_MAIN_NS
56LEGACY_TAG = '{%s}legacyDrawing' % SHEET_MAIN_NS
57PROT_TAG = '{%s}sheetProtection' % SHEET_MAIN_NS
58EXT_TAG = "{%s}extLst" % SHEET_MAIN_NS
59HYPERLINK_TAG = "{%s}hyperlinks" % SHEET_MAIN_NS
60TABLE_TAG = "{%s}tableParts" % SHEET_MAIN_NS
61PRINT_TAG = '{%s}printOptions' % SHEET_MAIN_NS
62MARGINS_TAG = '{%s}pageMargins' % SHEET_MAIN_NS
63PAGE_TAG = '{%s}pageSetup' % SHEET_MAIN_NS
64HEADER_TAG = '{%s}headerFooter' % SHEET_MAIN_NS
65FILTER_TAG = '{%s}autoFilter' % SHEET_MAIN_NS
66VALIDATION_TAG = '{%s}dataValidations' % SHEET_MAIN_NS
67PROPERTIES_TAG = '{%s}sheetPr' % SHEET_MAIN_NS
68VIEWS_TAG = '{%s}sheetViews' % SHEET_MAIN_NS
69FORMAT_TAG = '{%s}sheetFormatPr' % SHEET_MAIN_NS
70ROW_BREAK_TAG = '{%s}rowBreaks' % SHEET_MAIN_NS
71COL_BREAK_TAG = '{%s}colBreaks' % SHEET_MAIN_NS
72SCENARIOS_TAG = '{%s}scenarios' % SHEET_MAIN_NS
73DATA_TAG = '{%s}sheetData' % SHEET_MAIN_NS
74DIMENSION_TAG = '{%s}dimension' % SHEET_MAIN_NS
75CUSTOM_VIEWS_TAG = '{%s}customSheetViews' % SHEET_MAIN_NS
78def _cast_number(value):
79 "Convert numbers as string to an int or float"
80 if "." in value or "E" in value or "e" in value:
81 return float(value)
82 return int(value)
85class WorkSheetParser(object):
87 def __init__(self, src, shared_strings, data_only=False,
88 epoch=WINDOWS_EPOCH, date_formats=set(),
89 timedelta_formats=set()):
90 self.min_row = self.min_col = None
91 self.epoch = epoch
92 self.source = src
93 self.shared_strings = shared_strings
94 self.data_only = data_only
95 self.shared_formulae = {}
96 self.array_formulae = {}
97 self.row_counter = self.col_counter = 0
98 self.tables = TablePartList()
99 self.date_formats = date_formats
100 self.timedelta_formats = timedelta_formats
101 self.row_dimensions = {}
102 self.column_dimensions = {}
103 self.number_formats = []
104 self.keep_vba = False
105 self.hyperlinks = HyperlinkList()
106 self.formatting = []
107 self.legacy_drawing = None
108 self.merged_cells = None
109 self.row_breaks = RowBreak()
110 self.col_breaks = ColBreak()
113 def parse(self):
114 dispatcher = {
115 COL_TAG: self.parse_column_dimensions,
116 PROT_TAG: self.parse_sheet_protection,
117 EXT_TAG: self.parse_extensions,
118 CF_TAG: self.parse_formatting,
119 LEGACY_TAG: self.parse_legacy,
120 ROW_BREAK_TAG: self.parse_row_breaks,
121 COL_BREAK_TAG: self.parse_col_breaks,
122 CUSTOM_VIEWS_TAG: self.parse_custom_views,
123 }
125 properties = {
126 PRINT_TAG: ('print_options', PrintOptions),
127 MARGINS_TAG: ('page_margins', PageMargins),
128 PAGE_TAG: ('page_setup', PrintPageSetup),
129 HEADER_TAG: ('HeaderFooter', HeaderFooter),
130 FILTER_TAG: ('auto_filter', AutoFilter),
131 VALIDATION_TAG: ('data_validations', DataValidationList),
132 PROPERTIES_TAG: ('sheet_properties', WorksheetProperties),
133 VIEWS_TAG: ('views', SheetViewList),
134 FORMAT_TAG: ('sheet_format', SheetFormatProperties),
135 SCENARIOS_TAG: ('scenarios', ScenarioList),
136 TABLE_TAG: ('tables', TablePartList),
137 HYPERLINK_TAG: ('hyperlinks', HyperlinkList),
138 MERGE_TAG: ('merged_cells', MergeCells),
140 }
142 it = iterparse(self.source) # add a finaliser to close the source when this becomes possible
144 for _, element in it:
145 tag_name = element.tag
146 if tag_name in dispatcher:
147 dispatcher[tag_name](element)
148 element.clear()
149 elif tag_name in properties:
150 prop = properties[tag_name]
151 obj = prop[1].from_tree(element)
152 setattr(self, prop[0], obj)
153 element.clear()
154 elif tag_name == ROW_TAG:
155 row = self.parse_row(element)
156 element.clear()
157 yield row
160 def parse_dimensions(self):
161 """
162 Get worksheet dimensions if they are provided.
163 """
164 it = iterparse(self.source)
166 for _event, element in it:
167 if element.tag == DIMENSION_TAG:
168 dim = SheetDimension.from_tree(element)
169 return dim.boundaries
171 elif element.tag == DATA_TAG:
172 # Dimensions missing
173 break
174 element.clear()
177 def parse_cell(self, element):
178 data_type = element.get('t', 'n')
179 coordinate = element.get('r')
180 style_id = element.get('s', 0)
181 if style_id:
182 style_id = int(style_id)
184 if data_type == "inlineStr":
185 value = None
186 else:
187 value = element.findtext(VALUE_TAG, None) or None
189 if coordinate:
190 row, column = coordinate_to_tuple(coordinate)
191 self.col_counter = column
192 else:
193 self.col_counter += 1
194 row, column = self.row_counter, self.col_counter
196 if not self.data_only and element.find(FORMULA_TAG) is not None:
197 data_type = 'f'
198 value = self.parse_formula(element)
200 elif value is not None:
201 if data_type == 'n':
202 value = _cast_number(value)
203 if style_id in self.date_formats:
204 data_type = 'd'
205 try:
206 value = from_excel(
207 value, self.epoch, timedelta=style_id in self.timedelta_formats
208 )
209 except (OverflowError, ValueError):
210 msg = f"""Cell {coordinate} is marked as a date but the serial value {value} is outside the limits for dates. The cell will be treated as an error."""
211 warn(msg)
212 data_type = "e"
213 value = "#VALUE!"
214 elif data_type == 's':
215 value = self.shared_strings[int(value)]
216 elif data_type == 'b':
217 value = bool(int(value))
218 elif data_type == "str":
219 data_type = "s"
220 elif data_type == 'd':
221 value = from_ISO8601(value)
223 elif data_type == 'inlineStr':
224 child = element.find(INLINE_STRING)
225 if child is not None:
226 data_type = 's'
227 richtext = Text.from_tree(child)
228 value = richtext.content
230 return {'row':row, 'column':column, 'value':value, 'data_type':data_type, 'style_id':style_id}
233 def parse_formula(self, element):
234 """
235 possible formulae types: shared, array, datatable
236 """
237 formula = element.find(FORMULA_TAG)
238 formula_type = formula.get('t')
239 coordinate = element.get('r')
240 value = "="
241 if formula.text is not None:
242 value += formula.text
244 if formula_type == "array":
245 self.array_formulae[coordinate] = dict(formula.attrib)
247 elif formula_type == "shared":
248 idx = formula.get('si')
249 if idx in self.shared_formulae:
250 trans = self.shared_formulae[idx]
251 value = trans.translate_formula(coordinate)
252 elif value != "=":
253 self.shared_formulae[idx] = Translator(value, coordinate)
255 return value
258 def parse_column_dimensions(self, col):
259 attrs = dict(col.attrib)
260 column = get_column_letter(int(attrs['min']))
261 attrs['index'] = column
262 self.column_dimensions[column] = attrs
265 def parse_row(self, row):
266 attrs = dict(row.attrib)
268 if "r" in attrs:
269 try:
270 self.row_counter = int(attrs['r'])
271 except ValueError:
272 val = float(attrs['r'])
273 if val.is_integer():
274 self.row_counter = int(val)
275 else:
276 raise ValueError(f"{attrs['r']} is not a valid row number")
277 else:
278 self.row_counter += 1
279 self.col_counter = 0
281 keys = {k for k in attrs if not k.startswith('{')}
282 if keys - {'r', 'spans'}:
283 # don't create dimension objects unless they have relevant information
284 self.row_dimensions[str(self.row_counter)] = attrs
286 cells = [self.parse_cell(el) for el in row]
287 return self.row_counter, cells
290 def parse_formatting(self, element):
291 try:
292 cf = ConditionalFormatting.from_tree(element)
293 self.formatting.append(cf)
294 except TypeError as e:
295 msg = f"Failed to load a conditional formatting rule. It will be discarded. Cause: {e}"
296 warn(msg)
299 def parse_sheet_protection(self, element):
300 protection = SheetProtection.from_tree(element)
301 password = element.get("password")
302 if password is not None:
303 protection.set_password(password, True)
304 self.protection = protection
307 def parse_extensions(self, element):
308 extLst = ExtensionList.from_tree(element)
309 for e in extLst.ext:
310 ext_type = EXT_TYPES.get(e.uri.upper(), "Unknown")
311 msg = "{0} extension is not supported and will be removed".format(ext_type)
312 warn(msg)
315 def parse_legacy(self, element):
316 obj = Related.from_tree(element)
317 self.legacy_drawing = obj.id
320 def parse_row_breaks(self, element):
321 brk = RowBreak.from_tree(element)
322 self.row_breaks = brk
325 def parse_col_breaks(self, element):
326 brk = ColBreak.from_tree(element)
327 self.col_breaks = brk
330 def parse_custom_views(self, element):
331 # clear page_breaks to avoid duplication which Excel doesn't like
332 # basically they're ignored in custom views
333 self.row_breaks = RowBreak()
334 self.col_breaks = ColBreak()
337class WorksheetReader(object):
338 """
339 Create a parser and apply it to a workbook
340 """
342 def __init__(self, ws, xml_source, shared_strings, data_only):
343 self.ws = ws
344 self.parser = WorkSheetParser(xml_source, shared_strings,
345 data_only, ws.parent.epoch, ws.parent._date_formats,
346 ws.parent._timedelta_formats)
347 self.tables = []
350 def bind_cells(self):
351 for idx, row in self.parser.parse():
352 for cell in row:
353 style = self.ws.parent._cell_styles[cell['style_id']]
354 c = Cell(self.ws, row=cell['row'], column=cell['column'], style_array=style)
355 c._value = cell['value']
356 c.data_type = cell['data_type']
357 self.ws._cells[(cell['row'], cell['column'])] = c
358 self.ws.formula_attributes = self.parser.array_formulae
359 if self.ws._cells:
360 self.ws._current_row = self.ws.max_row # use cells not row dimensions
363 def bind_formatting(self):
364 for cf in self.parser.formatting:
365 for rule in cf.rules:
366 if rule.dxfId is not None:
367 rule.dxf = self.ws.parent._differential_styles[rule.dxfId]
368 self.ws.conditional_formatting[cf] = rule
371 def bind_tables(self):
372 for t in self.parser.tables.tablePart:
373 rel = self.ws._rels[t.id]
374 self.tables.append(rel.Target)
377 def bind_merged_cells(self):
378 from openpyxl.worksheet.cell_range import MultiCellRange
379 from openpyxl.worksheet.merge import MergedCellRange
380 if not self.parser.merged_cells:
381 return
383 ranges = []
384 for cr in self.parser.merged_cells.mergeCell:
385 mcr = MergedCellRange(self.ws, cr.ref)
386 self.ws._clean_merge_range(mcr)
387 ranges.append(mcr)
388 self.ws.merged_cells = MultiCellRange(ranges)
391 def bind_hyperlinks(self):
392 for link in self.parser.hyperlinks.hyperlink:
393 if link.id:
394 rel = self.ws._rels[link.id]
395 link.target = rel.Target
396 if ":" in link.ref:
397 # range of cells
398 for row in self.ws[link.ref]:
399 for cell in row:
400 try:
401 cell.hyperlink = copy(link)
402 except AttributeError:
403 pass
404 else:
405 cell = self.ws[link.ref]
406 if isinstance(cell, MergedCell):
407 cell = self.normalize_merged_cell_link(cell.coordinate)
408 cell.hyperlink = link
410 def normalize_merged_cell_link(self, coord):
411 """
412 Returns the appropriate cell to which a hyperlink, which references a merged cell at the specified coordinates,
413 should be bound.
414 """
415 for rng in self.ws.merged_cells:
416 if coord in rng:
417 return self.ws.cell(*rng.top[0])
419 def bind_col_dimensions(self):
420 for col, cd in self.parser.column_dimensions.items():
421 if 'style' in cd:
422 key = int(cd['style'])
423 cd['style'] = self.ws.parent._cell_styles[key]
424 self.ws.column_dimensions[col] = ColumnDimension(self.ws, **cd)
427 def bind_row_dimensions(self):
428 for row, rd in self.parser.row_dimensions.items():
429 if 's' in rd:
430 key = int(rd['s'])
431 rd['s'] = self.ws.parent._cell_styles[key]
432 self.ws.row_dimensions[int(row)] = RowDimension(self.ws, **rd)
435 def bind_properties(self):
436 for k in ('print_options', 'page_margins', 'page_setup',
437 'HeaderFooter', 'auto_filter', 'data_validations',
438 'sheet_properties', 'views', 'sheet_format',
439 'row_breaks', 'col_breaks', 'scenarios', 'legacy_drawing',
440 'protection',
441 ):
442 v = getattr(self.parser, k, None)
443 if v is not None:
444 setattr(self.ws, k, v)
447 def bind_all(self):
448 self.bind_cells()
449 self.bind_merged_cells()
450 self.bind_hyperlinks()
451 self.bind_formatting()
452 self.bind_col_dimensions()
453 self.bind_row_dimensions()
454 self.bind_tables()
455 self.bind_properties()