Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/openpyxl/reader/excel.py: 18%
185 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1# Copyright (c) 2010-2022 openpyxl
4"""Read an xlsx file into Python"""
6# Python stdlib imports
7from zipfile import ZipFile, ZIP_DEFLATED, BadZipfile
8from sys import exc_info
9from io import BytesIO
10import os.path
11import warnings
13from openpyxl.pivot.table import TableDefinition
15# Allow blanket setting of KEEP_VBA for testing
16try:
17 from ..tests import KEEP_VBA
18except ImportError:
19 KEEP_VBA = False
22# package imports
23from openpyxl.utils.exceptions import InvalidFileException
24from openpyxl.xml.constants import (
25 ARC_SHARED_STRINGS,
26 ARC_CORE,
27 ARC_CONTENT_TYPES,
28 ARC_WORKBOOK,
29 ARC_THEME,
30 COMMENTS_NS,
31 SHARED_STRINGS,
32 EXTERNAL_LINK,
33 XLTM,
34 XLTX,
35 XLSM,
36 XLSX,
37)
38from openpyxl.cell import MergedCell
39from openpyxl.comments.comment_sheet import CommentSheet
41from .strings import read_string_table
42from .workbook import WorkbookParser
43from openpyxl.styles.stylesheet import apply_stylesheet
45from openpyxl.packaging.core import DocumentProperties
46from openpyxl.packaging.manifest import Manifest, Override
48from openpyxl.packaging.relationship import (
49 RelationshipList,
50 get_dependents,
51 get_rels_path,
52)
54from openpyxl.worksheet._read_only import ReadOnlyWorksheet
55from openpyxl.worksheet._reader import WorksheetReader
56from openpyxl.chartsheet import Chartsheet
57from openpyxl.worksheet.table import Table
58from openpyxl.drawing.spreadsheet_drawing import SpreadsheetDrawing
60from openpyxl.xml.functions import fromstring
62from .drawings import find_images
65SUPPORTED_FORMATS = ('.xlsx', '.xlsm', '.xltx', '.xltm')
67def _validate_archive(filename):
68 """
69 Does a first check whether filename is a string or a file-like
70 object. If it is a string representing a filename, a check is done
71 for supported formats by checking the given file-extension. If the
72 file-extension is not in SUPPORTED_FORMATS an InvalidFileException
73 will raised. Otherwise the filename (resp. file-like object) will
74 forwarded to zipfile.ZipFile returning a ZipFile-Instance.
75 """
76 is_file_like = hasattr(filename, 'read')
77 if not is_file_like:
78 file_format = os.path.splitext(filename)[-1].lower()
79 if file_format not in SUPPORTED_FORMATS:
80 if file_format == '.xls':
81 msg = ('openpyxl does not support the old .xls file format, '
82 'please use xlrd to read this file, or convert it to '
83 'the more recent .xlsx file format.')
84 elif file_format == '.xlsb':
85 msg = ('openpyxl does not support binary format .xlsb, '
86 'please convert this file to .xlsx format if you want '
87 'to open it with openpyxl')
88 else:
89 msg = ('openpyxl does not support %s file format, '
90 'please check you can open '
91 'it with Excel first. '
92 'Supported formats are: %s') % (file_format,
93 ','.join(SUPPORTED_FORMATS))
94 raise InvalidFileException(msg)
96 archive = ZipFile(filename, 'r')
97 return archive
100def _find_workbook_part(package):
101 workbook_types = [XLTM, XLTX, XLSM, XLSX]
102 for ct in workbook_types:
103 part = package.find(ct)
104 if part:
105 return part
107 # some applications reassign the default for application/xml
108 defaults = {p.ContentType for p in package.Default}
109 workbook_type = defaults & set(workbook_types)
110 if workbook_type:
111 return Override("/" + ARC_WORKBOOK, workbook_type.pop())
113 raise IOError("File contains no valid workbook part")
116class ExcelReader:
118 """
119 Read an Excel package and dispatch the contents to the relevant modules
120 """
122 def __init__(self, fn, read_only=False, keep_vba=KEEP_VBA,
123 data_only=False, keep_links=True):
124 self.archive = _validate_archive(fn)
125 self.valid_files = self.archive.namelist()
126 self.read_only = read_only
127 self.keep_vba = keep_vba
128 self.data_only = data_only
129 self.keep_links = keep_links
130 self.shared_strings = []
133 def read_manifest(self):
134 src = self.archive.read(ARC_CONTENT_TYPES)
135 root = fromstring(src)
136 self.package = Manifest.from_tree(root)
139 def read_strings(self):
140 ct = self.package.find(SHARED_STRINGS)
141 if ct is not None:
142 strings_path = ct.PartName[1:]
143 with self.archive.open(strings_path,) as src:
144 self.shared_strings = read_string_table(src)
147 def read_workbook(self):
148 wb_part = _find_workbook_part(self.package)
149 self.parser = WorkbookParser(self.archive, wb_part.PartName[1:], keep_links=self.keep_links)
150 self.parser.parse()
151 wb = self.parser.wb
152 wb._sheets = []
153 wb._data_only = self.data_only
154 wb._read_only = self.read_only
155 wb.template = wb_part.ContentType in (XLTX, XLTM)
157 # If are going to preserve the vba then attach a copy of the archive to the
158 # workbook so that is available for the save.
159 if self.keep_vba:
160 wb.vba_archive = ZipFile(BytesIO(), 'a', ZIP_DEFLATED)
161 for name in self.valid_files:
162 wb.vba_archive.writestr(name, self.archive.read(name))
164 if self.read_only:
165 wb._archive = self.archive
167 self.wb = wb
170 def read_properties(self):
171 if ARC_CORE in self.valid_files:
172 src = fromstring(self.archive.read(ARC_CORE))
173 self.wb.properties = DocumentProperties.from_tree(src)
176 def read_theme(self):
177 if ARC_THEME in self.valid_files:
178 self.wb.loaded_theme = self.archive.read(ARC_THEME)
181 def read_chartsheet(self, sheet, rel):
182 sheet_path = rel.target
183 rels_path = get_rels_path(sheet_path)
184 rels = []
185 if rels_path in self.valid_files:
186 rels = get_dependents(self.archive, rels_path)
188 with self.archive.open(sheet_path, "r") as src:
189 xml = src.read()
190 node = fromstring(xml)
191 cs = Chartsheet.from_tree(node)
192 cs._parent = self.wb
193 cs.title = sheet.name
194 self.wb._add_sheet(cs)
196 drawings = rels.find(SpreadsheetDrawing._rel_type)
197 for rel in drawings:
198 charts, images = find_images(self.archive, rel.target)
199 for c in charts:
200 cs.add_chart(c)
203 def read_worksheets(self):
204 comment_warning = """Cell '{0}':{1} is part of a merged range but has a comment which will be removed because merged cells cannot contain any data."""
205 for sheet, rel in self.parser.find_sheets():
206 if rel.target not in self.valid_files:
207 continue
209 if "chartsheet" in rel.Type:
210 self.read_chartsheet(sheet, rel)
211 continue
213 rels_path = get_rels_path(rel.target)
214 rels = RelationshipList()
215 if rels_path in self.valid_files:
216 rels = get_dependents(self.archive, rels_path)
218 if self.read_only:
219 ws = ReadOnlyWorksheet(self.wb, sheet.name, rel.target, self.shared_strings)
220 ws.sheet_state = sheet.state
221 self.wb._sheets.append(ws)
222 continue
223 else:
224 fh = self.archive.open(rel.target)
225 ws = self.wb.create_sheet(sheet.name)
226 ws._rels = rels
227 ws_parser = WorksheetReader(ws, fh, self.shared_strings, self.data_only)
228 ws_parser.bind_all()
230 # assign any comments to cells
231 for r in rels.find(COMMENTS_NS):
232 src = self.archive.read(r.target)
233 comment_sheet = CommentSheet.from_tree(fromstring(src))
234 for ref, comment in comment_sheet.comments:
235 try:
236 ws[ref].comment = comment
237 except AttributeError:
238 c = ws[ref]
239 if isinstance(c, MergedCell):
240 warnings.warn(comment_warning.format(ws.title, c.coordinate))
241 continue
243 # preserve link to VML file if VBA
244 if self.wb.vba_archive and ws.legacy_drawing:
245 ws.legacy_drawing = rels[ws.legacy_drawing].target
246 else:
247 ws.legacy_drawing = None
249 for t in ws_parser.tables:
250 src = self.archive.read(t)
251 xml = fromstring(src)
252 table = Table.from_tree(xml)
253 ws.add_table(table)
255 drawings = rels.find(SpreadsheetDrawing._rel_type)
256 for rel in drawings:
257 charts, images = find_images(self.archive, rel.target)
258 for c in charts:
259 ws.add_chart(c, c.anchor)
260 for im in images:
261 ws.add_image(im, im.anchor)
263 pivot_rel = rels.find(TableDefinition.rel_type)
264 for r in pivot_rel:
265 pivot_path = r.Target
266 src = self.archive.read(pivot_path)
267 tree = fromstring(src)
268 pivot = TableDefinition.from_tree(tree)
269 pivot.cache = self.parser.pivot_caches[pivot.cacheId]
270 ws.add_pivot(pivot)
272 ws.sheet_state = sheet.state
275 def read(self):
276 self.read_manifest()
277 self.read_strings()
278 self.read_workbook()
279 self.read_properties()
280 self.read_theme()
281 apply_stylesheet(self.archive, self.wb)
282 self.read_worksheets()
283 self.parser.assign_names()
284 if not self.read_only:
285 self.archive.close()
288def load_workbook(filename, read_only=False, keep_vba=KEEP_VBA,
289 data_only=False, keep_links=True):
290 """Open the given filename and return the workbook
292 :param filename: the path to open or a file-like object
293 :type filename: string or a file-like object open in binary mode c.f., :class:`zipfile.ZipFile`
295 :param read_only: optimised for reading, content cannot be edited
296 :type read_only: bool
298 :param keep_vba: preseve vba content (this does NOT mean you can use it)
299 :type keep_vba: bool
301 :param data_only: controls whether cells with formulae have either the formula (default) or the value stored the last time Excel read the sheet
302 :type data_only: bool
304 :param keep_links: whether links to external workbooks should be preserved. The default is True
305 :type keep_links: bool
307 :rtype: :class:`openpyxl.workbook.Workbook`
309 .. note::
311 When using lazy load, all worksheets will be :class:`openpyxl.worksheet.iter_worksheet.IterableWorksheet`
312 and the returned workbook will be read-only.
314 """
315 reader = ExcelReader(filename, read_only, keep_vba,
316 data_only, keep_links)
317 reader.read()
318 return reader.wb