Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/openpyxl/reader/excel.py: 18%

4"""Read an xlsx file into Python"""

6# Python stdlib imports

7from zipfile import ZipFile, ZIP_DEFLATED, BadZipfile

8from sys import exc_info

9from io import BytesIO

10import os.path

11import warnings

13from openpyxl.pivot.table import TableDefinition

15# Allow blanket setting of KEEP_VBA for testing

16try:

17 from ..tests import KEEP_VBA

18except ImportError:

19 KEEP_VBA = False

22# package imports

23from openpyxl.utils.exceptions import InvalidFileException

24from openpyxl.xml.constants import (

25 ARC_SHARED_STRINGS,

26 ARC_CORE,

27 ARC_CONTENT_TYPES,

28 ARC_WORKBOOK,

29 ARC_THEME,

30 COMMENTS_NS,

31 SHARED_STRINGS,

32 EXTERNAL_LINK,

33 XLTM,

34 XLTX,

35 XLSM,

36 XLSX,

37)

38from openpyxl.cell import MergedCell

39from openpyxl.comments.comment_sheet import CommentSheet

41from .strings import read_string_table

42from .workbook import WorkbookParser

43from openpyxl.styles.stylesheet import apply_stylesheet

45from openpyxl.packaging.core import DocumentProperties

46from openpyxl.packaging.manifest import Manifest, Override

48from openpyxl.packaging.relationship import (

49 RelationshipList,

50 get_dependents,

51 get_rels_path,

52)

54from openpyxl.worksheet._read_only import ReadOnlyWorksheet

55from openpyxl.worksheet._reader import WorksheetReader

56from openpyxl.chartsheet import Chartsheet

57from openpyxl.worksheet.table import Table

58from openpyxl.drawing.spreadsheet_drawing import SpreadsheetDrawing

60from openpyxl.xml.functions import fromstring

62from .drawings import find_images

65SUPPORTED_FORMATS = ('.xlsx', '.xlsm', '.xltx', '.xltm')

67def _validate_archive(filename):

68 """

69 Does a first check whether filename is a string or a file-like

70 object. If it is a string representing a filename, a check is done

71 for supported formats by checking the given file-extension. If the

72 file-extension is not in SUPPORTED_FORMATS an InvalidFileException

73 will raised. Otherwise the filename (resp. file-like object) will

74 forwarded to zipfile.ZipFile returning a ZipFile-Instance.

75 """

76 is_file_like = hasattr(filename, 'read')

77 if not is_file_like:

78 file_format = os.path.splitext(filename)[-1].lower()

79 if file_format not in SUPPORTED_FORMATS:

80 if file_format == '.xls':

81 msg = ('openpyxl does not support the old .xls file format, '

82 'please use xlrd to read this file, or convert it to '

83 'the more recent .xlsx file format.')

84 elif file_format == '.xlsb':

85 msg = ('openpyxl does not support binary format .xlsb, '

86 'please convert this file to .xlsx format if you want '

87 'to open it with openpyxl')

88 else:

89 msg = ('openpyxl does not support %s file format, '

90 'please check you can open '

91 'it with Excel first. '

92 'Supported formats are: %s') % (file_format,

93 ','.join(SUPPORTED_FORMATS))

94 raise InvalidFileException(msg)

96 archive = ZipFile(filename, 'r')

97 return archive

100def _find_workbook_part(package):

101 workbook_types = [XLTM, XLTX, XLSM, XLSX]

102 for ct in workbook_types:

103 part = package.find(ct)

104 if part:

105 return part

106

107 # some applications reassign the default for application/xml

108 defaults = {p.ContentType for p in package.Default}

109 workbook_type = defaults & set(workbook_types)

110 if workbook_type:

111 return Override("/" + ARC_WORKBOOK, workbook_type.pop())

112

113 raise IOError("File contains no valid workbook part")

114

115

116class ExcelReader:

117

118 """

119 Read an Excel package and dispatch the contents to the relevant modules

120 """

121

122 def __init__(self, fn, read_only=False, keep_vba=KEEP_VBA,

123 data_only=False, keep_links=True):

124 self.archive = _validate_archive(fn)

125 self.valid_files = self.archive.namelist()

126 self.read_only = read_only

127 self.keep_vba = keep_vba

128 self.data_only = data_only

129 self.keep_links = keep_links

130 self.shared_strings = []

131

132

133 def read_manifest(self):

134 src = self.archive.read(ARC_CONTENT_TYPES)

135 root = fromstring(src)

136 self.package = Manifest.from_tree(root)

137

138

139 def read_strings(self):

140 ct = self.package.find(SHARED_STRINGS)

141 if ct is not None:

142 strings_path = ct.PartName[1:]

143 with self.archive.open(strings_path,) as src:

144 self.shared_strings = read_string_table(src)

145

146

147 def read_workbook(self):

148 wb_part = _find_workbook_part(self.package)

149 self.parser = WorkbookParser(self.archive, wb_part.PartName[1:], keep_links=self.keep_links)

150 self.parser.parse()

151 wb = self.parser.wb

152 wb._sheets = []

153 wb._data_only = self.data_only

154 wb._read_only = self.read_only

155 wb.template = wb_part.ContentType in (XLTX, XLTM)

156

157 # If are going to preserve the vba then attach a copy of the archive to the

158 # workbook so that is available for the save.

159 if self.keep_vba:

160 wb.vba_archive = ZipFile(BytesIO(), 'a', ZIP_DEFLATED)

161 for name in self.valid_files:

162 wb.vba_archive.writestr(name, self.archive.read(name))

163

164 if self.read_only:

165 wb._archive = self.archive

166

167 self.wb = wb

168

169

170 def read_properties(self):

171 if ARC_CORE in self.valid_files:

172 src = fromstring(self.archive.read(ARC_CORE))

173 self.wb.properties = DocumentProperties.from_tree(src)

174

175

176 def read_theme(self):

177 if ARC_THEME in self.valid_files:

178 self.wb.loaded_theme = self.archive.read(ARC_THEME)

179

180

181 def read_chartsheet(self, sheet, rel):

182 sheet_path = rel.target

183 rels_path = get_rels_path(sheet_path)

184 rels = []

185 if rels_path in self.valid_files:

186 rels = get_dependents(self.archive, rels_path)

187

188 with self.archive.open(sheet_path, "r") as src:

189 xml = src.read()

190 node = fromstring(xml)

191 cs = Chartsheet.from_tree(node)

192 cs._parent = self.wb

193 cs.title = sheet.name

194 self.wb._add_sheet(cs)

195

196 drawings = rels.find(SpreadsheetDrawing._rel_type)

197 for rel in drawings:

198 charts, images = find_images(self.archive, rel.target)

199 for c in charts:

200 cs.add_chart(c)

201

202

203 def read_worksheets(self):

204 comment_warning = """Cell '{0}':{1} is part of a merged range but has a comment which will be removed because merged cells cannot contain any data."""

205 for sheet, rel in self.parser.find_sheets():

206 if rel.target not in self.valid_files:

207 continue

208

209 if "chartsheet" in rel.Type:

210 self.read_chartsheet(sheet, rel)

211 continue

212

213 rels_path = get_rels_path(rel.target)

214 rels = RelationshipList()

215 if rels_path in self.valid_files:

216 rels = get_dependents(self.archive, rels_path)

217

218 if self.read_only:

219 ws = ReadOnlyWorksheet(self.wb, sheet.name, rel.target, self.shared_strings)

220 ws.sheet_state = sheet.state

221 self.wb._sheets.append(ws)

222 continue

223 else:

224 fh = self.archive.open(rel.target)

225 ws = self.wb.create_sheet(sheet.name)

226 ws._rels = rels

227 ws_parser = WorksheetReader(ws, fh, self.shared_strings, self.data_only)

228 ws_parser.bind_all()

229

230 # assign any comments to cells

231 for r in rels.find(COMMENTS_NS):

232 src = self.archive.read(r.target)

233 comment_sheet = CommentSheet.from_tree(fromstring(src))

234 for ref, comment in comment_sheet.comments:

235 try:

236 ws[ref].comment = comment

237 except AttributeError:

238 c = ws[ref]

239 if isinstance(c, MergedCell):

240 warnings.warn(comment_warning.format(ws.title, c.coordinate))

241 continue

242

243 # preserve link to VML file if VBA

244 if self.wb.vba_archive and ws.legacy_drawing:

245 ws.legacy_drawing = rels[ws.legacy_drawing].target

246 else:

247 ws.legacy_drawing = None

248

249 for t in ws_parser.tables:

250 src = self.archive.read(t)

251 xml = fromstring(src)

252 table = Table.from_tree(xml)

253 ws.add_table(table)

254

255 drawings = rels.find(SpreadsheetDrawing._rel_type)

256 for rel in drawings:

257 charts, images = find_images(self.archive, rel.target)

258 for c in charts:

259 ws.add_chart(c, c.anchor)

260 for im in images:

261 ws.add_image(im, im.anchor)

262

263 pivot_rel = rels.find(TableDefinition.rel_type)

264 for r in pivot_rel:

265 pivot_path = r.Target

266 src = self.archive.read(pivot_path)

267 tree = fromstring(src)

268 pivot = TableDefinition.from_tree(tree)

269 pivot.cache = self.parser.pivot_caches[pivot.cacheId]

270 ws.add_pivot(pivot)

271

272 ws.sheet_state = sheet.state

273

274

275 def read(self):

276 self.read_manifest()

277 self.read_strings()

278 self.read_workbook()

279 self.read_properties()

280 self.read_theme()

281 apply_stylesheet(self.archive, self.wb)

282 self.read_worksheets()

283 self.parser.assign_names()

284 if not self.read_only:

285 self.archive.close()

286

287

288def load_workbook(filename, read_only=False, keep_vba=KEEP_VBA,

289 data_only=False, keep_links=True):

290 """Open the given filename and return the workbook

291

292 :param filename: the path to open or a file-like object

293 :type filename: string or a file-like object open in binary mode c.f., :class:`zipfile.ZipFile`

294

295 :param read_only: optimised for reading, content cannot be edited

296 :type read_only: bool

297

298 :param keep_vba: preseve vba content (this does NOT mean you can use it)

299 :type keep_vba: bool

300

301 :param data_only: controls whether cells with formulae have either the formula (default) or the value stored the last time Excel read the sheet

302 :type data_only: bool

303

304 :param keep_links: whether links to external workbooks should be preserved. The default is True

305 :type keep_links: bool

306

307 :rtype: :class:`openpyxl.workbook.Workbook`

308

309 .. note::

310

311 When using lazy load, all worksheets will be :class:`openpyxl.worksheet.iter_worksheet.IterableWorksheet`

312 and the returned workbook will be read-only.

313

314 """

315 reader = ExcelReader(filename, read_only, keep_vba,

316 data_only, keep_links)

317 reader.read()

318 return reader.wb