Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/openpyxl/reader/excel.py: 18%

185 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1# Copyright (c) 2010-2022 openpyxl 

2 

3 

4"""Read an xlsx file into Python""" 

5 

6# Python stdlib imports 

7from zipfile import ZipFile, ZIP_DEFLATED, BadZipfile 

8from sys import exc_info 

9from io import BytesIO 

10import os.path 

11import warnings 

12 

13from openpyxl.pivot.table import TableDefinition 

14 

15# Allow blanket setting of KEEP_VBA for testing 

16try: 

17 from ..tests import KEEP_VBA 

18except ImportError: 

19 KEEP_VBA = False 

20 

21 

22# package imports 

23from openpyxl.utils.exceptions import InvalidFileException 

24from openpyxl.xml.constants import ( 

25 ARC_SHARED_STRINGS, 

26 ARC_CORE, 

27 ARC_CONTENT_TYPES, 

28 ARC_WORKBOOK, 

29 ARC_THEME, 

30 COMMENTS_NS, 

31 SHARED_STRINGS, 

32 EXTERNAL_LINK, 

33 XLTM, 

34 XLTX, 

35 XLSM, 

36 XLSX, 

37) 

38from openpyxl.cell import MergedCell 

39from openpyxl.comments.comment_sheet import CommentSheet 

40 

41from .strings import read_string_table 

42from .workbook import WorkbookParser 

43from openpyxl.styles.stylesheet import apply_stylesheet 

44 

45from openpyxl.packaging.core import DocumentProperties 

46from openpyxl.packaging.manifest import Manifest, Override 

47 

48from openpyxl.packaging.relationship import ( 

49 RelationshipList, 

50 get_dependents, 

51 get_rels_path, 

52) 

53 

54from openpyxl.worksheet._read_only import ReadOnlyWorksheet 

55from openpyxl.worksheet._reader import WorksheetReader 

56from openpyxl.chartsheet import Chartsheet 

57from openpyxl.worksheet.table import Table 

58from openpyxl.drawing.spreadsheet_drawing import SpreadsheetDrawing 

59 

60from openpyxl.xml.functions import fromstring 

61 

62from .drawings import find_images 

63 

64 

65SUPPORTED_FORMATS = ('.xlsx', '.xlsm', '.xltx', '.xltm') 

66 

67def _validate_archive(filename): 

68 """ 

69 Does a first check whether filename is a string or a file-like 

70 object. If it is a string representing a filename, a check is done 

71 for supported formats by checking the given file-extension. If the 

72 file-extension is not in SUPPORTED_FORMATS an InvalidFileException 

73 will raised. Otherwise the filename (resp. file-like object) will 

74 forwarded to zipfile.ZipFile returning a ZipFile-Instance. 

75 """ 

76 is_file_like = hasattr(filename, 'read') 

77 if not is_file_like: 

78 file_format = os.path.splitext(filename)[-1].lower() 

79 if file_format not in SUPPORTED_FORMATS: 

80 if file_format == '.xls': 

81 msg = ('openpyxl does not support the old .xls file format, ' 

82 'please use xlrd to read this file, or convert it to ' 

83 'the more recent .xlsx file format.') 

84 elif file_format == '.xlsb': 

85 msg = ('openpyxl does not support binary format .xlsb, ' 

86 'please convert this file to .xlsx format if you want ' 

87 'to open it with openpyxl') 

88 else: 

89 msg = ('openpyxl does not support %s file format, ' 

90 'please check you can open ' 

91 'it with Excel first. ' 

92 'Supported formats are: %s') % (file_format, 

93 ','.join(SUPPORTED_FORMATS)) 

94 raise InvalidFileException(msg) 

95 

96 archive = ZipFile(filename, 'r') 

97 return archive 

98 

99 

100def _find_workbook_part(package): 

101 workbook_types = [XLTM, XLTX, XLSM, XLSX] 

102 for ct in workbook_types: 

103 part = package.find(ct) 

104 if part: 

105 return part 

106 

107 # some applications reassign the default for application/xml 

108 defaults = {p.ContentType for p in package.Default} 

109 workbook_type = defaults & set(workbook_types) 

110 if workbook_type: 

111 return Override("/" + ARC_WORKBOOK, workbook_type.pop()) 

112 

113 raise IOError("File contains no valid workbook part") 

114 

115 

116class ExcelReader: 

117 

118 """ 

119 Read an Excel package and dispatch the contents to the relevant modules 

120 """ 

121 

122 def __init__(self, fn, read_only=False, keep_vba=KEEP_VBA, 

123 data_only=False, keep_links=True): 

124 self.archive = _validate_archive(fn) 

125 self.valid_files = self.archive.namelist() 

126 self.read_only = read_only 

127 self.keep_vba = keep_vba 

128 self.data_only = data_only 

129 self.keep_links = keep_links 

130 self.shared_strings = [] 

131 

132 

133 def read_manifest(self): 

134 src = self.archive.read(ARC_CONTENT_TYPES) 

135 root = fromstring(src) 

136 self.package = Manifest.from_tree(root) 

137 

138 

139 def read_strings(self): 

140 ct = self.package.find(SHARED_STRINGS) 

141 if ct is not None: 

142 strings_path = ct.PartName[1:] 

143 with self.archive.open(strings_path,) as src: 

144 self.shared_strings = read_string_table(src) 

145 

146 

147 def read_workbook(self): 

148 wb_part = _find_workbook_part(self.package) 

149 self.parser = WorkbookParser(self.archive, wb_part.PartName[1:], keep_links=self.keep_links) 

150 self.parser.parse() 

151 wb = self.parser.wb 

152 wb._sheets = [] 

153 wb._data_only = self.data_only 

154 wb._read_only = self.read_only 

155 wb.template = wb_part.ContentType in (XLTX, XLTM) 

156 

157 # If are going to preserve the vba then attach a copy of the archive to the 

158 # workbook so that is available for the save. 

159 if self.keep_vba: 

160 wb.vba_archive = ZipFile(BytesIO(), 'a', ZIP_DEFLATED) 

161 for name in self.valid_files: 

162 wb.vba_archive.writestr(name, self.archive.read(name)) 

163 

164 if self.read_only: 

165 wb._archive = self.archive 

166 

167 self.wb = wb 

168 

169 

170 def read_properties(self): 

171 if ARC_CORE in self.valid_files: 

172 src = fromstring(self.archive.read(ARC_CORE)) 

173 self.wb.properties = DocumentProperties.from_tree(src) 

174 

175 

176 def read_theme(self): 

177 if ARC_THEME in self.valid_files: 

178 self.wb.loaded_theme = self.archive.read(ARC_THEME) 

179 

180 

181 def read_chartsheet(self, sheet, rel): 

182 sheet_path = rel.target 

183 rels_path = get_rels_path(sheet_path) 

184 rels = [] 

185 if rels_path in self.valid_files: 

186 rels = get_dependents(self.archive, rels_path) 

187 

188 with self.archive.open(sheet_path, "r") as src: 

189 xml = src.read() 

190 node = fromstring(xml) 

191 cs = Chartsheet.from_tree(node) 

192 cs._parent = self.wb 

193 cs.title = sheet.name 

194 self.wb._add_sheet(cs) 

195 

196 drawings = rels.find(SpreadsheetDrawing._rel_type) 

197 for rel in drawings: 

198 charts, images = find_images(self.archive, rel.target) 

199 for c in charts: 

200 cs.add_chart(c) 

201 

202 

203 def read_worksheets(self): 

204 comment_warning = """Cell '{0}':{1} is part of a merged range but has a comment which will be removed because merged cells cannot contain any data.""" 

205 for sheet, rel in self.parser.find_sheets(): 

206 if rel.target not in self.valid_files: 

207 continue 

208 

209 if "chartsheet" in rel.Type: 

210 self.read_chartsheet(sheet, rel) 

211 continue 

212 

213 rels_path = get_rels_path(rel.target) 

214 rels = RelationshipList() 

215 if rels_path in self.valid_files: 

216 rels = get_dependents(self.archive, rels_path) 

217 

218 if self.read_only: 

219 ws = ReadOnlyWorksheet(self.wb, sheet.name, rel.target, self.shared_strings) 

220 ws.sheet_state = sheet.state 

221 self.wb._sheets.append(ws) 

222 continue 

223 else: 

224 fh = self.archive.open(rel.target) 

225 ws = self.wb.create_sheet(sheet.name) 

226 ws._rels = rels 

227 ws_parser = WorksheetReader(ws, fh, self.shared_strings, self.data_only) 

228 ws_parser.bind_all() 

229 

230 # assign any comments to cells 

231 for r in rels.find(COMMENTS_NS): 

232 src = self.archive.read(r.target) 

233 comment_sheet = CommentSheet.from_tree(fromstring(src)) 

234 for ref, comment in comment_sheet.comments: 

235 try: 

236 ws[ref].comment = comment 

237 except AttributeError: 

238 c = ws[ref] 

239 if isinstance(c, MergedCell): 

240 warnings.warn(comment_warning.format(ws.title, c.coordinate)) 

241 continue 

242 

243 # preserve link to VML file if VBA 

244 if self.wb.vba_archive and ws.legacy_drawing: 

245 ws.legacy_drawing = rels[ws.legacy_drawing].target 

246 else: 

247 ws.legacy_drawing = None 

248 

249 for t in ws_parser.tables: 

250 src = self.archive.read(t) 

251 xml = fromstring(src) 

252 table = Table.from_tree(xml) 

253 ws.add_table(table) 

254 

255 drawings = rels.find(SpreadsheetDrawing._rel_type) 

256 for rel in drawings: 

257 charts, images = find_images(self.archive, rel.target) 

258 for c in charts: 

259 ws.add_chart(c, c.anchor) 

260 for im in images: 

261 ws.add_image(im, im.anchor) 

262 

263 pivot_rel = rels.find(TableDefinition.rel_type) 

264 for r in pivot_rel: 

265 pivot_path = r.Target 

266 src = self.archive.read(pivot_path) 

267 tree = fromstring(src) 

268 pivot = TableDefinition.from_tree(tree) 

269 pivot.cache = self.parser.pivot_caches[pivot.cacheId] 

270 ws.add_pivot(pivot) 

271 

272 ws.sheet_state = sheet.state 

273 

274 

275 def read(self): 

276 self.read_manifest() 

277 self.read_strings() 

278 self.read_workbook() 

279 self.read_properties() 

280 self.read_theme() 

281 apply_stylesheet(self.archive, self.wb) 

282 self.read_worksheets() 

283 self.parser.assign_names() 

284 if not self.read_only: 

285 self.archive.close() 

286 

287 

288def load_workbook(filename, read_only=False, keep_vba=KEEP_VBA, 

289 data_only=False, keep_links=True): 

290 """Open the given filename and return the workbook 

291 

292 :param filename: the path to open or a file-like object 

293 :type filename: string or a file-like object open in binary mode c.f., :class:`zipfile.ZipFile` 

294 

295 :param read_only: optimised for reading, content cannot be edited 

296 :type read_only: bool 

297 

298 :param keep_vba: preseve vba content (this does NOT mean you can use it) 

299 :type keep_vba: bool 

300 

301 :param data_only: controls whether cells with formulae have either the formula (default) or the value stored the last time Excel read the sheet 

302 :type data_only: bool 

303 

304 :param keep_links: whether links to external workbooks should be preserved. The default is True 

305 :type keep_links: bool 

306 

307 :rtype: :class:`openpyxl.workbook.Workbook` 

308 

309 .. note:: 

310 

311 When using lazy load, all worksheets will be :class:`openpyxl.worksheet.iter_worksheet.IterableWorksheet` 

312 and the returned workbook will be read-only. 

313 

314 """ 

315 reader = ExcelReader(filename, read_only, keep_vba, 

316 data_only, keep_links) 

317 reader.read() 

318 return reader.wb