Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/io/excel/_openpyxl.py: 16%

246 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1from __future__ import annotations 

2 

3import mmap 

4from typing import ( 

5 TYPE_CHECKING, 

6 Any, 

7 Tuple, 

8 cast, 

9) 

10 

11import numpy as np 

12 

13from pandas._typing import ( 

14 FilePath, 

15 ReadBuffer, 

16 Scalar, 

17 StorageOptions, 

18 WriteExcelBuffer, 

19) 

20from pandas.compat._optional import import_optional_dependency 

21from pandas.util._decorators import doc 

22 

23from pandas.core.shared_docs import _shared_docs 

24 

25from pandas.io.excel._base import ( 

26 BaseExcelReader, 

27 ExcelWriter, 

28) 

29from pandas.io.excel._util import ( 

30 combine_kwargs, 

31 validate_freeze_panes, 

32) 

33 

34if TYPE_CHECKING: 34 ↛ 35line 34 didn't jump to line 35, because the condition on line 34 was never true

35 from openpyxl.descriptors.serialisable import Serialisable 

36 from openpyxl.workbook import Workbook 

37 

38 

39class OpenpyxlWriter(ExcelWriter): 

40 _engine = "openpyxl" 

41 _supported_extensions = (".xlsx", ".xlsm") 

42 

43 def __init__( 

44 self, 

45 path: FilePath | WriteExcelBuffer | ExcelWriter, 

46 engine: str | None = None, 

47 date_format: str | None = None, 

48 datetime_format: str | None = None, 

49 mode: str = "w", 

50 storage_options: StorageOptions = None, 

51 if_sheet_exists: str | None = None, 

52 engine_kwargs: dict[str, Any] | None = None, 

53 **kwargs, 

54 ) -> None: 

55 # Use the openpyxl module as the Excel writer. 

56 from openpyxl.workbook import Workbook 

57 

58 engine_kwargs = combine_kwargs(engine_kwargs, kwargs) 

59 

60 super().__init__( 

61 path, 

62 mode=mode, 

63 storage_options=storage_options, 

64 if_sheet_exists=if_sheet_exists, 

65 engine_kwargs=engine_kwargs, 

66 ) 

67 

68 # ExcelWriter replaced "a" by "r+" to allow us to first read the excel file from 

69 # the file and later write to it 

70 if "r+" in self._mode: # Load from existing workbook 

71 from openpyxl import load_workbook 

72 

73 self._book = load_workbook(self._handles.handle, **engine_kwargs) 

74 self._handles.handle.seek(0) 

75 else: 

76 # Create workbook object with default optimized_write=True. 

77 self._book = Workbook(**engine_kwargs) 

78 

79 if self.book.worksheets: 

80 self.book.remove(self.book.worksheets[0]) 

81 

82 @property 

83 def book(self) -> Workbook: 

84 """ 

85 Book instance of class openpyxl.workbook.Workbook. 

86 

87 This attribute can be used to access engine-specific features. 

88 """ 

89 return self._book 

90 

91 @book.setter 

92 def book(self, other: Workbook) -> None: 

93 """ 

94 Set book instance. Class type will depend on the engine used. 

95 """ 

96 self._deprecate_set_book() 

97 self._book = other 

98 

99 @property 

100 def sheets(self) -> dict[str, Any]: 

101 """Mapping of sheet names to sheet objects.""" 

102 result = {name: self.book[name] for name in self.book.sheetnames} 

103 return result 

104 

105 def _save(self) -> None: 

106 """ 

107 Save workbook to disk. 

108 """ 

109 self.book.save(self._handles.handle) 

110 if "r+" in self._mode and not isinstance(self._handles.handle, mmap.mmap): 

111 # truncate file to the written content 

112 self._handles.handle.truncate() 

113 

114 @classmethod 

115 def _convert_to_style_kwargs(cls, style_dict: dict) -> dict[str, Serialisable]: 

116 """ 

117 Convert a style_dict to a set of kwargs suitable for initializing 

118 or updating-on-copy an openpyxl v2 style object. 

119 

120 Parameters 

121 ---------- 

122 style_dict : dict 

123 A dict with zero or more of the following keys (or their synonyms). 

124 'font' 

125 'fill' 

126 'border' ('borders') 

127 'alignment' 

128 'number_format' 

129 'protection' 

130 

131 Returns 

132 ------- 

133 style_kwargs : dict 

134 A dict with the same, normalized keys as ``style_dict`` but each 

135 value has been replaced with a native openpyxl style object of the 

136 appropriate class. 

137 """ 

138 _style_key_map = {"borders": "border"} 

139 

140 style_kwargs: dict[str, Serialisable] = {} 

141 for k, v in style_dict.items(): 

142 if k in _style_key_map: 

143 k = _style_key_map[k] 

144 _conv_to_x = getattr(cls, f"_convert_to_{k}", lambda x: None) 

145 new_v = _conv_to_x(v) 

146 if new_v: 

147 style_kwargs[k] = new_v 

148 

149 return style_kwargs 

150 

151 @classmethod 

152 def _convert_to_color(cls, color_spec): 

153 """ 

154 Convert ``color_spec`` to an openpyxl v2 Color object. 

155 

156 Parameters 

157 ---------- 

158 color_spec : str, dict 

159 A 32-bit ARGB hex string, or a dict with zero or more of the 

160 following keys. 

161 'rgb' 

162 'indexed' 

163 'auto' 

164 'theme' 

165 'tint' 

166 'index' 

167 'type' 

168 

169 Returns 

170 ------- 

171 color : openpyxl.styles.Color 

172 """ 

173 from openpyxl.styles import Color 

174 

175 if isinstance(color_spec, str): 

176 return Color(color_spec) 

177 else: 

178 return Color(**color_spec) 

179 

180 @classmethod 

181 def _convert_to_font(cls, font_dict): 

182 """ 

183 Convert ``font_dict`` to an openpyxl v2 Font object. 

184 

185 Parameters 

186 ---------- 

187 font_dict : dict 

188 A dict with zero or more of the following keys (or their synonyms). 

189 'name' 

190 'size' ('sz') 

191 'bold' ('b') 

192 'italic' ('i') 

193 'underline' ('u') 

194 'strikethrough' ('strike') 

195 'color' 

196 'vertAlign' ('vertalign') 

197 'charset' 

198 'scheme' 

199 'family' 

200 'outline' 

201 'shadow' 

202 'condense' 

203 

204 Returns 

205 ------- 

206 font : openpyxl.styles.Font 

207 """ 

208 from openpyxl.styles import Font 

209 

210 _font_key_map = { 

211 "sz": "size", 

212 "b": "bold", 

213 "i": "italic", 

214 "u": "underline", 

215 "strike": "strikethrough", 

216 "vertalign": "vertAlign", 

217 } 

218 

219 font_kwargs = {} 

220 for k, v in font_dict.items(): 

221 if k in _font_key_map: 

222 k = _font_key_map[k] 

223 if k == "color": 

224 v = cls._convert_to_color(v) 

225 font_kwargs[k] = v 

226 

227 return Font(**font_kwargs) 

228 

229 @classmethod 

230 def _convert_to_stop(cls, stop_seq): 

231 """ 

232 Convert ``stop_seq`` to a list of openpyxl v2 Color objects, 

233 suitable for initializing the ``GradientFill`` ``stop`` parameter. 

234 

235 Parameters 

236 ---------- 

237 stop_seq : iterable 

238 An iterable that yields objects suitable for consumption by 

239 ``_convert_to_color``. 

240 

241 Returns 

242 ------- 

243 stop : list of openpyxl.styles.Color 

244 """ 

245 return map(cls._convert_to_color, stop_seq) 

246 

247 @classmethod 

248 def _convert_to_fill(cls, fill_dict: dict[str, Any]): 

249 """ 

250 Convert ``fill_dict`` to an openpyxl v2 Fill object. 

251 

252 Parameters 

253 ---------- 

254 fill_dict : dict 

255 A dict with one or more of the following keys (or their synonyms), 

256 'fill_type' ('patternType', 'patterntype') 

257 'start_color' ('fgColor', 'fgcolor') 

258 'end_color' ('bgColor', 'bgcolor') 

259 or one or more of the following keys (or their synonyms). 

260 'type' ('fill_type') 

261 'degree' 

262 'left' 

263 'right' 

264 'top' 

265 'bottom' 

266 'stop' 

267 

268 Returns 

269 ------- 

270 fill : openpyxl.styles.Fill 

271 """ 

272 from openpyxl.styles import ( 

273 GradientFill, 

274 PatternFill, 

275 ) 

276 

277 _pattern_fill_key_map = { 

278 "patternType": "fill_type", 

279 "patterntype": "fill_type", 

280 "fgColor": "start_color", 

281 "fgcolor": "start_color", 

282 "bgColor": "end_color", 

283 "bgcolor": "end_color", 

284 } 

285 

286 _gradient_fill_key_map = {"fill_type": "type"} 

287 

288 pfill_kwargs = {} 

289 gfill_kwargs = {} 

290 for k, v in fill_dict.items(): 

291 pk = gk = None 

292 if k in _pattern_fill_key_map: 

293 pk = _pattern_fill_key_map[k] 

294 if k in _gradient_fill_key_map: 

295 gk = _gradient_fill_key_map[k] 

296 if pk in ["start_color", "end_color"]: 

297 v = cls._convert_to_color(v) 

298 if gk == "stop": 

299 v = cls._convert_to_stop(v) 

300 if pk: 

301 pfill_kwargs[pk] = v 

302 elif gk: 

303 gfill_kwargs[gk] = v 

304 else: 

305 pfill_kwargs[k] = v 

306 gfill_kwargs[k] = v 

307 

308 try: 

309 return PatternFill(**pfill_kwargs) 

310 except TypeError: 

311 return GradientFill(**gfill_kwargs) 

312 

313 @classmethod 

314 def _convert_to_side(cls, side_spec): 

315 """ 

316 Convert ``side_spec`` to an openpyxl v2 Side object. 

317 

318 Parameters 

319 ---------- 

320 side_spec : str, dict 

321 A string specifying the border style, or a dict with zero or more 

322 of the following keys (or their synonyms). 

323 'style' ('border_style') 

324 'color' 

325 

326 Returns 

327 ------- 

328 side : openpyxl.styles.Side 

329 """ 

330 from openpyxl.styles import Side 

331 

332 _side_key_map = {"border_style": "style"} 

333 

334 if isinstance(side_spec, str): 

335 return Side(style=side_spec) 

336 

337 side_kwargs = {} 

338 for k, v in side_spec.items(): 

339 if k in _side_key_map: 

340 k = _side_key_map[k] 

341 if k == "color": 

342 v = cls._convert_to_color(v) 

343 side_kwargs[k] = v 

344 

345 return Side(**side_kwargs) 

346 

347 @classmethod 

348 def _convert_to_border(cls, border_dict): 

349 """ 

350 Convert ``border_dict`` to an openpyxl v2 Border object. 

351 

352 Parameters 

353 ---------- 

354 border_dict : dict 

355 A dict with zero or more of the following keys (or their synonyms). 

356 'left' 

357 'right' 

358 'top' 

359 'bottom' 

360 'diagonal' 

361 'diagonal_direction' 

362 'vertical' 

363 'horizontal' 

364 'diagonalUp' ('diagonalup') 

365 'diagonalDown' ('diagonaldown') 

366 'outline' 

367 

368 Returns 

369 ------- 

370 border : openpyxl.styles.Border 

371 """ 

372 from openpyxl.styles import Border 

373 

374 _border_key_map = {"diagonalup": "diagonalUp", "diagonaldown": "diagonalDown"} 

375 

376 border_kwargs = {} 

377 for k, v in border_dict.items(): 

378 if k in _border_key_map: 

379 k = _border_key_map[k] 

380 if k == "color": 

381 v = cls._convert_to_color(v) 

382 if k in ["left", "right", "top", "bottom", "diagonal"]: 

383 v = cls._convert_to_side(v) 

384 border_kwargs[k] = v 

385 

386 return Border(**border_kwargs) 

387 

388 @classmethod 

389 def _convert_to_alignment(cls, alignment_dict): 

390 """ 

391 Convert ``alignment_dict`` to an openpyxl v2 Alignment object. 

392 

393 Parameters 

394 ---------- 

395 alignment_dict : dict 

396 A dict with zero or more of the following keys (or their synonyms). 

397 'horizontal' 

398 'vertical' 

399 'text_rotation' 

400 'wrap_text' 

401 'shrink_to_fit' 

402 'indent' 

403 Returns 

404 ------- 

405 alignment : openpyxl.styles.Alignment 

406 """ 

407 from openpyxl.styles import Alignment 

408 

409 return Alignment(**alignment_dict) 

410 

411 @classmethod 

412 def _convert_to_number_format(cls, number_format_dict): 

413 """ 

414 Convert ``number_format_dict`` to an openpyxl v2.1.0 number format 

415 initializer. 

416 

417 Parameters 

418 ---------- 

419 number_format_dict : dict 

420 A dict with zero or more of the following keys. 

421 'format_code' : str 

422 

423 Returns 

424 ------- 

425 number_format : str 

426 """ 

427 return number_format_dict["format_code"] 

428 

429 @classmethod 

430 def _convert_to_protection(cls, protection_dict): 

431 """ 

432 Convert ``protection_dict`` to an openpyxl v2 Protection object. 

433 

434 Parameters 

435 ---------- 

436 protection_dict : dict 

437 A dict with zero or more of the following keys. 

438 'locked' 

439 'hidden' 

440 

441 Returns 

442 ------- 

443 """ 

444 from openpyxl.styles import Protection 

445 

446 return Protection(**protection_dict) 

447 

448 def _write_cells( 

449 self, 

450 cells, 

451 sheet_name: str | None = None, 

452 startrow: int = 0, 

453 startcol: int = 0, 

454 freeze_panes: tuple[int, int] | None = None, 

455 ) -> None: 

456 # Write the frame cells using openpyxl. 

457 sheet_name = self._get_sheet_name(sheet_name) 

458 

459 _style_cache: dict[str, dict[str, Serialisable]] = {} 

460 

461 if sheet_name in self.sheets and self._if_sheet_exists != "new": 

462 if "r+" in self._mode: 

463 if self._if_sheet_exists == "replace": 

464 old_wks = self.sheets[sheet_name] 

465 target_index = self.book.index(old_wks) 

466 del self.book[sheet_name] 

467 wks = self.book.create_sheet(sheet_name, target_index) 

468 elif self._if_sheet_exists == "error": 

469 raise ValueError( 

470 f"Sheet '{sheet_name}' already exists and " 

471 f"if_sheet_exists is set to 'error'." 

472 ) 

473 elif self._if_sheet_exists == "overlay": 

474 wks = self.sheets[sheet_name] 

475 else: 

476 raise ValueError( 

477 f"'{self._if_sheet_exists}' is not valid for if_sheet_exists. " 

478 "Valid options are 'error', 'new', 'replace' and 'overlay'." 

479 ) 

480 else: 

481 wks = self.sheets[sheet_name] 

482 else: 

483 wks = self.book.create_sheet() 

484 wks.title = sheet_name 

485 

486 if validate_freeze_panes(freeze_panes): 

487 freeze_panes = cast(Tuple[int, int], freeze_panes) 

488 wks.freeze_panes = wks.cell( 

489 row=freeze_panes[0] + 1, column=freeze_panes[1] + 1 

490 ) 

491 

492 for cell in cells: 

493 xcell = wks.cell( 

494 row=startrow + cell.row + 1, column=startcol + cell.col + 1 

495 ) 

496 xcell.value, fmt = self._value_with_fmt(cell.val) 

497 if fmt: 

498 xcell.number_format = fmt 

499 

500 style_kwargs: dict[str, Serialisable] | None = {} 

501 if cell.style: 

502 key = str(cell.style) 

503 style_kwargs = _style_cache.get(key) 

504 if style_kwargs is None: 

505 style_kwargs = self._convert_to_style_kwargs(cell.style) 

506 _style_cache[key] = style_kwargs 

507 

508 if style_kwargs: 

509 for k, v in style_kwargs.items(): 

510 setattr(xcell, k, v) 

511 

512 if cell.mergestart is not None and cell.mergeend is not None: 

513 

514 wks.merge_cells( 

515 start_row=startrow + cell.row + 1, 

516 start_column=startcol + cell.col + 1, 

517 end_column=startcol + cell.mergeend + 1, 

518 end_row=startrow + cell.mergestart + 1, 

519 ) 

520 

521 # When cells are merged only the top-left cell is preserved 

522 # The behaviour of the other cells in a merged range is 

523 # undefined 

524 if style_kwargs: 

525 first_row = startrow + cell.row + 1 

526 last_row = startrow + cell.mergestart + 1 

527 first_col = startcol + cell.col + 1 

528 last_col = startcol + cell.mergeend + 1 

529 

530 for row in range(first_row, last_row + 1): 

531 for col in range(first_col, last_col + 1): 

532 if row == first_row and col == first_col: 

533 # Ignore first cell. It is already handled. 

534 continue 

535 xcell = wks.cell(column=col, row=row) 

536 for k, v in style_kwargs.items(): 

537 setattr(xcell, k, v) 

538 

539 

540class OpenpyxlReader(BaseExcelReader): 

541 @doc(storage_options=_shared_docs["storage_options"]) 

542 def __init__( 

543 self, 

544 filepath_or_buffer: FilePath | ReadBuffer[bytes], 

545 storage_options: StorageOptions = None, 

546 ) -> None: 

547 """ 

548 Reader using openpyxl engine. 

549 

550 Parameters 

551 ---------- 

552 filepath_or_buffer : str, path object or Workbook 

553 Object to be parsed. 

554 {storage_options} 

555 """ 

556 import_optional_dependency("openpyxl") 

557 super().__init__(filepath_or_buffer, storage_options=storage_options) 

558 

559 @property 

560 def _workbook_class(self): 

561 from openpyxl import Workbook 

562 

563 return Workbook 

564 

565 def load_workbook(self, filepath_or_buffer: FilePath | ReadBuffer[bytes]): 

566 from openpyxl import load_workbook 

567 

568 return load_workbook( 

569 filepath_or_buffer, read_only=True, data_only=True, keep_links=False 

570 ) 

571 

572 @property 

573 def sheet_names(self) -> list[str]: 

574 return [sheet.title for sheet in self.book.worksheets] 

575 

576 def get_sheet_by_name(self, name: str): 

577 self.raise_if_bad_sheet_by_name(name) 

578 return self.book[name] 

579 

580 def get_sheet_by_index(self, index: int): 

581 self.raise_if_bad_sheet_by_index(index) 

582 return self.book.worksheets[index] 

583 

584 def _convert_cell(self, cell, convert_float: bool) -> Scalar: 

585 

586 from openpyxl.cell.cell import ( 

587 TYPE_ERROR, 

588 TYPE_NUMERIC, 

589 ) 

590 

591 if cell.value is None: 

592 return "" # compat with xlrd 

593 elif cell.data_type == TYPE_ERROR: 

594 return np.nan 

595 elif cell.data_type == TYPE_NUMERIC: 

596 # GH5394, GH46988 

597 if convert_float: 

598 val = int(cell.value) 

599 if val == cell.value: 

600 return val 

601 else: 

602 return float(cell.value) 

603 

604 return cell.value 

605 

606 def get_sheet_data( 

607 self, sheet, convert_float: bool, file_rows_needed: int | None = None 

608 ) -> list[list[Scalar]]: 

609 

610 if self.book.read_only: 

611 sheet.reset_dimensions() 

612 

613 data: list[list[Scalar]] = [] 

614 last_row_with_data = -1 

615 for row_number, row in enumerate(sheet.rows): 

616 converted_row = [self._convert_cell(cell, convert_float) for cell in row] 

617 while converted_row and converted_row[-1] == "": 

618 # trim trailing empty elements 

619 converted_row.pop() 

620 if converted_row: 

621 last_row_with_data = row_number 

622 data.append(converted_row) 

623 if file_rows_needed is not None and len(data) >= file_rows_needed: 

624 break 

625 

626 # Trim trailing empty rows 

627 data = data[: last_row_with_data + 1] 

628 

629 if len(data) > 0: 

630 # extend rows to max width 

631 max_width = max(len(data_row) for data_row in data) 

632 if min(len(data_row) for data_row in data) < max_width: 

633 empty_cell: list[Scalar] = [""] 

634 data = [ 

635 data_row + (max_width - len(data_row)) * empty_cell 

636 for data_row in data 

637 ] 

638 

639 return data