Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/io/excel/_openpyxl.py: 16%
246 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1from __future__ import annotations
3import mmap
4from typing import (
5 TYPE_CHECKING,
6 Any,
7 Tuple,
8 cast,
9)
11import numpy as np
13from pandas._typing import (
14 FilePath,
15 ReadBuffer,
16 Scalar,
17 StorageOptions,
18 WriteExcelBuffer,
19)
20from pandas.compat._optional import import_optional_dependency
21from pandas.util._decorators import doc
23from pandas.core.shared_docs import _shared_docs
25from pandas.io.excel._base import (
26 BaseExcelReader,
27 ExcelWriter,
28)
29from pandas.io.excel._util import (
30 combine_kwargs,
31 validate_freeze_panes,
32)
34if TYPE_CHECKING: 34 ↛ 35line 34 didn't jump to line 35, because the condition on line 34 was never true
35 from openpyxl.descriptors.serialisable import Serialisable
36 from openpyxl.workbook import Workbook
39class OpenpyxlWriter(ExcelWriter):
40 _engine = "openpyxl"
41 _supported_extensions = (".xlsx", ".xlsm")
43 def __init__(
44 self,
45 path: FilePath | WriteExcelBuffer | ExcelWriter,
46 engine: str | None = None,
47 date_format: str | None = None,
48 datetime_format: str | None = None,
49 mode: str = "w",
50 storage_options: StorageOptions = None,
51 if_sheet_exists: str | None = None,
52 engine_kwargs: dict[str, Any] | None = None,
53 **kwargs,
54 ) -> None:
55 # Use the openpyxl module as the Excel writer.
56 from openpyxl.workbook import Workbook
58 engine_kwargs = combine_kwargs(engine_kwargs, kwargs)
60 super().__init__(
61 path,
62 mode=mode,
63 storage_options=storage_options,
64 if_sheet_exists=if_sheet_exists,
65 engine_kwargs=engine_kwargs,
66 )
68 # ExcelWriter replaced "a" by "r+" to allow us to first read the excel file from
69 # the file and later write to it
70 if "r+" in self._mode: # Load from existing workbook
71 from openpyxl import load_workbook
73 self._book = load_workbook(self._handles.handle, **engine_kwargs)
74 self._handles.handle.seek(0)
75 else:
76 # Create workbook object with default optimized_write=True.
77 self._book = Workbook(**engine_kwargs)
79 if self.book.worksheets:
80 self.book.remove(self.book.worksheets[0])
82 @property
83 def book(self) -> Workbook:
84 """
85 Book instance of class openpyxl.workbook.Workbook.
87 This attribute can be used to access engine-specific features.
88 """
89 return self._book
91 @book.setter
92 def book(self, other: Workbook) -> None:
93 """
94 Set book instance. Class type will depend on the engine used.
95 """
96 self._deprecate_set_book()
97 self._book = other
99 @property
100 def sheets(self) -> dict[str, Any]:
101 """Mapping of sheet names to sheet objects."""
102 result = {name: self.book[name] for name in self.book.sheetnames}
103 return result
105 def _save(self) -> None:
106 """
107 Save workbook to disk.
108 """
109 self.book.save(self._handles.handle)
110 if "r+" in self._mode and not isinstance(self._handles.handle, mmap.mmap):
111 # truncate file to the written content
112 self._handles.handle.truncate()
114 @classmethod
115 def _convert_to_style_kwargs(cls, style_dict: dict) -> dict[str, Serialisable]:
116 """
117 Convert a style_dict to a set of kwargs suitable for initializing
118 or updating-on-copy an openpyxl v2 style object.
120 Parameters
121 ----------
122 style_dict : dict
123 A dict with zero or more of the following keys (or their synonyms).
124 'font'
125 'fill'
126 'border' ('borders')
127 'alignment'
128 'number_format'
129 'protection'
131 Returns
132 -------
133 style_kwargs : dict
134 A dict with the same, normalized keys as ``style_dict`` but each
135 value has been replaced with a native openpyxl style object of the
136 appropriate class.
137 """
138 _style_key_map = {"borders": "border"}
140 style_kwargs: dict[str, Serialisable] = {}
141 for k, v in style_dict.items():
142 if k in _style_key_map:
143 k = _style_key_map[k]
144 _conv_to_x = getattr(cls, f"_convert_to_{k}", lambda x: None)
145 new_v = _conv_to_x(v)
146 if new_v:
147 style_kwargs[k] = new_v
149 return style_kwargs
151 @classmethod
152 def _convert_to_color(cls, color_spec):
153 """
154 Convert ``color_spec`` to an openpyxl v2 Color object.
156 Parameters
157 ----------
158 color_spec : str, dict
159 A 32-bit ARGB hex string, or a dict with zero or more of the
160 following keys.
161 'rgb'
162 'indexed'
163 'auto'
164 'theme'
165 'tint'
166 'index'
167 'type'
169 Returns
170 -------
171 color : openpyxl.styles.Color
172 """
173 from openpyxl.styles import Color
175 if isinstance(color_spec, str):
176 return Color(color_spec)
177 else:
178 return Color(**color_spec)
180 @classmethod
181 def _convert_to_font(cls, font_dict):
182 """
183 Convert ``font_dict`` to an openpyxl v2 Font object.
185 Parameters
186 ----------
187 font_dict : dict
188 A dict with zero or more of the following keys (or their synonyms).
189 'name'
190 'size' ('sz')
191 'bold' ('b')
192 'italic' ('i')
193 'underline' ('u')
194 'strikethrough' ('strike')
195 'color'
196 'vertAlign' ('vertalign')
197 'charset'
198 'scheme'
199 'family'
200 'outline'
201 'shadow'
202 'condense'
204 Returns
205 -------
206 font : openpyxl.styles.Font
207 """
208 from openpyxl.styles import Font
210 _font_key_map = {
211 "sz": "size",
212 "b": "bold",
213 "i": "italic",
214 "u": "underline",
215 "strike": "strikethrough",
216 "vertalign": "vertAlign",
217 }
219 font_kwargs = {}
220 for k, v in font_dict.items():
221 if k in _font_key_map:
222 k = _font_key_map[k]
223 if k == "color":
224 v = cls._convert_to_color(v)
225 font_kwargs[k] = v
227 return Font(**font_kwargs)
229 @classmethod
230 def _convert_to_stop(cls, stop_seq):
231 """
232 Convert ``stop_seq`` to a list of openpyxl v2 Color objects,
233 suitable for initializing the ``GradientFill`` ``stop`` parameter.
235 Parameters
236 ----------
237 stop_seq : iterable
238 An iterable that yields objects suitable for consumption by
239 ``_convert_to_color``.
241 Returns
242 -------
243 stop : list of openpyxl.styles.Color
244 """
245 return map(cls._convert_to_color, stop_seq)
247 @classmethod
248 def _convert_to_fill(cls, fill_dict: dict[str, Any]):
249 """
250 Convert ``fill_dict`` to an openpyxl v2 Fill object.
252 Parameters
253 ----------
254 fill_dict : dict
255 A dict with one or more of the following keys (or their synonyms),
256 'fill_type' ('patternType', 'patterntype')
257 'start_color' ('fgColor', 'fgcolor')
258 'end_color' ('bgColor', 'bgcolor')
259 or one or more of the following keys (or their synonyms).
260 'type' ('fill_type')
261 'degree'
262 'left'
263 'right'
264 'top'
265 'bottom'
266 'stop'
268 Returns
269 -------
270 fill : openpyxl.styles.Fill
271 """
272 from openpyxl.styles import (
273 GradientFill,
274 PatternFill,
275 )
277 _pattern_fill_key_map = {
278 "patternType": "fill_type",
279 "patterntype": "fill_type",
280 "fgColor": "start_color",
281 "fgcolor": "start_color",
282 "bgColor": "end_color",
283 "bgcolor": "end_color",
284 }
286 _gradient_fill_key_map = {"fill_type": "type"}
288 pfill_kwargs = {}
289 gfill_kwargs = {}
290 for k, v in fill_dict.items():
291 pk = gk = None
292 if k in _pattern_fill_key_map:
293 pk = _pattern_fill_key_map[k]
294 if k in _gradient_fill_key_map:
295 gk = _gradient_fill_key_map[k]
296 if pk in ["start_color", "end_color"]:
297 v = cls._convert_to_color(v)
298 if gk == "stop":
299 v = cls._convert_to_stop(v)
300 if pk:
301 pfill_kwargs[pk] = v
302 elif gk:
303 gfill_kwargs[gk] = v
304 else:
305 pfill_kwargs[k] = v
306 gfill_kwargs[k] = v
308 try:
309 return PatternFill(**pfill_kwargs)
310 except TypeError:
311 return GradientFill(**gfill_kwargs)
313 @classmethod
314 def _convert_to_side(cls, side_spec):
315 """
316 Convert ``side_spec`` to an openpyxl v2 Side object.
318 Parameters
319 ----------
320 side_spec : str, dict
321 A string specifying the border style, or a dict with zero or more
322 of the following keys (or their synonyms).
323 'style' ('border_style')
324 'color'
326 Returns
327 -------
328 side : openpyxl.styles.Side
329 """
330 from openpyxl.styles import Side
332 _side_key_map = {"border_style": "style"}
334 if isinstance(side_spec, str):
335 return Side(style=side_spec)
337 side_kwargs = {}
338 for k, v in side_spec.items():
339 if k in _side_key_map:
340 k = _side_key_map[k]
341 if k == "color":
342 v = cls._convert_to_color(v)
343 side_kwargs[k] = v
345 return Side(**side_kwargs)
347 @classmethod
348 def _convert_to_border(cls, border_dict):
349 """
350 Convert ``border_dict`` to an openpyxl v2 Border object.
352 Parameters
353 ----------
354 border_dict : dict
355 A dict with zero or more of the following keys (or their synonyms).
356 'left'
357 'right'
358 'top'
359 'bottom'
360 'diagonal'
361 'diagonal_direction'
362 'vertical'
363 'horizontal'
364 'diagonalUp' ('diagonalup')
365 'diagonalDown' ('diagonaldown')
366 'outline'
368 Returns
369 -------
370 border : openpyxl.styles.Border
371 """
372 from openpyxl.styles import Border
374 _border_key_map = {"diagonalup": "diagonalUp", "diagonaldown": "diagonalDown"}
376 border_kwargs = {}
377 for k, v in border_dict.items():
378 if k in _border_key_map:
379 k = _border_key_map[k]
380 if k == "color":
381 v = cls._convert_to_color(v)
382 if k in ["left", "right", "top", "bottom", "diagonal"]:
383 v = cls._convert_to_side(v)
384 border_kwargs[k] = v
386 return Border(**border_kwargs)
388 @classmethod
389 def _convert_to_alignment(cls, alignment_dict):
390 """
391 Convert ``alignment_dict`` to an openpyxl v2 Alignment object.
393 Parameters
394 ----------
395 alignment_dict : dict
396 A dict with zero or more of the following keys (or their synonyms).
397 'horizontal'
398 'vertical'
399 'text_rotation'
400 'wrap_text'
401 'shrink_to_fit'
402 'indent'
403 Returns
404 -------
405 alignment : openpyxl.styles.Alignment
406 """
407 from openpyxl.styles import Alignment
409 return Alignment(**alignment_dict)
411 @classmethod
412 def _convert_to_number_format(cls, number_format_dict):
413 """
414 Convert ``number_format_dict`` to an openpyxl v2.1.0 number format
415 initializer.
417 Parameters
418 ----------
419 number_format_dict : dict
420 A dict with zero or more of the following keys.
421 'format_code' : str
423 Returns
424 -------
425 number_format : str
426 """
427 return number_format_dict["format_code"]
429 @classmethod
430 def _convert_to_protection(cls, protection_dict):
431 """
432 Convert ``protection_dict`` to an openpyxl v2 Protection object.
434 Parameters
435 ----------
436 protection_dict : dict
437 A dict with zero or more of the following keys.
438 'locked'
439 'hidden'
441 Returns
442 -------
443 """
444 from openpyxl.styles import Protection
446 return Protection(**protection_dict)
448 def _write_cells(
449 self,
450 cells,
451 sheet_name: str | None = None,
452 startrow: int = 0,
453 startcol: int = 0,
454 freeze_panes: tuple[int, int] | None = None,
455 ) -> None:
456 # Write the frame cells using openpyxl.
457 sheet_name = self._get_sheet_name(sheet_name)
459 _style_cache: dict[str, dict[str, Serialisable]] = {}
461 if sheet_name in self.sheets and self._if_sheet_exists != "new":
462 if "r+" in self._mode:
463 if self._if_sheet_exists == "replace":
464 old_wks = self.sheets[sheet_name]
465 target_index = self.book.index(old_wks)
466 del self.book[sheet_name]
467 wks = self.book.create_sheet(sheet_name, target_index)
468 elif self._if_sheet_exists == "error":
469 raise ValueError(
470 f"Sheet '{sheet_name}' already exists and "
471 f"if_sheet_exists is set to 'error'."
472 )
473 elif self._if_sheet_exists == "overlay":
474 wks = self.sheets[sheet_name]
475 else:
476 raise ValueError(
477 f"'{self._if_sheet_exists}' is not valid for if_sheet_exists. "
478 "Valid options are 'error', 'new', 'replace' and 'overlay'."
479 )
480 else:
481 wks = self.sheets[sheet_name]
482 else:
483 wks = self.book.create_sheet()
484 wks.title = sheet_name
486 if validate_freeze_panes(freeze_panes):
487 freeze_panes = cast(Tuple[int, int], freeze_panes)
488 wks.freeze_panes = wks.cell(
489 row=freeze_panes[0] + 1, column=freeze_panes[1] + 1
490 )
492 for cell in cells:
493 xcell = wks.cell(
494 row=startrow + cell.row + 1, column=startcol + cell.col + 1
495 )
496 xcell.value, fmt = self._value_with_fmt(cell.val)
497 if fmt:
498 xcell.number_format = fmt
500 style_kwargs: dict[str, Serialisable] | None = {}
501 if cell.style:
502 key = str(cell.style)
503 style_kwargs = _style_cache.get(key)
504 if style_kwargs is None:
505 style_kwargs = self._convert_to_style_kwargs(cell.style)
506 _style_cache[key] = style_kwargs
508 if style_kwargs:
509 for k, v in style_kwargs.items():
510 setattr(xcell, k, v)
512 if cell.mergestart is not None and cell.mergeend is not None:
514 wks.merge_cells(
515 start_row=startrow + cell.row + 1,
516 start_column=startcol + cell.col + 1,
517 end_column=startcol + cell.mergeend + 1,
518 end_row=startrow + cell.mergestart + 1,
519 )
521 # When cells are merged only the top-left cell is preserved
522 # The behaviour of the other cells in a merged range is
523 # undefined
524 if style_kwargs:
525 first_row = startrow + cell.row + 1
526 last_row = startrow + cell.mergestart + 1
527 first_col = startcol + cell.col + 1
528 last_col = startcol + cell.mergeend + 1
530 for row in range(first_row, last_row + 1):
531 for col in range(first_col, last_col + 1):
532 if row == first_row and col == first_col:
533 # Ignore first cell. It is already handled.
534 continue
535 xcell = wks.cell(column=col, row=row)
536 for k, v in style_kwargs.items():
537 setattr(xcell, k, v)
540class OpenpyxlReader(BaseExcelReader):
541 @doc(storage_options=_shared_docs["storage_options"])
542 def __init__(
543 self,
544 filepath_or_buffer: FilePath | ReadBuffer[bytes],
545 storage_options: StorageOptions = None,
546 ) -> None:
547 """
548 Reader using openpyxl engine.
550 Parameters
551 ----------
552 filepath_or_buffer : str, path object or Workbook
553 Object to be parsed.
554 {storage_options}
555 """
556 import_optional_dependency("openpyxl")
557 super().__init__(filepath_or_buffer, storage_options=storage_options)
559 @property
560 def _workbook_class(self):
561 from openpyxl import Workbook
563 return Workbook
565 def load_workbook(self, filepath_or_buffer: FilePath | ReadBuffer[bytes]):
566 from openpyxl import load_workbook
568 return load_workbook(
569 filepath_or_buffer, read_only=True, data_only=True, keep_links=False
570 )
572 @property
573 def sheet_names(self) -> list[str]:
574 return [sheet.title for sheet in self.book.worksheets]
576 def get_sheet_by_name(self, name: str):
577 self.raise_if_bad_sheet_by_name(name)
578 return self.book[name]
580 def get_sheet_by_index(self, index: int):
581 self.raise_if_bad_sheet_by_index(index)
582 return self.book.worksheets[index]
584 def _convert_cell(self, cell, convert_float: bool) -> Scalar:
586 from openpyxl.cell.cell import (
587 TYPE_ERROR,
588 TYPE_NUMERIC,
589 )
591 if cell.value is None:
592 return "" # compat with xlrd
593 elif cell.data_type == TYPE_ERROR:
594 return np.nan
595 elif cell.data_type == TYPE_NUMERIC:
596 # GH5394, GH46988
597 if convert_float:
598 val = int(cell.value)
599 if val == cell.value:
600 return val
601 else:
602 return float(cell.value)
604 return cell.value
606 def get_sheet_data(
607 self, sheet, convert_float: bool, file_rows_needed: int | None = None
608 ) -> list[list[Scalar]]:
610 if self.book.read_only:
611 sheet.reset_dimensions()
613 data: list[list[Scalar]] = []
614 last_row_with_data = -1
615 for row_number, row in enumerate(sheet.rows):
616 converted_row = [self._convert_cell(cell, convert_float) for cell in row]
617 while converted_row and converted_row[-1] == "":
618 # trim trailing empty elements
619 converted_row.pop()
620 if converted_row:
621 last_row_with_data = row_number
622 data.append(converted_row)
623 if file_rows_needed is not None and len(data) >= file_rows_needed:
624 break
626 # Trim trailing empty rows
627 data = data[: last_row_with_data + 1]
629 if len(data) > 0:
630 # extend rows to max width
631 max_width = max(len(data_row) for data_row in data)
632 if min(len(data_row) for data_row in data) < max_width:
633 empty_cell: list[Scalar] = [""]
634 data = [
635 data_row + (max_width - len(data_row)) * empty_cell
636 for data_row in data
637 ]
639 return data