Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/io/excel/

1from __future__ import annotations

3import abc

4import datetime

5from functools import partial

6from io import BytesIO

7import os

8from textwrap import fill

9from typing import (

10 IO,

11 Any,

12 Callable,

13 Hashable,

14 Iterable,

15 List,

16 Literal,

17 Mapping,

18 Sequence,

19 Union,

20 cast,

21 overload,

22)

23import warnings

24import zipfile

26from pandas._config import config

28from pandas._libs.parsers import STR_NA_VALUES

29from pandas._typing import (

30 DtypeArg,

31 FilePath,

32 IntStrT,

33 ReadBuffer,

34 StorageOptions,

35 WriteExcelBuffer,

36)

37from pandas.compat._optional import (

38 get_version,

39 import_optional_dependency,

40)

41from pandas.errors import EmptyDataError

42from pandas.util._decorators import (

43 Appender,

44 deprecate_kwarg,

45 deprecate_nonkeyword_arguments,

46 doc,

47)

48from pandas.util._exceptions import find_stack_level

50from pandas.core.dtypes.common import (

51 is_bool,

52 is_float,

53 is_integer,

54 is_list_like,

55)

57from pandas.core.frame import DataFrame

58from pandas.core.shared_docs import _shared_docs

59from pandas.util.version import Version

61from pandas.io.common import (

62 IOHandles,

63 get_handle,

64 stringify_path,

65 validate_header_arg,

66)

67from pandas.io.excel._util import (

68 fill_mi_header,

69 get_default_engine,

70 get_writer,

71 maybe_convert_usecols,

72 pop_header_name,

73)

74from pandas.io.parsers import TextParser

75from pandas.io.parsers.readers import validate_integer

77_read_excel_doc = (

78 """

79Read an Excel file into a pandas DataFrame.

81Supports `xls`, `xlsx`, `xlsm`, `xlsb`, `odf`, `ods` and `odt` file extensions

82read from a local filesystem or URL. Supports an option to read

83a single sheet or a list of sheets.

85Parameters

86----------

87io : str, bytes, ExcelFile, xlrd.Book, path object, or file-like object

88 Any valid string path is acceptable. The string could be a URL. Valid

89 URL schemes include http, ftp, s3, and file. For file URLs, a host is

90 expected. A local file could be: ``file://localhost/path/to/table.xlsx``.

92 If you want to pass in a path object, pandas accepts any ``os.PathLike``.

94 By file-like object, we refer to objects with a ``read()`` method,

95 such as a file handle (e.g. via builtin ``open`` function)

96 or ``StringIO``.

97sheet_name : str, int, list, or None, default 0

98 Strings are used for sheet names. Integers are used in zero-indexed

99 sheet positions (chart sheets do not count as a sheet position).

100 Lists of strings/integers are used to request multiple sheets.

101 Specify None to get all worksheets.

102

103 Available cases:

104

105 * Defaults to ``0``: 1st sheet as a `DataFrame`

106 * ``1``: 2nd sheet as a `DataFrame`

107 * ``"Sheet1"``: Load sheet with name "Sheet1"

108 * ``[0, 1, "Sheet5"]``: Load first, second and sheet named "Sheet5"

109 as a dict of `DataFrame`

110 * None: All worksheets.

111

112header : int, list of int, default 0

113 Row (0-indexed) to use for the column labels of the parsed

114 DataFrame. If a list of integers is passed those row positions will

115 be combined into a ``MultiIndex``. Use None if there is no header.

116names : array-like, default None

117 List of column names to use. If file contains no header row,

118 then you should explicitly pass header=None.

119index_col : int, list of int, default None

120 Column (0-indexed) to use as the row labels of the DataFrame.

121 Pass None if there is no such column. If a list is passed,

122 those columns will be combined into a ``MultiIndex``. If a

123 subset of data is selected with ``usecols``, index_col

124 is based on the subset.

125

126 Missing values will be forward filled to allow roundtripping with

127 ``to_excel`` for ``merged_cells=True``. To avoid forward filling the

128 missing values use ``set_index`` after reading the data instead of

129 ``index_col``.

130usecols : str, list-like, or callable, default None

131 * If None, then parse all columns.

132 * If str, then indicates comma separated list of Excel column letters

133 and column ranges (e.g. "A:E" or "A,C,E:F"). Ranges are inclusive of

134 both sides.

135 * If list of int, then indicates list of column numbers to be parsed

136 (0-indexed).

137 * If list of string, then indicates list of column names to be parsed.

138 * If callable, then evaluate each column name against it and parse the

139 column if the callable returns ``True``.

140

141 Returns a subset of the columns according to behavior above.

142squeeze : bool, default False

143 If the parsed data only contains one column then return a Series.

144

145 .. deprecated:: 1.4.0

146 Append ``.squeeze("columns")`` to the call to ``read_excel`` to squeeze

147 the data.

148dtype : Type name or dict of column -> type, default None

149 Data type for data or columns. E.g. {{'a': np.float64, 'b': np.int32}}

150 Use `object` to preserve data as stored in Excel and not interpret dtype.

151 If converters are specified, they will be applied INSTEAD

152 of dtype conversion.

153engine : str, default None

154 If io is not a buffer or path, this must be set to identify io.

155 Supported engines: "xlrd", "openpyxl", "odf", "pyxlsb".

156 Engine compatibility :

157

158 - "xlrd" supports old-style Excel files (.xls).

159 - "openpyxl" supports newer Excel file formats.

160 - "odf" supports OpenDocument file formats (.odf, .ods, .odt).

161 - "pyxlsb" supports Binary Excel files.

162

163 .. versionchanged:: 1.2.0

164 The engine `xlrd <https://xlrd.readthedocs.io/en/latest/>`_

165 now only supports old-style ``.xls`` files.

166 When ``engine=None``, the following logic will be

167 used to determine the engine:

168

169 - If ``path_or_buffer`` is an OpenDocument format (.odf, .ods, .odt),

170 then `odf <https://pypi.org/project/odfpy/>`_ will be used.

171 - Otherwise if ``path_or_buffer`` is an xls format,

172 ``xlrd`` will be used.

173 - Otherwise if ``path_or_buffer`` is in xlsb format,

174 ``pyxlsb`` will be used.

175

176 .. versionadded:: 1.3.0

177 - Otherwise ``openpyxl`` will be used.

178

179 .. versionchanged:: 1.3.0

180

181converters : dict, default None

182 Dict of functions for converting values in certain columns. Keys can

183 either be integers or column labels, values are functions that take one

184 input argument, the Excel cell content, and return the transformed

185 content.

186true_values : list, default None

187 Values to consider as True.

188false_values : list, default None

189 Values to consider as False.

190skiprows : list-like, int, or callable, optional

191 Line numbers to skip (0-indexed) or number of lines to skip (int) at the

192 start of the file. If callable, the callable function will be evaluated

193 against the row indices, returning True if the row should be skipped and

194 False otherwise. An example of a valid callable argument would be ``lambda

195 x: x in [0, 2]``.

196nrows : int, default None

197 Number of rows to parse.

198na_values : scalar, str, list-like, or dict, default None

199 Additional strings to recognize as NA/NaN. If dict passed, specific

200 per-column NA values. By default the following values are interpreted

201 as NaN: '"""

202 + fill("', '".join(sorted(STR_NA_VALUES)), 70, subsequent_indent=" ")

203 + """'.

204keep_default_na : bool, default True

205 Whether or not to include the default NaN values when parsing the data.

206 Depending on whether `na_values` is passed in, the behavior is as follows:

207

208 * If `keep_default_na` is True, and `na_values` are specified, `na_values`

209 is appended to the default NaN values used for parsing.

210 * If `keep_default_na` is True, and `na_values` are not specified, only

211 the default NaN values are used for parsing.

212 * If `keep_default_na` is False, and `na_values` are specified, only

213 the NaN values specified `na_values` are used for parsing.

214 * If `keep_default_na` is False, and `na_values` are not specified, no

215 strings will be parsed as NaN.

216

217 Note that if `na_filter` is passed in as False, the `keep_default_na` and

218 `na_values` parameters will be ignored.

219na_filter : bool, default True

220 Detect missing value markers (empty strings and the value of na_values). In

221 data without any NAs, passing na_filter=False can improve the performance

222 of reading a large file.

223verbose : bool, default False

224 Indicate number of NA values placed in non-numeric columns.

225parse_dates : bool, list-like, or dict, default False

226 The behavior is as follows:

227

228 * bool. If True -> try parsing the index.

229 * list of int or names. e.g. If [1, 2, 3] -> try parsing columns 1, 2, 3

230 each as a separate date column.

231 * list of lists. e.g. If [[1, 3]] -> combine columns 1 and 3 and parse as

232 a single date column.

233 * dict, e.g. {{'foo' : [1, 3]}} -> parse columns 1, 3 as date and call

234 result 'foo'

235

236 If a column or index contains an unparsable date, the entire column or

237 index will be returned unaltered as an object data type. If you don`t want to

238 parse some cells as date just change their type in Excel to "Text".

239 For non-standard datetime parsing, use ``pd.to_datetime`` after ``pd.read_excel``.

240

241 Note: A fast-path exists for iso8601-formatted dates.

242date_parser : function, optional

243 Function to use for converting a sequence of string columns to an array of

244 datetime instances. The default uses ``dateutil.parser.parser`` to do the

245 conversion. Pandas will try to call `date_parser` in three different ways,

246 advancing to the next if an exception occurs: 1) Pass one or more arrays

247 (as defined by `parse_dates`) as arguments; 2) concatenate (row-wise) the

248 string values from the columns defined by `parse_dates` into a single array

249 and pass that; and 3) call `date_parser` once for each row using one or

250 more strings (corresponding to the columns defined by `parse_dates`) as

251 arguments.

252thousands : str, default None

253 Thousands separator for parsing string columns to numeric. Note that

254 this parameter is only necessary for columns stored as TEXT in Excel,

255 any numeric columns will automatically be parsed, regardless of display

256 format.

257decimal : str, default '.'

258 Character to recognize as decimal point for parsing string columns to numeric.

259 Note that this parameter is only necessary for columns stored as TEXT in Excel,

260 any numeric columns will automatically be parsed, regardless of display

261 format.(e.g. use ',' for European data).

262

263 .. versionadded:: 1.4.0

264

265comment : str, default None

266 Comments out remainder of line. Pass a character or characters to this

267 argument to indicate comments in the input file. Any data between the

268 comment string and the end of the current line is ignored.

269skipfooter : int, default 0

270 Rows at the end to skip (0-indexed).

271convert_float : bool, default True

272 Convert integral floats to int (i.e., 1.0 --> 1). If False, all numeric

273 data will be read in as floats: Excel stores all numbers as floats

274 internally.

275

276 .. deprecated:: 1.3.0

277 convert_float will be removed in a future version

278

279mangle_dupe_cols : bool, default True

280 Duplicate columns will be specified as 'X', 'X.1', ...'X.N', rather than

281 'X'...'X'. Passing in False will cause data to be overwritten if there

282 are duplicate names in the columns.

283

284 .. deprecated:: 1.5.0

285 Not implemented, and a new argument to specify the pattern for the

286 names of duplicated columns will be added instead

287

288{storage_options}

289

290 .. versionadded:: 1.2.0

291

292Returns

293-------

294DataFrame or dict of DataFrames

295 DataFrame from the passed in Excel file. See notes in sheet_name

296 argument for more information on when a dict of DataFrames is returned.

297

298See Also

299--------

300DataFrame.to_excel : Write DataFrame to an Excel file.

301DataFrame.to_csv : Write DataFrame to a comma-separated values (csv) file.

302read_csv : Read a comma-separated values (csv) file into DataFrame.

303read_fwf : Read a table of fixed-width formatted lines into DataFrame.

304

305Examples

306--------

307The file can be read using the file name as string or an open file object:

308

309>>> pd.read_excel('tmp.xlsx', index_col=0) # doctest: +SKIP

310 Name Value

3110 string1 1

3121 string2 2

3132 #Comment 3

314

315>>> pd.read_excel(open('tmp.xlsx', 'rb'),

316... sheet_name='Sheet3') # doctest: +SKIP

317 Unnamed: 0 Name Value

3180 0 string1 1

3191 1 string2 2

3202 2 #Comment 3

321

322Index and header can be specified via the `index_col` and `header` arguments

323

324>>> pd.read_excel('tmp.xlsx', index_col=None, header=None) # doctest: +SKIP

325 0 1 2

3260 NaN Name Value

3271 0.0 string1 1

3282 1.0 string2 2

3293 2.0 #Comment 3

330

331Column types are inferred but can be explicitly specified

332

333>>> pd.read_excel('tmp.xlsx', index_col=0,

334... dtype={{'Name': str, 'Value': float}}) # doctest: +SKIP

335 Name Value

3360 string1 1.0

3371 string2 2.0

3382 #Comment 3.0

339

340True, False, and NA values, and thousands separators have defaults,

341but can be explicitly specified, too. Supply the values you would like

342as strings or lists of strings!

343

344>>> pd.read_excel('tmp.xlsx', index_col=0,

345... na_values=['string1', 'string2']) # doctest: +SKIP

346 Name Value

3470 NaN 1

3481 NaN 2

3492 #Comment 3

350

351Comment lines in the excel input file can be skipped using the `comment` kwarg

352

353>>> pd.read_excel('tmp.xlsx', index_col=0, comment='#') # doctest: +SKIP

354 Name Value

3550 string1 1.0

3561 string2 2.0

3572 None NaN

358"""

359)

360

361

362@overload

363def read_excel(

364 io,

365 # sheet name is str or int -> DataFrame

366 sheet_name: str | int = ...,

367 header: int | Sequence[int] | None = ...,

368 names: list[str] | None = ...,

369 index_col: int | Sequence[int] | None = ...,

370 usecols: int

371 | str

372 | Sequence[int]

373 | Sequence[str]

374 | Callable[[str], bool]

375 | None = ...,

376 squeeze: bool | None = ...,

377 dtype: DtypeArg | None = ...,

378 engine: Literal["xlrd", "openpyxl", "odf", "pyxlsb"] | None = ...,

379 converters: dict[str, Callable] | dict[int, Callable] | None = ...,

380 true_values: Iterable[Hashable] | None = ...,

381 false_values: Iterable[Hashable] | None = ...,

382 skiprows: Sequence[int] | int | Callable[[int], object] | None = ...,

383 nrows: int | None = ...,

384 na_values=...,

385 keep_default_na: bool = ...,

386 na_filter: bool = ...,

387 verbose: bool = ...,

388 parse_dates: list | dict | bool = ...,

389 date_parser: Callable | None = ...,

390 thousands: str | None = ...,

391 decimal: str = ...,

392 comment: str | None = ...,

393 skipfooter: int = ...,

394 convert_float: bool | None = ...,

395 mangle_dupe_cols: bool = ...,

396 storage_options: StorageOptions = ...,

397) -> DataFrame:

398 ...

399

400

401@overload

402def read_excel(

403 io,

404 # sheet name is list or None -> dict[IntStrT, DataFrame]

405 sheet_name: list[IntStrT] | None,

406 header: int | Sequence[int] | None = ...,

407 names: list[str] | None = ...,

408 index_col: int | Sequence[int] | None = ...,

409 usecols: int

410 | str

411 | Sequence[int]

412 | Sequence[str]

413 | Callable[[str], bool]

414 | None = ...,

415 squeeze: bool | None = ...,

416 dtype: DtypeArg | None = ...,

417 engine: Literal["xlrd", "openpyxl", "odf", "pyxlsb"] | None = ...,

418 converters: dict[str, Callable] | dict[int, Callable] | None = ...,

419 true_values: Iterable[Hashable] | None = ...,

420 false_values: Iterable[Hashable] | None = ...,

421 skiprows: Sequence[int] | int | Callable[[int], object] | None = ...,

422 nrows: int | None = ...,

423 na_values=...,

424 keep_default_na: bool = ...,

425 na_filter: bool = ...,

426 verbose: bool = ...,

427 parse_dates: list | dict | bool = ...,

428 date_parser: Callable | None = ...,

429 thousands: str | None = ...,

430 decimal: str = ...,

431 comment: str | None = ...,

432 skipfooter: int = ...,

433 convert_float: bool | None = ...,

434 mangle_dupe_cols: bool = ...,

435 storage_options: StorageOptions = ...,

436) -> dict[IntStrT, DataFrame]:

437 ...

438

439

440@doc(storage_options=_shared_docs["storage_options"])

441@deprecate_kwarg(old_arg_name="mangle_dupe_cols", new_arg_name=None)

442@deprecate_nonkeyword_arguments(allowed_args=["io", "sheet_name"], version="2.0")

443@Appender(_read_excel_doc)

444def read_excel(

445 io,

446 sheet_name: str | int | list[IntStrT] | None = 0,

447 header: int | Sequence[int] | None = 0,

448 names: list[str] | None = None,

449 index_col: int | Sequence[int] | None = None,

450 usecols: int

451 | str

452 | Sequence[int]

453 | Sequence[str]

454 | Callable[[str], bool]

455 | None = None,

456 squeeze: bool | None = None,

457 dtype: DtypeArg | None = None,

458 engine: Literal["xlrd", "openpyxl", "odf", "pyxlsb"] | None = None,

459 converters: dict[str, Callable] | dict[int, Callable] | None = None,

460 true_values: Iterable[Hashable] | None = None,

461 false_values: Iterable[Hashable] | None = None,

462 skiprows: Sequence[int] | int | Callable[[int], object] | None = None,

463 nrows: int | None = None,

464 na_values=None,

465 keep_default_na: bool = True,

466 na_filter: bool = True,

467 verbose: bool = False,

468 parse_dates: list | dict | bool = False,

469 date_parser: Callable | None = None,

470 thousands: str | None = None,

471 decimal: str = ".",

472 comment: str | None = None,

473 skipfooter: int = 0,

474 convert_float: bool | None = None,

475 mangle_dupe_cols: bool = True,

476 storage_options: StorageOptions = None,

477) -> DataFrame | dict[IntStrT, DataFrame]:

478

479 should_close = False

480 if not isinstance(io, ExcelFile):

481 should_close = True

482 io = ExcelFile(io, storage_options=storage_options, engine=engine)

483 elif engine and engine != io.engine:

484 raise ValueError(

485 "Engine should not be specified when passing "

486 "an ExcelFile - ExcelFile already has the engine set"

487 )

488

489 try:

490 data = io.parse(

491 sheet_name=sheet_name,

492 header=header,

493 names=names,

494 index_col=index_col,

495 usecols=usecols,

496 squeeze=squeeze,

497 dtype=dtype,

498 converters=converters,

499 true_values=true_values,

500 false_values=false_values,

501 skiprows=skiprows,

502 nrows=nrows,

503 na_values=na_values,

504 keep_default_na=keep_default_na,

505 na_filter=na_filter,

506 verbose=verbose,

507 parse_dates=parse_dates,

508 date_parser=date_parser,

509 thousands=thousands,

510 decimal=decimal,

511 comment=comment,

512 skipfooter=skipfooter,

513 convert_float=convert_float,

514 mangle_dupe_cols=mangle_dupe_cols,

515 )

516 finally:

517 # make sure to close opened file handles

518 if should_close:

519 io.close()

520 return data

521

522

523class BaseExcelReader(metaclass=abc.ABCMeta):

524 def __init__(

525 self, filepath_or_buffer, storage_options: StorageOptions = None

526 ) -> None:

527 # First argument can also be bytes, so create a buffer

528 if isinstance(filepath_or_buffer, bytes):

529 filepath_or_buffer = BytesIO(filepath_or_buffer)

530

531 self.handles = IOHandles(

532 handle=filepath_or_buffer, compression={"method": None}

533 )

534 if not isinstance(filepath_or_buffer, (ExcelFile, self._workbook_class)):

535 self.handles = get_handle(

536 filepath_or_buffer, "rb", storage_options=storage_options, is_text=False

537 )

538

539 if isinstance(self.handles.handle, self._workbook_class):

540 self.book = self.handles.handle

541 elif hasattr(self.handles.handle, "read"):

542 # N.B. xlrd.Book has a read attribute too

543 self.handles.handle.seek(0)

544 try:

545 self.book = self.load_workbook(self.handles.handle)

546 except Exception:

547 self.close()

548 raise

549 else:

550 raise ValueError(

551 "Must explicitly set engine if not passing in buffer or path for io."

552 )

553

554 @property

555 @abc.abstractmethod

556 def _workbook_class(self):

557 pass

558

559 @abc.abstractmethod

560 def load_workbook(self, filepath_or_buffer):

561 pass

562

563 def close(self) -> None:

564 if hasattr(self, "book"):

565 if hasattr(self.book, "close"):

566 # pyxlsb: opens a TemporaryFile

567 # openpyxl: https://stackoverflow.com/questions/31416842/

568 # openpyxl-does-not-close-excel-workbook-in-read-only-mode

569 self.book.close()

570 elif hasattr(self.book, "release_resources"):

571 # xlrd

572 # https://github.com/python-excel/xlrd/blob/2.0.1/xlrd/book.py#L548

573 self.book.release_resources()

574 self.handles.close()

575

576 @property

577 @abc.abstractmethod

578 def sheet_names(self) -> list[str]:

579 pass

580

581 @abc.abstractmethod

582 def get_sheet_by_name(self, name: str):

583 pass

584

585 @abc.abstractmethod

586 def get_sheet_by_index(self, index: int):

587 pass

588

589 @abc.abstractmethod

590 def get_sheet_data(self, sheet, convert_float: bool, rows: int | None = None):

591 pass

592

593 def raise_if_bad_sheet_by_index(self, index: int) -> None:

594 n_sheets = len(self.sheet_names)

595 if index >= n_sheets:

596 raise ValueError(

597 f"Worksheet index {index} is invalid, {n_sheets} worksheets found"

598 )

599

600 def raise_if_bad_sheet_by_name(self, name: str) -> None:

601 if name not in self.sheet_names:

602 raise ValueError(f"Worksheet named '{name}' not found")

603

604 def _check_skiprows_func(

605 self,

606 skiprows: Callable,

607 rows_to_use: int,

608 ) -> int:

609 """

610 Determine how many file rows are required to obtain `nrows` data

611 rows when `skiprows` is a function.

612

613 Parameters

614 ----------

615 skiprows : function

616 The function passed to read_excel by the user.

617 rows_to_use : int

618 The number of rows that will be needed for the header and

619 the data.

620

621 Returns

622 -------

623 int

624 """

625 i = 0

626 rows_used_so_far = 0

627 while rows_used_so_far < rows_to_use:

628 if not skiprows(i):

629 rows_used_so_far += 1

630 i += 1

631 return i

632

633 def _calc_rows(

634 self,

635 header: int | Sequence[int] | None,

636 index_col: int | Sequence[int] | None,

637 skiprows: Sequence[int] | int | Callable[[int], object] | None,

638 nrows: int | None,

639 ) -> int | None:

640 """

641 If nrows specified, find the number of rows needed from the

642 file, otherwise return None.

643

644

645 Parameters

646 ----------

647 header : int, list of int, or None

648 See read_excel docstring.

649 index_col : int, list of int, or None

650 See read_excel docstring.

651 skiprows : list-like, int, callable, or None

652 See read_excel docstring.

653 nrows : int or None

654 See read_excel docstring.

655

656 Returns

657 -------

658 int or None

659 """

660 if nrows is None:

661 return None

662 if header is None:

663 header_rows = 1

664 elif is_integer(header):

665 header = cast(int, header)

666 header_rows = 1 + header

667 else:

668 header = cast(Sequence, header)

669 header_rows = 1 + header[-1]

670 # If there is a MultiIndex header and an index then there is also

671 # a row containing just the index name(s)

672 if is_list_like(header) and index_col is not None:

673 header = cast(Sequence, header)

674 if len(header) > 1:

675 header_rows += 1

676 if skiprows is None:

677 return header_rows + nrows

678 if is_integer(skiprows):

679 skiprows = cast(int, skiprows)

680 return header_rows + nrows + skiprows

681 if is_list_like(skiprows):

682

683 def f(skiprows: Sequence, x: int) -> bool:

684 return x in skiprows

685

686 skiprows = cast(Sequence, skiprows)

687 return self._check_skiprows_func(partial(f, skiprows), header_rows + nrows)

688 if callable(skiprows):

689 return self._check_skiprows_func(

690 skiprows,

691 header_rows + nrows,

692 )

693 # else unexpected skiprows type: read_excel will not optimize

694 # the number of rows read from file

695 return None

696

697 def parse(

698 self,

699 sheet_name: str | int | list[int] | list[str] | None = 0,

700 header: int | Sequence[int] | None = 0,

701 names=None,

702 index_col: int | Sequence[int] | None = None,

703 usecols=None,

704 squeeze: bool | None = None,

705 dtype: DtypeArg | None = None,

706 true_values: Iterable[Hashable] | None = None,

707 false_values: Iterable[Hashable] | None = None,

708 skiprows: Sequence[int] | int | Callable[[int], object] | None = None,

709 nrows: int | None = None,

710 na_values=None,

711 verbose: bool = False,

712 parse_dates: list | dict | bool = False,

713 date_parser: Callable | None = None,

714 thousands: str | None = None,

715 decimal: str = ".",

716 comment: str | None = None,

717 skipfooter: int = 0,

718 convert_float: bool | None = None,

719 mangle_dupe_cols: bool = True,

720 **kwds,

721 ):

722

723 if convert_float is None:

724 convert_float = True

725 else:

726 warnings.warn(

727 "convert_float is deprecated and will be removed in a future version.",

728 FutureWarning,

729 stacklevel=find_stack_level(),

730 )

731

732 validate_header_arg(header)

733 validate_integer("nrows", nrows)

734

735 ret_dict = False

736

737 # Keep sheetname to maintain backwards compatibility.

738 sheets: list[int] | list[str]

739 if isinstance(sheet_name, list):

740 sheets = sheet_name

741 ret_dict = True

742 elif sheet_name is None:

743 sheets = self.sheet_names

744 ret_dict = True

745 elif isinstance(sheet_name, str):

746 sheets = [sheet_name]

747 else:

748 sheets = [sheet_name]

749

750 # handle same-type duplicates.

751 sheets = cast(Union[List[int], List[str]], list(dict.fromkeys(sheets).keys()))

752

753 output = {}

754

755 for asheetname in sheets:

756 if verbose:

757 print(f"Reading sheet {asheetname}")

758

759 if isinstance(asheetname, str):

760 sheet = self.get_sheet_by_name(asheetname)

761 else: # assume an integer if not a string

762 sheet = self.get_sheet_by_index(asheetname)

763

764 file_rows_needed = self._calc_rows(header, index_col, skiprows, nrows)

765 data = self.get_sheet_data(sheet, convert_float, file_rows_needed)

766 if hasattr(sheet, "close"):

767 # pyxlsb opens two TemporaryFiles

768 sheet.close()

769 usecols = maybe_convert_usecols(usecols)

770

771 if not data:

772 output[asheetname] = DataFrame()

773 continue

774

775 is_list_header = False

776 is_len_one_list_header = False

777 if is_list_like(header):

778 assert isinstance(header, Sequence)

779 is_list_header = True

780 if len(header) == 1:

781 is_len_one_list_header = True

782

783 if is_len_one_list_header:

784 header = cast(Sequence[int], header)[0]

785

786 # forward fill and pull out names for MultiIndex column

787 header_names = None

788 if header is not None and is_list_like(header):

789 assert isinstance(header, Sequence)

790

791 header_names = []

792 control_row = [True] * len(data[0])

793

794 for row in header:

795 if is_integer(skiprows):

796 assert isinstance(skiprows, int)

797 row += skiprows

798

799 if row > len(data) - 1:

800 raise ValueError(

801 f"header index {row} exceeds maximum index "

802 f"{len(data) - 1} of data.",

803 )

804

805 data[row], control_row = fill_mi_header(data[row], control_row)

806

807 if index_col is not None:

808 header_name, _ = pop_header_name(data[row], index_col)

809 header_names.append(header_name)

810

811 # If there is a MultiIndex header and an index then there is also

812 # a row containing just the index name(s)

813 has_index_names = False

814 if is_list_header and not is_len_one_list_header and index_col is not None:

815

816 index_col_list: Sequence[int]

817 if isinstance(index_col, int):

818 index_col_list = [index_col]

819 else:

820 assert isinstance(index_col, Sequence)

821 index_col_list = index_col

822

823 # We have to handle mi without names. If any of the entries in the data

824 # columns are not empty, this is a regular row

825 assert isinstance(header, Sequence)

826 if len(header) < len(data):

827 potential_index_names = data[len(header)]

828 potential_data = [

829 x

830 for i, x in enumerate(potential_index_names)

831 if not control_row[i] and i not in index_col_list

832 ]

833 has_index_names = all(x == "" or x is None for x in potential_data)

834

835 if is_list_like(index_col):

836 # Forward fill values for MultiIndex index.

837 if header is None:

838 offset = 0

839 elif isinstance(header, int):

840 offset = 1 + header

841 else:

842 offset = 1 + max(header)

843

844 # GH34673: if MultiIndex names present and not defined in the header,

845 # offset needs to be incremented so that forward filling starts

846 # from the first MI value instead of the name

847 if has_index_names:

848 offset += 1

849

850 # Check if we have an empty dataset

851 # before trying to collect data.

852 if offset < len(data):

853 assert isinstance(index_col, Sequence)

854

855 for col in index_col:

856 last = data[offset][col]

857

858 for row in range(offset + 1, len(data)):

859 if data[row][col] == "" or data[row][col] is None:

860 data[row][col] = last

861 else:

862 last = data[row][col]

863

864 # GH 12292 : error when read one empty column from excel file

865 try:

866 parser = TextParser(

867 data,

868 names=names,

869 header=header,

870 index_col=index_col,

871 has_index_names=has_index_names,

872 squeeze=squeeze,

873 dtype=dtype,

874 true_values=true_values,

875 false_values=false_values,

876 skiprows=skiprows,

877 nrows=nrows,

878 na_values=na_values,

879 skip_blank_lines=False, # GH 39808

880 parse_dates=parse_dates,

881 date_parser=date_parser,

882 thousands=thousands,

883 decimal=decimal,

884 comment=comment,

885 skipfooter=skipfooter,

886 usecols=usecols,

887 mangle_dupe_cols=mangle_dupe_cols,

888 **kwds,

889 )

890

891 output[asheetname] = parser.read(nrows=nrows)

892

893 if not squeeze or isinstance(output[asheetname], DataFrame):

894 if header_names:

895 output[asheetname].columns = output[

896 asheetname

897 ].columns.set_names(header_names)

898

899 except EmptyDataError:

900 # No Data, return an empty DataFrame

901 output[asheetname] = DataFrame()

902

903 if ret_dict:

904 return output

905 else:

906 return output[asheetname]

907

908

909@doc(storage_options=_shared_docs["storage_options"])

910class ExcelWriter(metaclass=abc.ABCMeta):

911 """

912 Class for writing DataFrame objects into excel sheets.

913

914 Default is to use:

915

916 * `xlwt <https://pypi.org/project/xlwt/>`__ for xls files

917 * `xlsxwriter <https://pypi.org/project/XlsxWriter/>`__ for xlsx files if xlsxwriter

918 is installed otherwise `openpyxl <https://pypi.org/project/openpyxl/>`__

919 * `odswriter <https://pypi.org/project/odswriter/>`__ for ods files

920

921 See ``DataFrame.to_excel`` for typical usage.

922

923 The writer should be used as a context manager. Otherwise, call `close()` to save

924 and close any opened file handles.

925

926 Parameters

927 ----------

928 path : str or typing.BinaryIO

929 Path to xls or xlsx or ods file.

930 engine : str (optional)

931 Engine to use for writing. If None, defaults to

932 ``io.excel.<extension>.writer``. NOTE: can only be passed as a keyword

933 argument.

934

935 .. deprecated:: 1.2.0

936

937 As the `xlwt <https://pypi.org/project/xlwt/>`__ package is no longer

938 maintained, the ``xlwt`` engine will be removed in a future

939 version of pandas.

940

941 date_format : str, default None

942 Format string for dates written into Excel files (e.g. 'YYYY-MM-DD').

943 datetime_format : str, default None

944 Format string for datetime objects written into Excel files.

945 (e.g. 'YYYY-MM-DD HH:MM:SS').

946 mode : {{'w', 'a'}}, default 'w'

947 File mode to use (write or append). Append does not work with fsspec URLs.

948 {storage_options}

949

950 .. versionadded:: 1.2.0

951

952 if_sheet_exists : {{'error', 'new', 'replace', 'overlay'}}, default 'error'

953 How to behave when trying to write to a sheet that already

954 exists (append mode only).

955

956 * error: raise a ValueError.

957 * new: Create a new sheet, with a name determined by the engine.

958 * replace: Delete the contents of the sheet before writing to it.

959 * overlay: Write contents to the existing sheet without removing the old

960 contents.

961

962 .. versionadded:: 1.3.0

963

964 .. versionchanged:: 1.4.0

965

966 Added ``overlay`` option

967

968 engine_kwargs : dict, optional

969 Keyword arguments to be passed into the engine. These will be passed to

970 the following functions of the respective engines:

971

972 * xlsxwriter: ``xlsxwriter.Workbook(file, **engine_kwargs)``

973 * openpyxl (write mode): ``openpyxl.Workbook(**engine_kwargs)``

974 * openpyxl (append mode): ``openpyxl.load_workbook(file, **engine_kwargs)``

975 * odswriter: ``odf.opendocument.OpenDocumentSpreadsheet(**engine_kwargs)``

976

977 .. versionadded:: 1.3.0

978 **kwargs : dict, optional

979 Keyword arguments to be passed into the engine.

980

981 .. deprecated:: 1.3.0

982

983 Use engine_kwargs instead.

984

985 Notes

986 -----

987 For compatibility with CSV writers, ExcelWriter serializes lists

988 and dicts to strings before writing.

989

990 Examples

991 --------

992 Default usage:

993

994 >>> df = pd.DataFrame([["ABC", "XYZ"]], columns=["Foo", "Bar"]) # doctest: +SKIP

995 >>> with pd.ExcelWriter("path_to_file.xlsx") as writer:

996 ... df.to_excel(writer) # doctest: +SKIP

997

998 To write to separate sheets in a single file:

999

1000 >>> df1 = pd.DataFrame([["AAA", "BBB"]], columns=["Spam", "Egg"]) # doctest: +SKIP

1001 >>> df2 = pd.DataFrame([["ABC", "XYZ"]], columns=["Foo", "Bar"]) # doctest: +SKIP

1002 >>> with pd.ExcelWriter("path_to_file.xlsx") as writer:

1003 ... df1.to_excel(writer, sheet_name="Sheet1") # doctest: +SKIP

1004 ... df2.to_excel(writer, sheet_name="Sheet2") # doctest: +SKIP

1005

1006 You can set the date format or datetime format:

1007

1008 >>> from datetime import date, datetime # doctest: +SKIP

1009 >>> df = pd.DataFrame(

1010 ... [

1011 ... [date(2014, 1, 31), date(1999, 9, 24)],

1012 ... [datetime(1998, 5, 26, 23, 33, 4), datetime(2014, 2, 28, 13, 5, 13)],

1013 ... ],

1014 ... index=["Date", "Datetime"],

1015 ... columns=["X", "Y"],

1016 ... ) # doctest: +SKIP

1017 >>> with pd.ExcelWriter(

1018 ... "path_to_file.xlsx",

1019 ... date_format="YYYY-MM-DD",

1020 ... datetime_format="YYYY-MM-DD HH:MM:SS"

1021 ... ) as writer:

1022 ... df.to_excel(writer) # doctest: +SKIP

1023

1024 You can also append to an existing Excel file:

1025

1026 >>> with pd.ExcelWriter("path_to_file.xlsx", mode="a", engine="openpyxl") as writer:

1027 ... df.to_excel(writer, sheet_name="Sheet3") # doctest: +SKIP

1028

1029 Here, the `if_sheet_exists` parameter can be set to replace a sheet if it

1030 already exists:

1031

1032 >>> with ExcelWriter(

1033 ... "path_to_file.xlsx",

1034 ... mode="a",

1035 ... engine="openpyxl",

1036 ... if_sheet_exists="replace",

1037 ... ) as writer:

1038 ... df.to_excel(writer, sheet_name="Sheet1") # doctest: +SKIP

1039

1040 You can also write multiple DataFrames to a single sheet. Note that the

1041 ``if_sheet_exists`` parameter needs to be set to ``overlay``:

1042

1043 >>> with ExcelWriter("path_to_file.xlsx",

1044 ... mode="a",

1045 ... engine="openpyxl",

1046 ... if_sheet_exists="overlay",

1047 ... ) as writer:

1048 ... df1.to_excel(writer, sheet_name="Sheet1")

1049 ... df2.to_excel(writer, sheet_name="Sheet1", startcol=3) # doctest: +SKIP

1050

1051 You can store Excel file in RAM:

1052

1053 >>> import io

1054 >>> df = pd.DataFrame([["ABC", "XYZ"]], columns=["Foo", "Bar"])

1055 >>> buffer = io.BytesIO()

1056 >>> with pd.ExcelWriter(buffer) as writer:

1057 ... df.to_excel(writer)

1058

1059 You can pack Excel file into zip archive:

1060

1061 >>> import zipfile # doctest: +SKIP

1062 >>> df = pd.DataFrame([["ABC", "XYZ"]], columns=["Foo", "Bar"]) # doctest: +SKIP

1063 >>> with zipfile.ZipFile("path_to_file.zip", "w") as zf:

1064 ... with zf.open("filename.xlsx", "w") as buffer:

1065 ... with pd.ExcelWriter(buffer) as writer:

1066 ... df.to_excel(writer) # doctest: +SKIP

1067

1068 You can specify additional arguments to the underlying engine:

1069

1070 >>> with pd.ExcelWriter(

1071 ... "path_to_file.xlsx",

1072 ... engine="xlsxwriter",

1073 ... engine_kwargs={{"options": {{"nan_inf_to_errors": True}}}}

1074 ... ) as writer:

1075 ... df.to_excel(writer) # doctest: +SKIP

1076

1077 In append mode, ``engine_kwargs`` are passed through to

1078 openpyxl's ``load_workbook``:

1079

1080 >>> with pd.ExcelWriter(

1081 ... "path_to_file.xlsx",

1082 ... engine="openpyxl",

1083 ... mode="a",

1084 ... engine_kwargs={{"keep_vba": True}}

1085 ... ) as writer:

1086 ... df.to_excel(writer, sheet_name="Sheet2") # doctest: +SKIP

1087 """

1088

1089 # Defining an ExcelWriter implementation (see abstract methods for more...)

1090

1091 # - Mandatory

1092 # - ``write_cells(self, cells, sheet_name=None, startrow=0, startcol=0)``

1093 # --> called to write additional DataFrames to disk

1094 # - ``_supported_extensions`` (tuple of supported extensions), used to

1095 # check that engine supports the given extension.

1096 # - ``_engine`` - string that gives the engine name. Necessary to

1097 # instantiate class directly and bypass ``ExcelWriterMeta`` engine

1098 # lookup.

1099 # - ``save(self)`` --> called to save file to disk

1100 # - Mostly mandatory (i.e. should at least exist)

1101 # - book, cur_sheet, path

1102

1103 # - Optional:

1104 # - ``__init__(self, path, engine=None, **kwargs)`` --> always called

1105 # with path as first argument.

1106

1107 # You also need to register the class with ``register_writer()``.

1108 # Technically, ExcelWriter implementations don't need to subclass

1109 # ExcelWriter.

1110

1111 _engine: str

1112 _supported_extensions: tuple[str, ...]

1113

1114 def __new__(

1115 cls: type[ExcelWriter],

1116 path: FilePath | WriteExcelBuffer | ExcelWriter,

1117 engine: str | None = None,

1118 date_format: str | None = None,

1119 datetime_format: str | None = None,

1120 mode: str = "w",

1121 storage_options: StorageOptions = None,

1122 if_sheet_exists: Literal["error", "new", "replace", "overlay"] | None = None,

1123 engine_kwargs: dict | None = None,

1124 **kwargs,

1125 ) -> ExcelWriter:

1126 if kwargs:

1127 if engine_kwargs is not None:

1128 raise ValueError("Cannot use both engine_kwargs and **kwargs")

1129 warnings.warn(

1130 "Use of **kwargs is deprecated, use engine_kwargs instead.",

1131 FutureWarning,

1132 stacklevel=find_stack_level(),

1133 )

1134

1135 # only switch class if generic(ExcelWriter)

1136 if cls is ExcelWriter:

1137 if engine is None or (isinstance(engine, str) and engine == "auto"):

1138 if isinstance(path, str):

1139 ext = os.path.splitext(path)[-1][1:]

1140 else:

1141 ext = "xlsx"

1142

1143 try:

1144 engine = config.get_option(f"io.excel.{ext}.writer", silent=True)

1145 if engine == "auto":

1146 engine = get_default_engine(ext, mode="writer")

1147 except KeyError as err:

1148 raise ValueError(f"No engine for filetype: '{ext}'") from err

1149

1150 if engine == "xlwt":

1151 xls_config_engine = config.get_option(

1152 "io.excel.xls.writer", silent=True

1153 )

1154 # Don't warn a 2nd time if user has changed the default engine for xls

1155 if xls_config_engine != "xlwt":

1156 warnings.warn(

1157 "As the xlwt package is no longer maintained, the xlwt "

1158 "engine will be removed in a future version of pandas. "

1159 "This is the only engine in pandas that supports writing "

1160 "in the xls format. Install openpyxl and write to an xlsx "

1161 "file instead. You can set the option io.excel.xls.writer "

1162 "to 'xlwt' to silence this warning. While this option is "

1163 "deprecated and will also raise a warning, it can "

1164 "be globally set and the warning suppressed.",

1165 FutureWarning,

1166 stacklevel=find_stack_level(),

1167 )

1168

1169 # for mypy

1170 assert engine is not None

1171 cls = get_writer(engine)

1172

1173 return object.__new__(cls)

1174

1175 # declare external properties you can count on

1176 _path = None

1177

1178 @property

1179 def supported_extensions(self) -> tuple[str, ...]:

1180 """Extensions that writer engine supports."""

1181 return self._supported_extensions

1182

1183 @property

1184 def engine(self) -> str:

1185 """Name of engine."""

1186 return self._engine

1187

1188 @property

1189 @abc.abstractmethod

1190 def sheets(self) -> dict[str, Any]:

1191 """Mapping of sheet names to sheet objects."""

1192 pass

1193

1194 # mypy doesn't handle abstract setters prior to 0.981

1195 # https://github.com/python/mypy/issues/4165

1196 @property # type: ignore[misc]

1197 @abc.abstractmethod

1198 def book(self):

1199 """

1200 Book instance. Class type will depend on the engine used.

1201

1202 This attribute can be used to access engine-specific features.

1203 """

1204 pass

1205

1206 # mypy doesn't handle abstract setters prior to 0.981

1207 # https://github.com/python/mypy/issues/4165

1208 @book.setter # type: ignore[misc]

1209 @abc.abstractmethod

1210 def book(self, other) -> None:

1211 """

1212 Set book instance. Class type will depend on the engine used.

1213 """

1214 pass

1215

1216 def write_cells(

1217 self,

1218 cells,

1219 sheet_name: str | None = None,

1220 startrow: int = 0,

1221 startcol: int = 0,

1222 freeze_panes: tuple[int, int] | None = None,

1223 ) -> None:

1224 """

1225 Write given formatted cells into Excel an excel sheet

1226

1227 .. deprecated:: 1.5.0

1228

1229 Parameters

1230 ----------

1231 cells : generator

1232 cell of formatted data to save to Excel sheet

1233 sheet_name : str, default None

1234 Name of Excel sheet, if None, then use self.cur_sheet

1235 startrow : upper left cell row to dump data frame

1236 startcol : upper left cell column to dump data frame

1237 freeze_panes: int tuple of length 2

1238 contains the bottom-most row and right-most column to freeze

1239 """

1240 self._deprecate("write_cells")

1241 return self._write_cells(cells, sheet_name, startrow, startcol, freeze_panes)

1242

1243 @abc.abstractmethod

1244 def _write_cells(

1245 self,

1246 cells,

1247 sheet_name: str | None = None,

1248 startrow: int = 0,

1249 startcol: int = 0,

1250 freeze_panes: tuple[int, int] | None = None,

1251 ) -> None:

1252 """

1253 Write given formatted cells into Excel an excel sheet

1254

1255 Parameters

1256 ----------

1257 cells : generator

1258 cell of formatted data to save to Excel sheet

1259 sheet_name : str, default None

1260 Name of Excel sheet, if None, then use self.cur_sheet

1261 startrow : upper left cell row to dump data frame

1262 startcol : upper left cell column to dump data frame

1263 freeze_panes: int tuple of length 2

1264 contains the bottom-most row and right-most column to freeze

1265 """

1266 pass

1267

1268 def save(self) -> None:

1269 """

1270 Save workbook to disk.

1271

1272 .. deprecated:: 1.5.0

1273 """

1274 self._deprecate("save")

1275 return self._save()

1276

1277 @abc.abstractmethod

1278 def _save(self) -> None:

1279 """

1280 Save workbook to disk.

1281 """

1282 pass

1283

1284 def __init__(

1285 self,

1286 path: FilePath | WriteExcelBuffer | ExcelWriter,

1287 engine: str | None = None,

1288 date_format: str | None = None,

1289 datetime_format: str | None = None,

1290 mode: str = "w",

1291 storage_options: StorageOptions = None,

1292 if_sheet_exists: str | None = None,

1293 engine_kwargs: dict[str, Any] | None = None,

1294 **kwargs,

1295 ) -> None:

1296 # validate that this engine can handle the extension

1297 if isinstance(path, str):

1298 ext = os.path.splitext(path)[-1]

1299 self.check_extension(ext)

1300

1301 # use mode to open the file

1302 if "b" not in mode:

1303 mode += "b"

1304 # use "a" for the user to append data to excel but internally use "r+" to let

1305 # the excel backend first read the existing file and then write any data to it

1306 mode = mode.replace("a", "r+")

1307

1308 # cast ExcelWriter to avoid adding 'if self.handles is not None'

1309 self._handles = IOHandles(

1310 cast(IO[bytes], path), compression={"compression": None}

1311 )

1312 if not isinstance(path, ExcelWriter):

1313 self._handles = get_handle(

1314 path, mode, storage_options=storage_options, is_text=False

1315 )

1316 self._cur_sheet = None

1317

1318 if date_format is None:

1319 self._date_format = "YYYY-MM-DD"

1320 else:

1321 self._date_format = date_format

1322 if datetime_format is None:

1323 self._datetime_format = "YYYY-MM-DD HH:MM:SS"

1324 else:

1325 self._datetime_format = datetime_format

1326

1327 self._mode = mode

1328

1329 if if_sheet_exists not in (None, "error", "new", "replace", "overlay"):

1330 raise ValueError(

1331 f"'{if_sheet_exists}' is not valid for if_sheet_exists. "

1332 "Valid options are 'error', 'new', 'replace' and 'overlay'."

1333 )

1334 if if_sheet_exists and "r+" not in mode:

1335 raise ValueError("if_sheet_exists is only valid in append mode (mode='a')")

1336 if if_sheet_exists is None:

1337 if_sheet_exists = "error"

1338 self._if_sheet_exists = if_sheet_exists

1339

1340 def _deprecate(self, attr: str):

1341 """

1342 Deprecate attribute or method for ExcelWriter.

1343 """

1344 warnings.warn(

1345 f"{attr} is not part of the public API, usage can give unexpected "

1346 "results and will be removed in a future version",

1347 FutureWarning,

1348 stacklevel=find_stack_level(),

1349 )

1350

1351 def _deprecate_set_book(self) -> None:

1352 """

1353 Deprecate setting the book attribute - GH#48780.

1354 """

1355 warnings.warn(

1356 "Setting the `book` attribute is not part of the public API, "

1357 "usage can give unexpected or corrupted results and will be "

1358 "removed in a future version",

1359 FutureWarning,

1360 stacklevel=find_stack_level(),

1361 )

1362

1363 @property

1364 def date_format(self) -> str:

1365 """

1366 Format string for dates written into Excel files (e.g. ‘YYYY-MM-DD’).

1367 """

1368 return self._date_format

1369

1370 @property

1371 def datetime_format(self) -> str:

1372 """

1373 Format string for dates written into Excel files (e.g. ‘YYYY-MM-DD’).

1374 """

1375 return self._datetime_format

1376

1377 @property

1378 def if_sheet_exists(self) -> str:

1379 """

1380 How to behave when writing to a sheet that already exists in append mode.

1381 """

1382 return self._if_sheet_exists

1383

1384 @property

1385 def cur_sheet(self):

1386 """

1387 Current sheet for writing.

1388

1389 .. deprecated:: 1.5.0

1390 """

1391 self._deprecate("cur_sheet")

1392 return self._cur_sheet

1393

1394 @property

1395 def handles(self) -> IOHandles[bytes]:

1396 """

1397 Handles to Excel sheets.

1398

1399 .. deprecated:: 1.5.0

1400 """

1401 self._deprecate("handles")

1402 return self._handles

1403

1404 @property

1405 def path(self):

1406 """

1407 Path to Excel file.

1408

1409 .. deprecated:: 1.5.0

1410 """

1411 self._deprecate("path")

1412 return self._path

1413

1414 def __fspath__(self) -> str:

1415 return getattr(self._handles.handle, "name", "")

1416

1417 def _get_sheet_name(self, sheet_name: str | None) -> str:

1418 if sheet_name is None:

1419 sheet_name = self._cur_sheet

1420 if sheet_name is None: # pragma: no cover

1421 raise ValueError("Must pass explicit sheet_name or set _cur_sheet property")

1422 return sheet_name

1423

1424 def _value_with_fmt(self, val) -> tuple[object, str | None]:

1425 """

1426 Convert numpy types to Python types for the Excel writers.

1427

1428 Parameters

1429 ----------

1430 val : object

1431 Value to be written into cells

1432

1433 Returns

1434 -------

1435 Tuple with the first element being the converted value and the second

1436 being an optional format

1437 """

1438 fmt = None

1439

1440 if is_integer(val):

1441 val = int(val)

1442 elif is_float(val):

1443 val = float(val)

1444 elif is_bool(val):

1445 val = bool(val)

1446 elif isinstance(val, datetime.datetime):

1447 fmt = self._datetime_format

1448 elif isinstance(val, datetime.date):

1449 fmt = self._date_format

1450 elif isinstance(val, datetime.timedelta):

1451 val = val.total_seconds() / 86400

1452 fmt = "0"

1453 else:

1454 val = str(val)

1455

1456 return val, fmt

1457

1458 @classmethod

1459 def check_extension(cls, ext: str) -> Literal[True]:

1460 """

1461 checks that path's extension against the Writer's supported

1462 extensions. If it isn't supported, raises UnsupportedFiletypeError.

1463 """

1464 if ext.startswith("."):

1465 ext = ext[1:]

1466 if not any(ext in extension for extension in cls._supported_extensions):

1467 raise ValueError(f"Invalid extension for engine '{cls.engine}': '{ext}'")

1468 else:

1469 return True

1470

1471 # Allow use as a contextmanager

1472 def __enter__(self) -> ExcelWriter:

1473 return self

1474

1475 def __exit__(self, exc_type, exc_value, traceback) -> None:

1476 self.close()

1477

1478 def close(self) -> None:

1479 """synonym for save, to make it more file-like"""

1480 self._save()

1481 self._handles.close()

1482

1483

1484XLS_SIGNATURES = (

1485 b"\x09\x00\x04\x00\x07\x00\x10\x00", # BIFF2

1486 b"\x09\x02\x06\x00\x00\x00\x10\x00", # BIFF3

1487 b"\x09\x04\x06\x00\x00\x00\x10\x00", # BIFF4

1488 b"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1", # Compound File Binary

1489)

1490ZIP_SIGNATURE = b"PK\x03\x04"

1491PEEK_SIZE = max(map(len, XLS_SIGNATURES + (ZIP_SIGNATURE,)))

1492

1493

1494@doc(storage_options=_shared_docs["storage_options"])

1495def inspect_excel_format(

1496 content_or_path: FilePath | ReadBuffer[bytes],

1497 storage_options: StorageOptions = None,

1498) -> str | None:

1499 """

1500 Inspect the path or content of an excel file and get its format.

1501

1502 Adopted from xlrd: https://github.com/python-excel/xlrd.

1503

1504 Parameters

1505 ----------

1506 content_or_path : str or file-like object

1507 Path to file or content of file to inspect. May be a URL.

1508 {storage_options}

1509

1510 Returns

1511 -------

1512 str or None

1513 Format of file if it can be determined.

1514

1515 Raises

1516 ------

1517 ValueError

1518 If resulting stream is empty.

1519 BadZipFile

1520 If resulting stream does not have an XLS signature and is not a valid zipfile.

1521 """

1522 if isinstance(content_or_path, bytes):

1523 content_or_path = BytesIO(content_or_path)

1524

1525 with get_handle(

1526 content_or_path, "rb", storage_options=storage_options, is_text=False

1527 ) as handle:

1528 stream = handle.handle

1529 stream.seek(0)

1530 buf = stream.read(PEEK_SIZE)

1531 if buf is None:

1532 raise ValueError("stream is empty")

1533 else:

1534 assert isinstance(buf, bytes)

1535 peek = buf

1536 stream.seek(0)

1537

1538 if any(peek.startswith(sig) for sig in XLS_SIGNATURES):

1539 return "xls"

1540 elif not peek.startswith(ZIP_SIGNATURE):

1541 return None

1542

1543 with zipfile.ZipFile(stream) as zf:

1544 # Workaround for some third party files that use forward slashes and

1545 # lower case names.

1546 component_names = [

1547 name.replace("\\", "/").lower() for name in zf.namelist()

1548 ]

1549

1550 if "xl/workbook.xml" in component_names:

1551 return "xlsx"

1552 if "xl/workbook.bin" in component_names:

1553 return "xlsb"

1554 if "content.xml" in component_names:

1555 return "ods"

1556 return "zip"

1557

1558

1559class ExcelFile:

1560 """

1561 Class for parsing tabular excel sheets into DataFrame objects.

1562

1563 See read_excel for more documentation.

1564

1565 Parameters

1566 ----------

1567 path_or_buffer : str, bytes, path object (pathlib.Path or py._path.local.LocalPath),

1568 a file-like object, xlrd workbook or openpyxl workbook.

1569 If a string or path object, expected to be a path to a

1570 .xls, .xlsx, .xlsb, .xlsm, .odf, .ods, or .odt file.

1571 engine : str, default None

1572 If io is not a buffer or path, this must be set to identify io.

1573 Supported engines: ``xlrd``, ``openpyxl``, ``odf``, ``pyxlsb``

1574 Engine compatibility :

1575

1576 - ``xlrd`` supports old-style Excel files (.xls).

1577 - ``openpyxl`` supports newer Excel file formats.

1578 - ``odf`` supports OpenDocument file formats (.odf, .ods, .odt).

1579 - ``pyxlsb`` supports Binary Excel files.

1580

1581 .. versionchanged:: 1.2.0

1582

1583 The engine `xlrd <https://xlrd.readthedocs.io/en/latest/>`_

1584 now only supports old-style ``.xls`` files.

1585 When ``engine=None``, the following logic will be

1586 used to determine the engine:

1587

1588 - If ``path_or_buffer`` is an OpenDocument format (.odf, .ods, .odt),

1589 then `odf <https://pypi.org/project/odfpy/>`_ will be used.

1590 - Otherwise if ``path_or_buffer`` is an xls format,

1591 ``xlrd`` will be used.

1592 - Otherwise if ``path_or_buffer`` is in xlsb format,

1593 `pyxlsb <https://pypi.org/project/pyxlsb/>`_ will be used.

1594

1595 .. versionadded:: 1.3.0

1596 - Otherwise if `openpyxl <https://pypi.org/project/openpyxl/>`_ is installed,

1597 then ``openpyxl`` will be used.

1598 - Otherwise if ``xlrd >= 2.0`` is installed, a ``ValueError`` will be raised.

1599 - Otherwise ``xlrd`` will be used and a ``FutureWarning`` will be raised.

1600 This case will raise a ``ValueError`` in a future version of pandas.

1601

1602 .. warning::

1603

1604 Please do not report issues when using ``xlrd`` to read ``.xlsx`` files.

1605 This is not supported, switch to using ``openpyxl`` instead.

1606 """

1607

1608 from pandas.io.excel._odfreader import ODFReader

1609 from pandas.io.excel._openpyxl import OpenpyxlReader

1610 from pandas.io.excel._pyxlsb import PyxlsbReader

1611 from pandas.io.excel._xlrd import XlrdReader

1612

1613 _engines: Mapping[str, Any] = {

1614 "xlrd": XlrdReader,

1615 "openpyxl": OpenpyxlReader,

1616 "odf": ODFReader,

1617 "pyxlsb": PyxlsbReader,

1618 }

1619

1620 def __init__(

1621 self,

1622 path_or_buffer,

1623 engine: str | None = None,

1624 storage_options: StorageOptions = None,

1625 ) -> None:

1626 if engine is not None and engine not in self._engines:

1627 raise ValueError(f"Unknown engine: {engine}")

1628

1629 # First argument can also be bytes, so create a buffer

1630 if isinstance(path_or_buffer, bytes):

1631 path_or_buffer = BytesIO(path_or_buffer)

1632

1633 # Could be a str, ExcelFile, Book, etc.

1634 self.io = path_or_buffer

1635 # Always a string

1636 self._io = stringify_path(path_or_buffer)

1637

1638 # Determine xlrd version if installed

1639 if import_optional_dependency("xlrd", errors="ignore") is None:

1640 xlrd_version = None

1641 else:

1642 import xlrd

1643

1644 xlrd_version = Version(get_version(xlrd))

1645

1646 ext = None

1647 if engine is None:

1648 # Only determine ext if it is needed

1649 if xlrd_version is not None and isinstance(path_or_buffer, xlrd.Book):

1650 ext = "xls"

1651 else:

1652 ext = inspect_excel_format(

1653 content_or_path=path_or_buffer, storage_options=storage_options

1654 )

1655 if ext is None:

1656 raise ValueError(

1657 "Excel file format cannot be determined, you must specify "

1658 "an engine manually."

1659 )

1660

1661 engine = config.get_option(f"io.excel.{ext}.reader", silent=True)

1662 if engine == "auto":

1663 engine = get_default_engine(ext, mode="reader")

1664

1665 if engine == "xlrd" and xlrd_version is not None:

1666 if ext is None:

1667 # Need ext to determine ext in order to raise/warn

1668 if isinstance(path_or_buffer, xlrd.Book):

1669 ext = "xls"

1670 else:

1671 ext = inspect_excel_format(

1672 path_or_buffer, storage_options=storage_options

1673 )

1674

1675 # Pass through if ext is None, otherwise check if ext valid for xlrd

1676 if ext and ext != "xls" and xlrd_version >= Version("2"):

1677 raise ValueError(

1678 f"Your version of xlrd is {xlrd_version}. In xlrd >= 2.0, "

1679 f"only the xls format is supported. Install openpyxl instead."

1680 )

1681 elif ext and ext != "xls":

1682 stacklevel = find_stack_level()

1683 warnings.warn(

1684 f"Your version of xlrd is {xlrd_version}. In xlrd >= 2.0, "

1685 f"only the xls format is supported. Install "

1686 f"openpyxl instead.",

1687 FutureWarning,

1688 stacklevel=stacklevel,

1689 )

1690

1691 assert engine is not None

1692 self.engine = engine

1693 self.storage_options = storage_options

1694

1695 self._reader = self._engines[engine](self._io, storage_options=storage_options)

1696

1697 def __fspath__(self):

1698 return self._io

1699

1700 def parse(

1701 self,

1702 sheet_name: str | int | list[int] | list[str] | None = 0,

1703 header: int | Sequence[int] | None = 0,

1704 names=None,

1705 index_col: int | Sequence[int] | None = None,

1706 usecols=None,

1707 squeeze: bool | None = None,

1708 converters=None,

1709 true_values: Iterable[Hashable] | None = None,

1710 false_values: Iterable[Hashable] | None = None,

1711 skiprows: Sequence[int] | int | Callable[[int], object] | None = None,

1712 nrows: int | None = None,

1713 na_values=None,

1714 parse_dates: list | dict | bool = False,

1715 date_parser: Callable | None = None,

1716 thousands: str | None = None,

1717 comment: str | None = None,

1718 skipfooter: int = 0,

1719 convert_float: bool | None = None,

1720 mangle_dupe_cols: bool = True,

1721 **kwds,

1722 ) -> DataFrame | dict[str, DataFrame] | dict[int, DataFrame]:

1723 """

1724 Parse specified sheet(s) into a DataFrame.

1725

1726 Equivalent to read_excel(ExcelFile, ...) See the read_excel

1727 docstring for more info on accepted parameters.

1728

1729 Returns

1730 -------

1731 DataFrame or dict of DataFrames

1732 DataFrame from the passed in Excel file.

1733 """

1734 return self._reader.parse(

1735 sheet_name=sheet_name,

1736 header=header,

1737 names=names,

1738 index_col=index_col,

1739 usecols=usecols,

1740 squeeze=squeeze,

1741 converters=converters,

1742 true_values=true_values,

1743 false_values=false_values,

1744 skiprows=skiprows,

1745 nrows=nrows,

1746 na_values=na_values,

1747 parse_dates=parse_dates,

1748 date_parser=date_parser,

1749 thousands=thousands,

1750 comment=comment,

1751 skipfooter=skipfooter,

1752 convert_float=convert_float,

1753 mangle_dupe_cols=mangle_dupe_cols,

1754 **kwds,

1755 )

1756

1757 @property

1758 def book(self):

1759 return self._reader.book

1760

1761 @property

1762 def sheet_names(self):

1763 return self._reader.sheet_names

1764

1765 def close(self) -> None:

1766 """close io if necessary"""

1767 self._reader.close()

1768

1769 def __enter__(self) -> ExcelFile:

1770 return self

1771

1772 def __exit__(self, exc_type, exc_value, traceback) -> None:

1773 self.close()

1774

1775 def __del__(self) -> None:

1776 # Ensure we don't leak file descriptors, but put in try/except in case

1777 # attributes are already deleted

1778 try:

1779 self.close()

1780 except AttributeError:

1781 pass

Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/io/excel/_base.py: 20%

466 statements