Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/io/formats/format.py: 14%

1"""

2Internal module for formatting output data in csv, html, xml,

3and latex files. This module also applies to display formatting.

4"""

5from __future__ import annotations

7from contextlib import contextmanager

8from csv import (

9 QUOTE_NONE,

10 QUOTE_NONNUMERIC,

11)

12import decimal

13from functools import partial

14from io import StringIO

15import math

16import re

17from shutil import get_terminal_size

18from typing import (

19 IO,

20 TYPE_CHECKING,

21 Any,

22 Callable,

23 Final,

24 Hashable,

25 Iterable,

26 Iterator,

27 List,

28 Mapping,

29 Sequence,

30 cast,

31)

32from unicodedata import east_asian_width

34import numpy as np

36from pandas._config.config import (

37 get_option,

38 set_option,

39)

41from pandas._libs import lib

42from pandas._libs.missing import NA

43from pandas._libs.tslibs import (

44 NaT,

45 Timedelta,

46 Timestamp,

47 get_unit_from_dtype,

48 iNaT,

49 periods_per_day,

50)

51from pandas._libs.tslibs.nattype import NaTType

52from pandas._typing import (

53 ArrayLike,

54 Axes,

55 ColspaceArgType,

56 ColspaceType,

57 CompressionOptions,

58 FilePath,

59 FloatFormatType,

60 FormattersType,

61 IndexLabel,

62 StorageOptions,

63 WriteBuffer,

64)

65from pandas.util._decorators import deprecate_kwarg

67from pandas.core.dtypes.common import (

68 is_categorical_dtype,

69 is_complex_dtype,

70 is_datetime64_dtype,

71 is_extension_array_dtype,

72 is_float,

73 is_float_dtype,

74 is_integer,

75 is_integer_dtype,

76 is_list_like,

77 is_numeric_dtype,

78 is_scalar,

79 is_timedelta64_dtype,

80)

81from pandas.core.dtypes.dtypes import DatetimeTZDtype

82from pandas.core.dtypes.missing import (

83 isna,

84 notna,

85)

87from pandas.core.arrays import (

88 Categorical,

89 DatetimeArray,

90 TimedeltaArray,

91)

92from pandas.core.base import PandasObject

93import pandas.core.common as com

94from pandas.core.construction import extract_array

95from pandas.core.indexes.api import (

96 Index,

97 MultiIndex,

98 PeriodIndex,

99 ensure_index,

100)

101from pandas.core.indexes.datetimes import DatetimeIndex

102from pandas.core.indexes.timedeltas import TimedeltaIndex

103from pandas.core.reshape.concat import concat

104

105from pandas.io.common import (

106 check_parent_directory,

107 stringify_path,

108)

109from pandas.io.formats.printing import (

110 adjoin,

111 justify,

112 pprint_thing,

113)

114

115if TYPE_CHECKING: 115 ↛ 116line 115 didn't jump to line 116, because the condition on line 115 was never true

116 from pandas import (

117 DataFrame,

118 Series,

119 )

120

121

122common_docstring: Final = """

123 Parameters

124 ----------

125 buf : str, Path or StringIO-like, optional, default None

126 Buffer to write to. If None, the output is returned as a string.

127 columns : sequence, optional, default None

128 The subset of columns to write. Writes all columns by default.

129 col_space : %(col_space_type)s, optional

130 %(col_space)s.

131 header : %(header_type)s, optional

132 %(header)s.

133 index : bool, optional, default True

134 Whether to print index (row) labels.

135 na_rep : str, optional, default 'NaN'

136 String representation of ``NaN`` to use.

137 formatters : list, tuple or dict of one-param. functions, optional

138 Formatter functions to apply to columns' elements by position or

139 name.

140 The result of each function must be a unicode string.

141 List/tuple must be of length equal to the number of columns.

142 float_format : one-parameter function, optional, default None

143 Formatter function to apply to columns' elements if they are

144 floats. This function must return a unicode string and will be

145 applied only to the non-``NaN`` elements, with ``NaN`` being

146 handled by ``na_rep``.

147

148 .. versionchanged:: 1.2.0

149

150 sparsify : bool, optional, default True

151 Set to False for a DataFrame with a hierarchical index to print

152 every multiindex key at each row.

153 index_names : bool, optional, default True

154 Prints the names of the indexes.

155 justify : str, default None

156 How to justify the column labels. If None uses the option from

157 the print configuration (controlled by set_option), 'right' out

158 of the box. Valid values are

159

160 * left

161 * right

162 * center

163 * justify

164 * justify-all

165 * start

166 * end

167 * inherit

168 * match-parent

169 * initial

170 * unset.

171 max_rows : int, optional

172 Maximum number of rows to display in the console.

173 max_cols : int, optional

174 Maximum number of columns to display in the console.

175 show_dimensions : bool, default False

176 Display DataFrame dimensions (number of rows by number of columns).

177 decimal : str, default '.'

178 Character recognized as decimal separator, e.g. ',' in Europe.

179 """

180

# Names accepted for the ``justify`` argument. The same names are listed under
# ``justify`` in ``common_docstring``.
_VALID_JUSTIFY_PARAMETERS = (
    "left",
    "right",
    "center",
    "justify",
    "justify-all",
    "start",
    "end",
    "inherit",
    "match-parent",
    "initial",
    "unset",
)

194

195return_docstring: Final = """

196 Returns

197 -------

198 str or None

199 If buf is None, returns the result as a string. Otherwise returns

200 None.

201 """

202

203

class CategoricalFormatter:
    """
    Render a Categorical as a bracketed value list with an optional footer.

    Parameters
    ----------
    categorical : Categorical
        The array to render.
    buf : IO[str], optional
        Stored on the instance; a new ``StringIO`` is created when omitted.
    length : bool, default True
        Whether the footer starts with ``Length: <n>``.
    na_rep : str, default "NaN"
        Passed through to ``format_array`` as the missing-value text.
    footer : bool, default True
        Whether ``to_string`` appends the footer.
    """

    def __init__(
        self,
        categorical: Categorical,
        buf: IO[str] | None = None,
        length: bool = True,
        na_rep: str = "NaN",
        footer: bool = True,
    ) -> None:
        self.categorical = categorical
        self.buf = buf if buf is not None else StringIO("")
        self.na_rep = na_rep
        self.length = length
        self.footer = footer
        self.quoting = QUOTE_NONNUMERIC

    def _get_footer(self) -> str:
        """
        Return the footer: an optional length line, then the categories info.
        """
        lines = []
        if self.length:
            lines.append(f"Length: {len(self.categorical)}")

        # Levels are added in a newline
        lines.append(self.categorical._repr_categories_info())
        return "\n".join(lines)

    def _get_formatted_values(self) -> list[str]:
        """Format the categorical's values through ``format_array``."""
        return format_array(
            self.categorical._internal_get_values(),
            None,
            float_format=None,
            na_rep=self.na_rep,
            quoting=self.quoting,
        )

    def to_string(self) -> str:
        """
        Return ``[v1, v2, ...]`` followed by the footer on a new line.

        An empty categorical yields only the footer, or an empty string when
        the footer is disabled.
        """
        # Explicit length check: truthiness of an array-like is not relied on.
        if len(self.categorical) == 0:
            return self._get_footer() if self.footer else ""

        stripped = [value.strip() for value in self._get_formatted_values()]
        result = ["[" + ", ".join(stripped) + "]"]

        if self.footer:
            footer = self._get_footer()
            if footer:
                result.append(footer)

        return "\n".join(result)

266

267

class SeriesFormatter:
    """
    Build the text representation of a Series: index, values and footer.

    Parameters
    ----------
    series : Series
        The Series to render.
    buf : IO[str], optional
        Stored on the instance; a new ``StringIO`` is created when omitted.
    length : bool or str, default True
        ``True`` always adds ``Length: <n>`` to the footer; ``"truncate"``
        adds it only when rows were truncated.
    header : bool, default True
        Whether to emit the index-name header line when the index has a name.
    index : bool, default True
        Whether to show the index next to the values.
    na_rep : str, default "NaN"
        Passed through to ``format_array`` as the missing-value text.
    name : bool, default False
        Whether the footer may include ``Name: <series name>``.
    float_format : str, optional
        Falls back to the ``display.float_format`` option when None.
    dtype : bool, default True
        Whether the footer includes ``dtype: <name>``.
    max_rows : int, optional
        Row count above which the Series is truncated.
    min_rows : int, optional
        Number of rows kept once truncation applies (capped by ``max_rows``).
    """

    def __init__(
        self,
        series: Series,
        buf: IO[str] | None = None,
        length: bool | str = True,
        header: bool = True,
        index: bool = True,
        na_rep: str = "NaN",
        name: bool = False,
        float_format: str | None = None,
        dtype: bool = True,
        max_rows: int | None = None,
        min_rows: int | None = None,
    ) -> None:
        self.series = series
        self.buf = buf if buf is not None else StringIO()
        self.name = name
        self.na_rep = na_rep
        self.header = header
        self.length = length
        self.index = index
        self.max_rows = max_rows
        self.min_rows = min_rows

        if float_format is None:
            float_format = get_option("display.float_format")
        self.float_format = float_format
        self.dtype = dtype
        self.adj = get_adjustment()

        self._chk_truncate()

    def _chk_truncate(self) -> None:
        """
        Decide whether rows are truncated and store the rows to display.

        Sets ``tr_series`` (rows to show), ``tr_row_num`` (rows kept per side,
        or None when not truncated) and ``is_truncated_vertically``.
        """
        self.tr_row_num: int | None

        min_rows = self.min_rows
        max_rows = self.max_rows
        # truncation determined by max_rows, actual truncated number of rows
        # used below by min_rows
        is_truncated_vertically = max_rows and (len(self.series) > max_rows)
        series = self.series
        if is_truncated_vertically:
            max_rows = cast(int, max_rows)
            if min_rows:
                # if min_rows is set (not None or 0), set max_rows to minimum
                # of both
                max_rows = min(min_rows, max_rows)
            if max_rows == 1:
                row_num = max_rows
                series = series.iloc[:max_rows]
            else:
                row_num = max_rows // 2
                series = concat((series.iloc[:row_num], series.iloc[-row_num:]))
            self.tr_row_num = row_num
        else:
            self.tr_row_num = None
        self.tr_series = series
        # Stored as computed, so this may be None/0 rather than a strict bool.
        self.is_truncated_vertically = is_truncated_vertically

    def _get_footer(self) -> str:
        """
        Return the footer: comma-separated Freq/Name/Length/dtype entries,
        followed on a new line by the categories info for categorical data.
        """
        name = self.series.name
        entries: list[str] = []

        if getattr(self.series.index, "freq", None) is not None:
            assert isinstance(
                self.series.index, (DatetimeIndex, PeriodIndex, TimedeltaIndex)
            )
            entries.append(f"Freq: {self.series.index.freqstr}")

        if self.name is not False and name is not None:
            series_name = pprint_thing(name, escape_chars=("\t", "\r", "\n"))
            entries.append(f"Name: {series_name}")

        if self.length is True or (
            self.length == "truncate" and self.is_truncated_vertically
        ):
            entries.append(f"Length: {len(self.series)}")

        if self.dtype is not False and self.dtype is not None:
            dtype_name = getattr(self.tr_series.dtype, "name", None)
            if dtype_name:
                entries.append(f"dtype: {pprint_thing(dtype_name)}")

        footer = ", ".join(entries)

        # level infos are added to the end and in a new line, like it is done
        # for Categoricals
        if is_categorical_dtype(self.tr_series.dtype):
            level_info = self.tr_series._values._repr_categories_info()
            if footer:
                footer += "\n"
            footer += level_info

        return str(footer)

    def _get_formatted_index(self) -> tuple[list[str], bool]:
        """
        Return the formatted index labels and whether a name header exists.

        The labels are requested with the name(s) included, so the first
        element is the header entry.
        """
        index = self.tr_series.index

        if isinstance(index, MultiIndex):
            have_header = any(name for name in index.names)
            fmt_index = index.format(names=True)
        else:
            have_header = index.name is not None
            fmt_index = index.format(name=True)
        return fmt_index, have_header

    def _get_formatted_values(self) -> list[str]:
        """Format the (possibly truncated) values through ``format_array``."""
        return format_array(
            self.tr_series._values,
            None,
            float_format=self.float_format,
            na_rep=self.na_rep,
            leading_space=self.index,
        )

    def to_string(self) -> str:
        """
        Return the full text: optional header, index/values rows, footer.

        An empty Series renders as ``<ClassName>([], <footer>)``.
        """
        series = self.tr_series
        footer = self._get_footer()

        if len(series) == 0:
            return f"{type(self.series).__name__}([], {footer})"

        fmt_index, have_header = self._get_formatted_index()
        fmt_values = self._get_formatted_values()

        if self.is_truncated_vertically:
            row_num = cast(int, self.tr_row_num)
            # Size the dots to the last value shown before the gap.
            width = self.adj.len(fmt_values[row_num - 1])
            dot_str = "..." if width > 3 else ".."
            # Series uses mode=center because it has single value columns
            # DataFrame uses mode=left
            dot_str = self.adj.justify([dot_str], width, mode="center")[0]
            fmt_values.insert(row_num, dot_str)
            # fmt_index[0] is the header entry, hence the +1 offset.
            fmt_index.insert(row_num + 1, "")

        if self.index:
            result = self.adj.adjoin(3, *[fmt_index[1:], fmt_values])
        else:
            result = self.adj.adjoin(3, fmt_values)

        if self.header and have_header:
            result = fmt_index[0] + "\n" + result

        if footer:
            result += "\n" + footer

        return result

426

427

class TextAdjustment:
    """
    Width measurement and padding helpers that count one column per character.
    """

    def __init__(self) -> None:
        self.encoding = get_option("display.encoding")

    def len(self, text: str) -> int:
        """Return the display width of ``text``, here simply ``len(text)``."""
        return len(text)

    def justify(self, texts: Any, max_len: int, mode: str = "right") -> list[str]:
        """Pad each entry of ``texts`` to ``max_len`` via the module ``justify``."""
        return justify(texts, max_len, mode=mode)

    def adjoin(self, space: int, *lists, **kwargs) -> str:
        """
        Join ``lists`` side by side via the module ``adjoin``, using this
        object's ``len`` and ``justify`` for measuring and padding.
        """
        return adjoin(space, *lists, strlen=self.len, justfunc=self.justify, **kwargs)

440

441

class EastAsianTextAdjustment(TextAdjustment):
    """
    Text adjustment that measures width using Unicode East Asian Width.

    Wide and fullwidth characters count as two columns. Characters whose
    category is missing from the map (ambiguous width) count as one or two
    columns according to the ``display.unicode.ambiguous_as_wide`` option.
    """

    def __init__(self) -> None:
        super().__init__()
        ambiguous_as_wide = get_option("display.unicode.ambiguous_as_wide")
        self.ambiguous_width = 2 if ambiguous_as_wide else 1

        # Definition of East Asian Width
        # https://unicode.org/reports/tr11/
        # Ambiguous width can be changed by option
        self._EAW_MAP = {"Na": 1, "N": 1, "W": 2, "F": 2, "H": 1}

    def len(self, text: str) -> int:
        """
        Calculate display width considering unicode East Asian Width
        """
        if not isinstance(text, str):
            # Non-string input is measured with the builtin ``len``.
            return len(text)

        return sum(
            self._EAW_MAP.get(east_asian_width(char), self.ambiguous_width)
            for char in text
        )

    def justify(
        self, texts: Iterable[str], max_len: int, mode: str = "right"
    ) -> list[str]:
        """
        Pad every string in ``texts`` to a display width of ``max_len``.

        ``mode`` is "left" or "center"; any other value pads on the left
        (right-justified).
        """

        # re-calculate padding space per str considering East Asian Width
        def _get_pad(text):
            # str.ljust/center/rjust count characters, so the target is raised
            # by the character count minus the display width.
            return max_len - self.len(text) + len(text)

        if mode == "left":
            return [text.ljust(_get_pad(text)) for text in texts]
        if mode == "center":
            return [text.center(_get_pad(text)) for text in texts]
        return [text.rjust(_get_pad(text)) for text in texts]

479

480

def get_adjustment() -> TextAdjustment:
    """
    Return the text adjustment selected by ``display.unicode.east_asian_width``.

    An ``EastAsianTextAdjustment`` is returned when the option is truthy,
    otherwise a plain ``TextAdjustment``.
    """
    if get_option("display.unicode.east_asian_width"):
        return EastAsianTextAdjustment()
    return TextAdjustment()

487

488

def get_dataframe_repr_params() -> dict[str, Any]:
    """Get the parameters used to repr(dataFrame) calls using DataFrame.to_string.

    Supplying these parameters to DataFrame.to_string is equivalent to calling
    ``repr(DataFrame)``. This is useful if you want to adjust the repr output.

    .. versionadded:: 1.4.0

    Example
    -------
    >>> import pandas as pd
    >>>
    >>> df = pd.DataFrame([[1, 2], [3, 4]])
    >>> repr_params = pd.io.formats.format.get_dataframe_repr_params()
    >>> repr(df) == df.to_string(**repr_params)
    True
    """
    from pandas.io.formats import console

    # The console width is only used when frames may wrap across lines.
    line_width = None
    if get_option("display.expand_frame_repr"):
        line_width, _ = console.get_console_size()

    return {
        "max_rows": get_option("display.max_rows"),
        "min_rows": get_option("display.min_rows"),
        "max_cols": get_option("display.max_columns"),
        "max_colwidth": get_option("display.max_colwidth"),
        "show_dimensions": get_option("display.show_dimensions"),
        "line_width": line_width,
    }

520

521

def get_series_repr_params() -> dict[str, Any]:
    """Get the parameters used to repr(Series) calls using Series.to_string.

    Supplying these parameters to Series.to_string is equivalent to calling
    ``repr(series)``. This is useful if you want to adjust the series repr output.

    .. versionadded:: 1.4.0

    Example
    -------
    >>> import pandas as pd
    >>>
    >>> ser = pd.Series([1, 2, 3, 4])
    >>> repr_params = pd.io.formats.format.get_series_repr_params()
    >>> repr(ser) == ser.to_string(**repr_params)
    True
    """
    _, height = get_terminal_size()
    max_rows_option = get_option("display.max_rows")

    if max_rows_option == 0:
        # A ``display.max_rows`` of 0 makes both limits follow the terminal
        # height.
        max_rows = min_rows = height
    else:
        max_rows = max_rows_option
        min_rows = get_option("display.min_rows")

    return {
        "name": True,
        "dtype": True,
        "min_rows": min_rows,
        "max_rows": max_rows,
        "length": get_option("display.show_dimensions"),
    }

558

559

class DataFrameFormatter:
    """Class for processing dataframe formatting options and data."""

    # The shared parameter/return documentation is appended at class-creation
    # time; the fallback to "" covers the case where ``__doc__`` is not set.
    __doc__ = __doc__ if __doc__ else ""
    __doc__ += common_docstring + return_docstring

    def __init__(
        self,
        frame: DataFrame,
        columns: Sequence[Hashable] | None = None,
        col_space: ColspaceArgType | None = None,
        header: bool | Sequence[str] = True,
        index: bool = True,
        na_rep: str = "NaN",
        formatters: FormattersType | None = None,
        justify: str | None = None,
        float_format: FloatFormatType | None = None,
        sparsify: bool | None = None,
        index_names: bool = True,
        max_rows: int | None = None,
        min_rows: int | None = None,
        max_cols: int | None = None,
        show_dimensions: bool | str = False,
        decimal: str = ".",
        bold_rows: bool = False,
        escape: bool = True,
    ) -> None:
        self.frame = frame
        # May replace ``self.frame`` with the selected columns, so it has to
        # run before anything that reads ``self.frame.columns``.
        self.columns = self._initialize_columns(columns)
        self.col_space = self._initialize_colspace(col_space)
        self.header = header
        self.index = index
        self.na_rep = na_rep
        self.formatters = self._initialize_formatters(formatters)
        self.justify = self._initialize_justify(justify)
        self.float_format = float_format
        self.sparsify = self._initialize_sparsify(sparsify)
        self.show_index_names = index_names
        self.decimal = decimal
        self.bold_rows = bold_rows
        self.escape = escape
        self.max_rows = max_rows
        self.min_rows = min_rows
        self.max_cols = max_cols
        self.show_dimensions = show_dimensions

        # Both limits read max_rows/max_cols/min_rows/header/show_dimensions,
        # which therefore must already be assigned.
        self.max_cols_fitted = self._calc_max_cols_fitted()
        self.max_rows_fitted = self._calc_max_rows_fitted()

        self.tr_frame = self.frame
        self.truncate()
        self.adj = get_adjustment()

    def get_strcols(self) -> list[list[str]]:
        """
        Render a DataFrame to a list of columns (as lists of strings).
        """
        strcols = self._get_strcols_without_index()

        if self.index:
            strcols.insert(0, self._get_formatted_index(self.tr_frame))

        return strcols

    @property
    def should_show_dimensions(self) -> bool:
        """True when dimensions are always shown, or shown because of truncation."""
        return self.show_dimensions is True or (
            self.show_dimensions == "truncate" and self.is_truncated
        )

    @property
    def is_truncated(self) -> bool:
        """True when the frame is truncated in either direction."""
        return bool(self.is_truncated_horizontally or self.is_truncated_vertically)

    @property
    def is_truncated_horizontally(self) -> bool:
        """True when there are more columns than ``max_cols_fitted`` allows."""
        return bool(self.max_cols_fitted and (len(self.columns) > self.max_cols_fitted))

    @property
    def is_truncated_vertically(self) -> bool:
        """True when there are more rows than ``max_rows_fitted`` allows."""
        return bool(self.max_rows_fitted and (len(self.frame) > self.max_rows_fitted))

    @property
    def dimensions_info(self) -> str:
        """Text of the ``[R rows x C columns]`` summary, with two leading newlines."""
        return f"\n\n[{len(self.frame)} rows x {len(self.frame.columns)} columns]"

    @property
    def has_index_names(self) -> bool:
        return _has_names(self.frame.index)

    @property
    def has_column_names(self) -> bool:
        return _has_names(self.frame.columns)

    @property
    def show_row_idx_names(self) -> bool:
        """True when the index has names, is shown, and names are enabled."""
        return all((self.has_index_names, self.index, self.show_index_names))

    @property
    def show_col_idx_names(self) -> bool:
        """True when the columns have names, names are enabled, and a header is shown."""
        return all((self.has_column_names, self.show_index_names, self.header))

    @property
    def max_rows_displayed(self) -> int:
        """``max_rows`` capped at the frame length; the frame length if unset/0."""
        return min(self.max_rows or len(self.frame), len(self.frame))

    def _initialize_sparsify(self, sparsify: bool | None) -> bool:
        """Return ``sparsify``, or the ``display.multi_sparse`` option when None."""
        if sparsify is None:
            return get_option("display.multi_sparse")
        return sparsify

    def _initialize_formatters(
        self, formatters: FormattersType | None
    ) -> FormattersType:
        """
        Validate ``formatters`` and return it (an empty dict when None).

        Raises
        ------
        ValueError
            If ``formatters`` is not a dict and its length differs from the
            number of columns.
        """
        if formatters is None:
            return {}
        if len(self.frame.columns) == len(formatters) or isinstance(formatters, dict):
            return formatters
        raise ValueError(
            f"Formatters length({len(formatters)}) should match "
            f"DataFrame number of columns({len(self.frame.columns)})"
        )

    def _initialize_justify(self, justify: str | None) -> str:
        """Return ``justify``, or ``display.colheader_justify`` when None."""
        if justify is None:
            return get_option("display.colheader_justify")
        return justify

    def _initialize_columns(self, columns: Sequence[Hashable] | None) -> Index:
        """
        Return the columns to render.

        When ``columns`` is given, ``self.frame`` is replaced by the frame
        restricted to those columns.
        """
        if columns is None:
            return self.frame.columns

        # GH 47231 - columns doesn't have to be `Sequence[str]`
        # Will fix in later PR
        cols = ensure_index(cast(Axes, columns))
        self.frame = self.frame[cols]
        return cols

    def _initialize_colspace(self, col_space: ColspaceArgType | None) -> ColspaceType:
        """
        Normalize ``col_space`` into a mapping from column label to width.

        The key ``""`` holds the width used for the index. A scalar applies to
        the index and every column; a mapping is validated and returned as is;
        any other value is zipped with the frame's columns.

        Raises
        ------
        ValueError
            If a mapping names an unknown column, or a sequence's length
            differs from the number of columns.
        """
        result: ColspaceType

        if col_space is None:
            result = {}
        elif isinstance(col_space, (int, str)):
            result = {"": col_space}
            result.update(dict.fromkeys(self.frame.columns, col_space))
        elif isinstance(col_space, Mapping):
            for column in col_space:
                if column not in self.frame.columns and column != "":
                    raise ValueError(
                        f"Col_space is defined for an unknown column: {column}"
                    )
            result = col_space
        else:
            if len(self.frame.columns) != len(col_space):
                raise ValueError(
                    f"Col_space length({len(col_space)}) should match "
                    f"DataFrame number of columns({len(self.frame.columns)})"
                )
            result = dict(zip(self.frame.columns, col_space))
        return result

    def _calc_max_cols_fitted(self) -> int | None:
        """Number of columns fitting the screen."""
        if not self._is_in_terminal():
            return self.max_cols

        width, _ = get_terminal_size()
        if self._is_screen_narrow(width):
            return width
        return self.max_cols

    def _calc_max_rows_fitted(self) -> int | None:
        """Number of rows with data fitting the screen."""
        if self._is_in_terminal() and self.max_rows == 0:
            _, height = get_terminal_size()
            # rows available to fill with actual data
            return height - self._get_number_of_auxillary_rows()

        # ``_is_screen_short`` requires ``max_rows == 0``, which is handled
        # above, so every remaining path starts from ``self.max_rows``.
        return self._adjust_max_rows(self.max_rows)

    def _adjust_max_rows(self, max_rows: int | None) -> int | None:
        """Adjust max_rows using display logic.

        See description here:
        https://pandas.pydata.org/docs/dev/user_guide/options.html#frequently-used-options

        GH #37359
        """
        if max_rows and (len(self.frame) > max_rows) and self.min_rows:
            # if truncated, set max_rows showed to min_rows
            max_rows = min(self.min_rows, max_rows)
        return max_rows

    def _is_in_terminal(self) -> bool:
        """Check if the output is to be shown in terminal."""
        return bool(self.max_cols == 0 or self.max_rows == 0)

    def _is_screen_narrow(self, max_width) -> bool:
        """True when ``max_cols`` is 0 and the frame has more columns than ``max_width``."""
        return bool(self.max_cols == 0 and len(self.frame.columns) > max_width)

    def _is_screen_short(self, max_height) -> bool:
        """True when ``max_rows`` is 0 and the frame has more rows than ``max_height``."""
        return bool(self.max_rows == 0 and len(self.frame) > max_height)

    def _get_number_of_auxillary_rows(self) -> int:
        """Get number of rows occupied by prompt, dots and dimension info."""
        dot_row = 1
        prompt_row = 1
        num_rows = dot_row + prompt_row

        if self.show_dimensions:
            num_rows += len(self.dimensions_info.splitlines())

        if self.header:
            num_rows += 1

        return num_rows

    def truncate(self) -> None:
        """
        Check whether the frame should be truncated. If so, slice the frame up.
        """
        if self.is_truncated_horizontally:
            self._truncate_horizontally()

        if self.is_truncated_vertically:
            self._truncate_vertically()

    def _truncate_horizontally(self) -> None:
        """Remove columns, which are not to be displayed and adjust formatters.

        Attributes affected:
            - tr_frame
            - formatters
            - tr_col_num
        """
        assert self.max_cols_fitted is not None
        col_num = self.max_cols_fitted // 2
        if col_num >= 1:
            left = self.tr_frame.iloc[:, :col_num]
            right = self.tr_frame.iloc[:, -col_num:]
            self.tr_frame = concat((left, right), axis=1)

            # truncate formatter
            if isinstance(self.formatters, (list, tuple)):
                self.formatters = [
                    *self.formatters[:col_num],
                    *self.formatters[-col_num:],
                ]
        else:
            # Fewer than two columns fit: keep only the leading columns.
            col_num = cast(int, self.max_cols)
            self.tr_frame = self.tr_frame.iloc[:, :col_num]
        self.tr_col_num = col_num

    def _truncate_vertically(self) -> None:
        """Remove rows, which are not to be displayed.

        Attributes affected:
            - tr_frame
            - tr_row_num
        """
        assert self.max_rows_fitted is not None
        row_num = self.max_rows_fitted // 2
        if row_num >= 1:
            head = self.tr_frame.iloc[:row_num, :]
            tail = self.tr_frame.iloc[-row_num:, :]
            self.tr_frame = concat((head, tail))
        else:
            # Fewer than two rows fit: keep only the leading rows.
            row_num = cast(int, self.max_rows)
            self.tr_frame = self.tr_frame.iloc[:row_num, :]
        self.tr_row_num = row_num

    def _get_strcols_without_index(self) -> list[list[str]]:
        """
        Return one list of strings per displayed column, header cells first.

        Raises
        ------
        ValueError
            If ``header`` is list-like and its length differs from the number
            of columns.
        """
        strcols: list[list[str]] = []

        if not is_list_like(self.header) and not self.header:
            # No header at all: only the fixed-width values are emitted.
            for i, c in enumerate(self.tr_frame):
                fmt_values = _make_fixed_width(
                    strings=self.format_col(i),
                    justify=self.justify,
                    minimum=int(self.col_space.get(c, 0)),
                    adj=self.adj,
                )
                strcols.append(fmt_values)
            return strcols

        if is_list_like(self.header):
            # cast here since can't be bool if is_list_like
            self.header = cast(List[str], self.header)
            if len(self.header) != len(self.columns):
                raise ValueError(
                    f"Writing {len(self.columns)} cols "
                    f"but got {len(self.header)} aliases"
                )
            str_columns = [[label] for label in self.header]
        else:
            str_columns = self._get_formatted_column_labels(self.tr_frame)

        if self.show_row_idx_names:
            # Extra blank header cell lining up with the index-names row.
            for labels in str_columns:
                labels.append("")

        for i, c in enumerate(self.tr_frame):
            cheader = str_columns[i]
            header_colwidth = max(
                int(self.col_space.get(c, 0)), *(self.adj.len(x) for x in cheader)
            )
            fmt_values = _make_fixed_width(
                self.format_col(i), self.justify, minimum=header_colwidth, adj=self.adj
            )

            max_len = max(max(self.adj.len(x) for x in fmt_values), header_colwidth)
            cheader = self.adj.justify(cheader, max_len, mode=self.justify)
            strcols.append(cheader + fmt_values)

        return strcols

    def format_col(self, i: int) -> list[str]:
        """Format the values of the ``i``-th displayed column."""
        frame = self.tr_frame
        return format_array(
            frame.iloc[:, i]._values,
            self._get_formatter(i),
            float_format=self.float_format,
            na_rep=self.na_rep,
            space=self.col_space.get(frame.columns[i]),
            decimal=self.decimal,
            leading_space=self.index,
        )

    def _get_formatter(self, i: str | int) -> Callable | None:
        """
        Look up the formatter for column ``i``.

        With list/tuple formatters only integer positions resolve. With a
        mapping, an integer that is not itself a column label is first
        translated to the label at that position.
        """
        if isinstance(self.formatters, (list, tuple)):
            if not is_integer(i):
                return None
            return self.formatters[cast(int, i)]

        if is_integer(i) and i not in self.columns:
            i = self.columns[i]
        return self.formatters.get(i, None)

    def _get_formatted_column_labels(self, frame: DataFrame) -> list[list[str]]:
        """
        Return, for each column of ``frame``, its header cells (one per level).
        """
        from pandas.core.indexes.multi import sparsify_labels

        columns = frame.columns

        # NOTE(review): in both branches the dtypes come from ``self.frame``
        # while the labels come from ``frame``; if ``frame`` is a truncated
        # view the zip below may pair labels with other columns' dtypes -
        # confirm this is intended.
        if isinstance(columns, MultiIndex):
            fmt_columns = columns.format(sparsify=False, adjoin=False)
            fmt_columns = list(zip(*fmt_columns))
            dtypes = self.frame.dtypes._values

            # if we have a Float level, they don't use leading space at all
            # NOTE(review): ``is_floating`` is referenced, not called; if it is
            # a method on the level objects this is always truthy - confirm.
            restrict_formatting = any(level.is_floating for level in columns.levels)
            need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes)))

            def space_format(x, y):
                if (
                    y not in self.formatters
                    and need_leadsp[x]
                    and not restrict_formatting
                ):
                    return " " + y
                return y

            str_columns = list(
                zip(*([space_format(x, y) for y in x] for x in fmt_columns))
            )
            if self.sparsify and len(str_columns):
                str_columns = sparsify_labels(str_columns)

            str_columns = [list(x) for x in zip(*str_columns)]
        else:
            fmt_columns = columns.format()
            dtypes = self.frame.dtypes
            need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes)))
            # Numeric columns without a custom formatter get a leading space.
            str_columns = [
                [" " + x if not self._get_formatter(i) and need_leadsp[x] else x]
                for i, x in enumerate(fmt_columns)
            ]
        return str_columns

    def _get_formatted_index(self, frame: DataFrame) -> list[str]:
        """
        Return the index column as strings, preceded by header cells when a
        header is shown.
        """
        # Note: this is only used by to_string() and to_latex(), not by
        # to_html(). so safe to cast col_space here.
        index_min_width = cast(int, self.col_space.get("", 0))
        index = frame.index
        columns = frame.columns
        fmt = self._get_formatter("__index__")

        if isinstance(index, MultiIndex):
            fmt_index = index.format(
                sparsify=self.sparsify,
                adjoin=False,
                names=self.show_row_idx_names,
                formatter=fmt,
            )
        else:
            fmt_index = [index.format(name=self.show_row_idx_names, formatter=fmt)]

        fmt_index = [
            tuple(
                _make_fixed_width(
                    list(x), justify="left", minimum=index_min_width, adj=self.adj
                )
            )
            for x in fmt_index
        ]

        adjoined = self.adj.adjoin(1, *fmt_index).split("\n")

        if not self.header:
            return adjoined

        # empty space for columns
        if self.show_col_idx_names:
            col_header = [str(x) for x in self._get_column_name_list()]
        else:
            col_header = [""] * columns.nlevels

        return col_header + adjoined

    def _get_column_name_list(self) -> list[Hashable]:
        """Return the column index name(s), with ``None`` replaced by ``""``."""
        columns = self.frame.columns
        if isinstance(columns, MultiIndex):
            return ["" if name is None else name for name in columns.names]
        return ["" if columns.name is None else columns.name]

1008

1009

class DataFrameRenderer:
    """Render a formatted DataFrame to LaTeX, HTML, plain text or CSV.

    Each ``to_*`` method builds the matching backend formatter around the
    shared ``DataFrameFormatter`` and either writes the result to the
    supplied target or returns it as a string.

    Used from pandas.core.generic.NDFrame:
        - to_csv
        - to_latex

    Used from pandas.core.frame.DataFrame:
        - to_html
        - to_string

    Parameters
    ----------
    fmt : DataFrameFormatter
        Formatter carrying the formatting options.
    """

    def __init__(self, fmt: DataFrameFormatter) -> None:
        self.fmt = fmt

    def to_latex(
        self,
        buf: FilePath | WriteBuffer[str] | None = None,
        column_format: str | None = None,
        longtable: bool = False,
        encoding: str | None = None,
        multicolumn: bool = False,
        multicolumn_format: str | None = None,
        multirow: bool = False,
        caption: str | tuple[str, str] | None = None,
        label: str | None = None,
        position: str | None = None,
    ) -> str | None:
        """
        Render the frame as a LaTeX tabular/longtable environment.

        Returns the rendered text when ``buf`` is None, otherwise writes to
        ``buf`` and returns None.
        """
        # Imported lazily, as in the other ``to_*`` methods.
        from pandas.io.formats.latex import LatexFormatter

        rendered = LatexFormatter(
            self.fmt,
            longtable=longtable,
            column_format=column_format,
            multicolumn=multicolumn,
            multicolumn_format=multicolumn_format,
            multirow=multirow,
            caption=caption,
            label=label,
            position=position,
        ).to_string()
        return save_to_buffer(rendered, buf=buf, encoding=encoding)

    def to_html(
        self,
        buf: FilePath | WriteBuffer[str] | None = None,
        encoding: str | None = None,
        classes: str | list | tuple | None = None,
        notebook: bool = False,
        border: int | bool | None = None,
        table_id: str | None = None,
        render_links: bool = False,
    ) -> str | None:
        """
        Render the frame as an HTML table.

        Parameters
        ----------
        buf : str, path object, file-like object, or None, default None
            Destination: a path, or an object with a string ``write()``
            method. When None the HTML is returned as a string.
        encoding : str, default "utf-8"
            Character encoding used when ``buf`` is a path.
        classes : str or list-like
            Extra classes for the ``class`` attribute of the opening
            ``<table>`` tag, in addition to the default "dataframe".
        notebook : bool, default False
            Whether the generated HTML is for IPython Notebook.
        border : int
            Value for the ``border`` attribute of the opening ``<table>``
            tag. Default ``pd.options.display.html.border``.
        table_id : str, optional
            CSS id placed on the opening ``<table>`` tag if specified.
        render_links : bool, default False
            Convert URLs to HTML links.
        """
        from pandas.io.formats.html import (
            HTMLFormatter,
            NotebookFormatter,
        )

        if notebook:
            formatter_cls = NotebookFormatter
        else:
            formatter_cls = HTMLFormatter

        rendered = formatter_cls(
            self.fmt,
            classes=classes,
            border=border,
            table_id=table_id,
            render_links=render_links,
        ).to_string()
        return save_to_buffer(rendered, buf=buf, encoding=encoding)

    def to_string(
        self,
        buf: FilePath | WriteBuffer[str] | None = None,
        encoding: str | None = None,
        line_width: int | None = None,
    ) -> str | None:
        """
        Render the frame as console-friendly tabular text.

        Parameters
        ----------
        buf : str, path object, file-like object, or None, default None
            Destination: a path, or an object with a string ``write()``
            method. When None the text is returned as a string.
        encoding : str, default "utf-8"
            Character encoding used when ``buf`` is a path.
        line_width : int, optional
            Width to wrap a line in characters.
        """
        from pandas.io.formats.string import StringFormatter

        rendered = StringFormatter(self.fmt, line_width=line_width).to_string()
        return save_to_buffer(rendered, buf=buf, encoding=encoding)

    @deprecate_kwarg(old_arg_name="line_terminator", new_arg_name="lineterminator")
    def to_csv(
        self,
        path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None,
        encoding: str | None = None,
        sep: str = ",",
        columns: Sequence[Hashable] | None = None,
        index_label: IndexLabel | None = None,
        mode: str = "w",
        compression: CompressionOptions = "infer",
        quoting: int | None = None,
        quotechar: str = '"',
        lineterminator: str | None = None,
        chunksize: int | None = None,
        date_format: str | None = None,
        doublequote: bool = True,
        escapechar: str | None = None,
        errors: str = "strict",
        storage_options: StorageOptions = None,
    ) -> str | None:
        """
        Render the frame as a comma-separated file.

        Returns the CSV text when ``path_or_buf`` is None, otherwise writes
        to ``path_or_buf`` and returns None.
        """
        from pandas.io.formats.csvs import CSVFormatter

        # With no target given, collect the output in memory and hand it
        # back to the caller as a string.
        owns_buffer = path_or_buf is None
        if owns_buffer:
            path_or_buf = StringIO()

        CSVFormatter(
            path_or_buf=path_or_buf,
            lineterminator=lineterminator,
            sep=sep,
            encoding=encoding,
            errors=errors,
            compression=compression,
            quoting=quoting,
            cols=columns,
            index_label=index_label,
            mode=mode,
            chunksize=chunksize,
            quotechar=quotechar,
            date_format=date_format,
            doublequote=doublequote,
            escapechar=escapechar,
            storage_options=storage_options,
            formatter=self.fmt,
        ).save()

        if not owns_buffer:
            return None

        assert isinstance(path_or_buf, StringIO)
        text = path_or_buf.getvalue()
        path_or_buf.close()
        return text

1198

1199

def save_to_buffer(
    string: str,
    buf: FilePath | WriteBuffer[str] | None = None,
    encoding: str | None = None,
) -> str | None:
    """
    Write ``string`` to ``buf``; when ``buf`` is None, return the text instead.
    """
    with get_buffer(buf, encoding=encoding) as handle:
        handle.write(string)
        if buf is not None:
            return None
        # With no target, get_buffer yields an in-memory StringIO.
        # error: "WriteBuffer[str]" has no attribute "getvalue"
        return handle.getvalue()  # type: ignore[attr-defined]

1214

1215

@contextmanager
def get_buffer(
    buf: FilePath | WriteBuffer[str] | None, encoding: str | None = None
) -> Iterator[WriteBuffer[str]] | Iterator[StringIO]:
    """
    Context manager yielding a writable text target.

    A file name or path-like ``buf`` is opened for writing and closed on
    exit, a writable object is yielded unchanged, and None yields a fresh
    ``StringIO``.

    Raises
    ------
    ValueError
        If ``encoding`` is given but the target is not a file name.
    TypeError
        If the target is neither a file name nor has a ``write`` attribute.
    """
    target = StringIO() if buf is None else stringify_path(buf)
    is_file_name = isinstance(target, str)

    if encoding is None:
        encoding = "utf-8"
    elif not is_file_name:
        raise ValueError("buf is not a file name and encoding is specified.")

    if hasattr(target, "write"):
        yield target
        return

    if not is_file_name:
        raise TypeError("buf is not a file name and it has no write method")

    check_parent_directory(str(target))
    # GH#30034 open instead of codecs.open prevents a file leak
    # if we have an invalid encoding argument.
    # newline="" is needed to roundtrip correctly on
    # windows test_to_latex_filename
    with open(target, "w", encoding=encoding, newline="") as handle:
        yield handle

1246

1247

1248# ----------------------------------------------------------------------

1249# Array formatters

1250

1251

def format_array(
    values: Any,
    formatter: Callable | None,
    float_format: FloatFormatType | None = None,
    na_rep: str = "NaN",
    digits: int | None = None,
    space: str | int | None = None,
    justify: str = "right",
    decimal: str = ".",
    leading_space: bool | None = True,
    quoting: int | None = None,
) -> list[str]:
    """
    Format an array for printing.

    The formatter class is chosen from ``values.dtype`` and the remaining
    arguments are forwarded to it.

    Parameters
    ----------
    values
    formatter
    float_format
        Falls back to the ``display.float_format`` option when None.
    na_rep
    digits
        Falls back to the ``display.precision`` option when None.
    space
        Falls back to 12 when None.
    justify
    decimal
    leading_space : bool, optional, default True
        Whether the array should be formatted with a leading space.
        When an array is a column of a Series or DataFrame, we do want
        the leading space to pad between columns.

        When formatting an Index subclass
        (e.g. IntervalIndex._format_native_types), we don't want the
        leading space since it should be left-aligned.
    quoting

    Returns
    -------
    List[str]
    """
    dtype = values.dtype

    # Ordered (predicate, formatter class) pairs; the first match wins.
    dispatch: tuple[tuple[Callable, type[GenericArrayFormatter]], ...] = (
        (is_datetime64_dtype, Datetime64Formatter),
        (lambda dt: isinstance(dt, DatetimeTZDtype), Datetime64TZFormatter),
        (is_timedelta64_dtype, Timedelta64Formatter),
        (is_extension_array_dtype, ExtensionArrayFormatter),
        (lambda dt: is_float_dtype(dt) or is_complex_dtype(dt), FloatArrayFormatter),
        (is_integer_dtype, IntArrayFormatter),
    )
    fmt_klass: type[GenericArrayFormatter] = next(
        (klass for matches, klass in dispatch if matches(dtype)),
        GenericArrayFormatter,
    )

    space = 12 if space is None else space
    if float_format is None:
        float_format = get_option("display.float_format")
    if digits is None:
        digits = get_option("display.precision")

    return fmt_klass(
        values,
        digits=digits,
        na_rep=na_rep,
        float_format=float_format,
        formatter=formatter,
        space=space,
        justify=justify,
        decimal=decimal,
        leading_space=leading_space,
        quoting=quoting,
    ).get_result()

1329

1330

class GenericArrayFormatter:
    """
    Base formatter turning an array of values into a list of strings.

    ``get_result`` is the entry point: it calls ``_format_strings`` and then
    pads the result with ``_make_fixed_width`` according to ``justify``.
    Subclasses override ``_format_strings`` for specific dtypes.
    """

    def __init__(
        self,
        values: Any,
        digits: int = 7,
        formatter: Callable | None = None,
        na_rep: str = "NaN",
        space: str | int = 12,
        float_format: FloatFormatType | None = None,
        justify: str = "right",
        decimal: str = ".",
        quoting: int | None = None,
        fixed_width: bool = True,
        leading_space: bool | None = True,
    ) -> None:
        # All options are stored as-is; several (digits, decimal,
        # fixed_width) are read only by subclasses such as
        # FloatArrayFormatter.
        self.values = values
        self.digits = digits
        self.na_rep = na_rep
        self.space = space
        self.formatter = formatter
        self.float_format = float_format
        self.justify = justify
        self.decimal = decimal
        self.quoting = quoting
        self.fixed_width = fixed_width
        self.leading_space = leading_space

    def get_result(self) -> list[str]:
        """Return the formatted strings, justified to a common width."""
        fmt_values = self._format_strings()
        return _make_fixed_width(fmt_values, self.justify)

    def _format_strings(self) -> list[str]:
        """
        Format each element of ``self.values`` individually.

        Elements that are floats (and not NA) go through the float
        formatter; everything else goes through ``self.formatter`` or, when
        that is None, ``pprint_thing``.

        Raises
        ------
        TypeError
            If the extracted values are not a numpy array.
        """
        # Resolve the float formatter: explicit setting, then the display
        # option, then a fixed-precision format with trailing zeros trimmed.
        if self.float_format is None:
            float_format = get_option("display.float_format")
            if float_format is None:
                precision = get_option("display.precision")
                float_format = lambda x: _trim_zeros_single_float(
                    f"{x: .{precision:d}f}"
                )
        else:
            float_format = self.float_format

        if self.formatter is not None:
            formatter = self.formatter
        else:
            # Strings are quoted for any quoting mode other than QUOTE_NONE.
            quote_strings = self.quoting is not None and self.quoting != QUOTE_NONE
            formatter = partial(
                pprint_thing,
                escape_chars=("\t", "\r", "\n"),
                quote_strings=quote_strings,
            )

        def _format(x):
            # Scalar missing values: None, NA and NaT-likes keep their own
            # spelling; anything else missing is shown as ``na_rep``.
            if self.na_rep is not None and is_scalar(x) and isna(x):
                try:
                    # try block for np.isnat specifically
                    # determine na_rep if x is None or NaT-like
                    if x is None:
                        return "None"
                    elif x is NA:
                        return str(NA)
                    elif x is NaT or np.isnat(x):
                        return "NaT"
                except (TypeError, ValueError):
                    # np.isnat only handles datetime or timedelta objects
                    pass
                return self.na_rep
            elif isinstance(x, PandasObject):
                return str(x)
            else:
                # object dtype
                return str(formatter(x))

        vals = extract_array(self.values, extract_numpy=True)
        if not isinstance(vals, np.ndarray):
            raise TypeError(
                "ExtensionArray formatting should use ExtensionArrayFormatter"
            )
        # Per-element flag: is this entry a float and not missing?
        inferred = lib.map_infer(vals, is_float)
        is_float_type = (
            inferred
            # vals may have 2 or more dimensions
            & np.all(notna(vals), axis=tuple(range(1, len(vals.shape))))
        )
        leading_space = self.leading_space
        if leading_space is None:
            # Unspecified: add the leading space only if any float is present.
            leading_space = is_float_type.any()

        fmt_values = []
        for i, v in enumerate(vals):
            if not is_float_type[i] and leading_space:
                fmt_values.append(f" {_format(v)}")
            elif is_float_type[i]:
                fmt_values.append(float_format(v))
            else:
                if leading_space is False:
                    # False specifically, so that the default is
                    # to include a space if we get here.
                    tpl = "{v}"
                else:
                    tpl = " {v}"
                fmt_values.append(tpl.format(v=_format(v)))

        return fmt_values

1435

1436

class FloatArrayFormatter(GenericArrayFormatter):
    """
    Formatter for float and complex arrays.

    In fixed-width mode values are first rendered with ``digits`` decimals
    and may be re-rendered in exponent notation when some values are very
    small, or the output is too long and some values are large.
    """

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)

        # float_format is expected to be a string
        # formatter should be used to pass a function
        if self.float_format is not None and self.formatter is None:
            # GH21625, GH22270
            # An explicit float_format disables fixed-width handling.
            self.fixed_width = False
            if callable(self.float_format):
                # A callable float_format is treated as the formatter.
                self.formatter = self.float_format
                self.float_format = None

    def _value_formatter(
        self,
        float_format: FloatFormatType | None = None,
        threshold: float | None = None,
    ) -> Callable:
        """
        Return a function to be applied on each value to format it.

        Parameters
        ----------
        float_format : callable, optional
            Called as ``float_format(value=v)``; falls back to
            ``self.float_format`` when None, and to ``str`` when that is
            falsy too.
        threshold : float, optional
            When given, values whose absolute value does not exceed it are
            formatted as ``0.0``.
        """
        # the float_format parameter supersedes self.float_format
        if float_format is None:
            float_format = self.float_format

        # we are going to compose different functions, to first convert to
        # a string, then replace the decimal symbol, and finally chop according
        # to the threshold

        # when there is no float_format, we use str instead of '%g'
        # because str(0.0) = '0.0' while '%g' % 0.0 = '0'
        if float_format:

            def base_formatter(v):
                assert float_format is not None  # for mypy
                # error: "str" not callable
                # error: Unexpected keyword argument "value" for "__call__" of
                # "EngFormatter"
                return (
                    float_format(value=v)  # type: ignore[operator,call-arg]
                    if notna(v)
                    else self.na_rep
                )

        else:

            def base_formatter(v):
                return str(v) if notna(v) else self.na_rep

        if self.decimal != ".":

            def decimal_formatter(v):
                # Swap only the first "." for the configured decimal symbol.
                return base_formatter(v).replace(".", self.decimal, 1)

        else:
            decimal_formatter = base_formatter

        if threshold is None:
            return decimal_formatter

        def formatter(value):
            if notna(value):
                if abs(value) > threshold:
                    return decimal_formatter(value)
                else:
                    return decimal_formatter(0.0)
            else:
                return self.na_rep

        return formatter

    def get_result_as_array(self) -> np.ndarray:
        """
        Returns the float values converted into strings using
        the parameters given at initialisation, as a numpy array
        """

        def format_with_na_rep(values: ArrayLike, formatter: Callable, na_rep: str):
            # Format element-wise, substituting na_rep for missing entries,
            # and restore the original shape.
            mask = isna(values)
            formatted = np.array(
                [
                    formatter(val) if not m else na_rep
                    for val, m in zip(values.ravel(), mask.ravel())
                ]
            ).reshape(values.shape)
            return formatted

        if self.formatter is not None:
            # A user formatter bypasses all of the logic below.
            return format_with_na_rep(self.values, self.formatter, self.na_rep)

        if self.fixed_width:
            threshold = get_option("display.chop_threshold")
        else:
            threshold = None

        # if we have a fixed_width, we'll need to try different float_format
        def format_values_with(float_format):
            formatter = self._value_formatter(float_format, threshold)

            # default formatter leaves a space to the left when formatting
            # floats, must be consistent for left-justifying NaNs (GH #25061)
            if self.justify == "left":
                na_rep = " " + self.na_rep
            else:
                na_rep = self.na_rep

            # separate the wheat from the chaff
            values = self.values
            is_complex = is_complex_dtype(values)
            values = format_with_na_rep(values, formatter, na_rep)

            if self.fixed_width:
                # Trim trailing zeros so all entries share the same number
                # of decimals.
                if is_complex:
                    result = _trim_zeros_complex(values, self.decimal)
                else:
                    result = _trim_zeros_float(values, self.decimal)
                return np.asarray(result, dtype="object")

            return values

        # There is a special default string when we are fixed-width
        # The default is otherwise to use str instead of a formatting string
        float_format: FloatFormatType | None
        if self.float_format is None:
            if self.fixed_width:
                if self.leading_space is True:
                    fmt_str = "{value: .{digits:d}f}"
                else:
                    fmt_str = "{value:.{digits:d}f}"
                float_format = partial(fmt_str.format, digits=self.digits)
            else:
                float_format = self.float_format
        else:
            # A non-callable float_format is applied as a %-style template.
            float_format = lambda value: self.float_format % value

        formatted_values = format_values_with(float_format)

        if not self.fixed_width:
            return formatted_values

        # we need to convert to engineering format if some values are too small
        # and would appear as 0, or if some values are too big and take too
        # much space

        if len(formatted_values) > 0:
            maxlen = max(len(x) for x in formatted_values)
            too_long = maxlen > self.digits + 6
        else:
            too_long = False

        with np.errstate(invalid="ignore"):
            abs_vals = np.abs(self.values)
            # this is pretty arbitrary for now
            # large values: more than 8 characters including decimal symbol
            # and first digit, hence > 1e6
            has_large_values = (abs_vals > 1e6).any()
            has_small_values = (
                (abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)
            ).any()

        if has_small_values or (too_long and has_large_values):
            # Second pass: re-render everything in exponent notation.
            if self.leading_space is True:
                fmt_str = "{value: .{digits:d}e}"
            else:
                fmt_str = "{value:.{digits:d}e}"
            float_format = partial(fmt_str.format, digits=self.digits)
            formatted_values = format_values_with(float_format)

        return formatted_values

    def _format_strings(self) -> list[str]:
        """Return ``get_result_as_array()`` converted to a list."""
        return list(self.get_result_as_array())

1607

1608

class IntArrayFormatter(GenericArrayFormatter):
    """Formatter for integer arrays."""

    def _format_strings(self) -> list[str]:
        """
        Format every value with ``self.formatter`` or a default ``d`` format.

        The default format reserves a leading sign column (``" d"``) unless
        ``leading_space`` is exactly False.

        Returns
        -------
        list of str
        """
        # The f-string already yields the final text, so no further
        # ``str.format`` call is needed on its result.
        if self.leading_space is False:
            formatter_str = lambda x: f"{x:d}"
        else:
            formatter_str = lambda x: f"{x: d}"
        formatter = self.formatter or formatter_str
        return [formatter(x) for x in self.values]

1618

1619

class Datetime64Formatter(GenericArrayFormatter):
    """Formatter for timezone-naive datetime64 values."""

    def __init__(
        self,
        values: np.ndarray | Series | DatetimeIndex | DatetimeArray,
        nat_rep: str = "NaT",
        date_format: None = None,
        **kwargs,
    ) -> None:
        super().__init__(values, **kwargs)
        self.nat_rep = nat_rep
        self.date_format = date_format

    def _format_strings(self) -> list[str]:
        """Format the values; by definition they do NOT have a timezone."""
        dti = self.values
        if not isinstance(dti, DatetimeIndex):
            dti = DatetimeIndex(dti)

        custom = self.formatter
        if custom is not None and callable(custom):
            return [custom(ts) for ts in dti]

        # No custom formatter: let the underlying array render itself.
        rendered = dti._data._format_native_types(
            na_rep=self.nat_rep, date_format=self.date_format
        )
        return rendered.tolist()

1646

1647

class ExtensionArrayFormatter(GenericArrayFormatter):
    """Formatter for extension arrays; delegates to ``format_array``."""

    def _format_strings(self) -> list[str]:
        """
        Convert the extension array to a numpy array and format that,
        using the array's own ``_formatter(boxed=True)`` when no formatter
        was supplied.
        """
        ea = extract_array(self.values, extract_numpy=True)

        formatter = self.formatter
        if formatter is None:
            formatter = ea._formatter(boxed=True)

        # Categorical is special for now, so that we can preserve tzinfo
        array = (
            ea._internal_get_values()
            if isinstance(ea, Categorical)
            else np.asarray(ea)
        )

        return format_array(
            array,
            formatter,
            float_format=self.float_format,
            na_rep=self.na_rep,
            digits=self.digits,
            space=self.space,
            justify=self.justify,
            decimal=self.decimal,
            leading_space=self.leading_space,
            quoting=self.quoting,
        )

1675

1676

def format_percentiles(
    percentiles: (np.ndarray | Sequence[float]),
) -> list[str]:
    """
    Outputs rounded and formatted percentiles.

    Parameters
    ----------
    percentiles : list-like, containing floats from interval [0,1]

    Returns
    -------
    formatted : list of strings

    Raises
    ------
    ValueError
        If ``percentiles`` is not numeric or any entry (including NaN) is
        outside the interval [0, 1].

    Notes
    -----
    Rounding precision is chosen so that: (1) if any two elements of
    ``percentiles`` differ, they remain different after rounding
    (2) no entry is *rounded* to 0% or 100%.
    Any non-integer is always rounded to at least 1 decimal place.

    Examples
    --------
    Keeps all entries different after rounding:

    >>> format_percentiles([0.01999, 0.02001, 0.5, 0.666666, 0.9999])
    ['1.999%', '2.001%', '50%', '66.667%', '99.99%']

    No element is rounded to 0% or 100% (unless already equal to it).
    Duplicates are allowed:

    >>> format_percentiles([0, 0.5, 0.02001, 0.5, 0.666666, 0.9999])
    ['0%', '50%', '2.0%', '50%', '66.67%', '99.99%']
    """
    percentiles = np.asarray(percentiles)

    # Comparisons with NaN are False, so NaN entries are rejected as well.
    with np.errstate(invalid="ignore"):
        if (
            not is_numeric_dtype(percentiles)
            or not np.all(percentiles >= 0)
            or not np.all(percentiles <= 1)
        ):
            raise ValueError("percentiles should all be in the interval [0,1]")

    percentiles = 100 * percentiles

    # Round to the nearest integer instead of truncating: 100 * 0.29 is
    # 28.999999999999996, which truncates to 28 and would then be treated as
    # a non-integer percentile and printed as '29.0%'.
    nearest_int = np.rint(percentiles).astype(int)
    int_idx = np.isclose(nearest_int, percentiles)

    if np.all(int_idx):
        out = nearest_int.astype(str)
        return [i + "%" for i in out]

    unique_pcts = np.unique(percentiles)
    # Include the distances to 0 and 100 so that no entry rounds onto them.
    to_begin = unique_pcts[0] if unique_pcts[0] > 0 else None
    to_end = 100 - unique_pcts[-1] if unique_pcts[-1] < 100 else None

    # Least precision that keeps percentiles unique after rounding
    prec = -np.floor(
        np.log10(np.min(np.ediff1d(unique_pcts, to_begin=to_begin, to_end=to_end)))
    ).astype(int)
    prec = max(1, prec)
    out = np.empty_like(percentiles, dtype=object)
    out[int_idx] = nearest_int[int_idx].astype(str)

    out[~int_idx] = percentiles[~int_idx].round(prec).astype(str)
    return [i + "%" for i in out]

1744

1745

def is_dates_only(values: np.ndarray | DatetimeArray | Index | DatetimeIndex) -> bool:
    """
    Return True if every non-NaT value falls on a day boundary and the
    values carry no timezone.
    """
    if not isinstance(values, Index):
        values = values.ravel()

    if not isinstance(values, (DatetimeArray, DatetimeIndex)):
        values = DatetimeIndex(values)

    if values.tz is not None:
        return False

    i8 = values.asi8
    not_nat = i8 != iNaT
    # error: Argument 1 to "py_get_unit_from_dtype" has incompatible type
    # "Union[dtype[Any], ExtensionDtype]"; expected "dtype[Any]"
    reso = get_unit_from_dtype(values.dtype)  # type: ignore[arg-type]
    ppd = periods_per_day(reso)

    # TODO: can we reuse is_date_array_normalized?  would need a skipna kwd
    has_time_of_day = np.logical_and(not_nat, i8 % ppd != 0).any()
    return not has_time_of_day

1769

1770

1771def _format_datetime64(x: NaTType | Timestamp, nat_rep: str = "NaT") -> str:

1772 if x is NaT:

1773 return nat_rep

1774

1775 # Timestamp.__str__ falls back to datetime.datetime.__str__ = isoformat(sep=' ')

1776 # so it already uses string formatting rather than strftime (faster).

1777 return str(x)

1778

1779

1780def _format_datetime64_dateonly(

1781 x: NaTType | Timestamp,

1782 nat_rep: str = "NaT",

1783 date_format: str | None = None,

1784) -> str:

1785 if isinstance(x, NaTType):

1786 return nat_rep

1787

1788 if date_format:

1789 return x.strftime(date_format)

1790 else:

1791 # Timestamp._date_repr relies on string formatting (faster than strftime)

1792 return x._date_repr

1793

1794

def get_format_datetime64(
    is_dates_only: bool, nat_rep: str = "NaT", date_format: str | None = None
) -> Callable:
    """
    Return a one-argument callable that formats a datetime64 value as a
    string, using the date-only formatter when ``is_dates_only`` is truthy.
    """
    if not is_dates_only:

        def _fmt(x):
            return _format_datetime64(x, nat_rep=nat_rep)

    else:

        def _fmt(x):
            return _format_datetime64_dateonly(
                x, nat_rep=nat_rep, date_format=date_format
            )

    return _fmt

1807

1808

def get_format_datetime64_from_values(
    values: np.ndarray | DatetimeArray | DatetimeIndex, date_format: str | None
) -> str | None:
    """
    Return the date format to use for ``values``.

    When the values are dates only, ``date_format`` falls back to
    ``"%Y-%m-%d"``; otherwise it is returned unchanged.
    """
    if isinstance(values, np.ndarray) and values.ndim > 1:
        # We don't actually care about the order of values, and DatetimeIndex
        # only accepts 1D values
        values = values.ravel()

    if not is_dates_only(values):
        return date_format

    # Only dates and no timezone: provide a default format
    return date_format or "%Y-%m-%d"

1823

1824

class Datetime64TZFormatter(Datetime64Formatter):
    """Formatter for timezone-aware datetime values."""

    def _format_strings(self) -> list[str]:
        """Format the values; by definition they have a timezone."""
        boxed = self.values.astype(object)
        dates_only = is_dates_only(boxed)

        formatter = self.formatter
        if not formatter:
            formatter = get_format_datetime64(
                dates_only, date_format=self.date_format
            )

        return [formatter(ts) for ts in boxed]

1836

1837

class Timedelta64Formatter(GenericArrayFormatter):
    """Formatter for timedelta64 values."""

    def __init__(
        self,
        values: np.ndarray | TimedeltaIndex,
        nat_rep: str = "NaT",
        box: bool = False,
        **kwargs,
    ) -> None:
        super().__init__(values, **kwargs)
        self.nat_rep = nat_rep
        self.box = box

    def _format_strings(self) -> list[str]:
        """Format each value with ``self.formatter`` or the default one."""
        formatter = self.formatter
        if not formatter:
            formatter = get_format_timedelta64(
                self.values, nat_rep=self.nat_rep, box=self.box
            )
        return [formatter(td) for td in self.values]

1855

1856

def get_format_timedelta64(
    values: np.ndarray | TimedeltaIndex | TimedeltaArray,
    nat_rep: str = "NaT",
    box: bool = False,
) -> Callable:
    """
    Return a formatter function for a range of timedeltas.

    All values share one format: the long form is used as soon as any
    non-NaT value is not a whole multiple of one day.

    If ``box`` is true, the formatted value is wrapped in single quotes.
    """
    i8 = values.view(np.int64)
    not_nat = i8 != iNaT

    one_day_nanos = 86400 * 10**9
    # error: Unsupported operand types for % ("ExtensionArray" and "int")
    off_day_boundary = i8 % one_day_nanos != 0  # type: ignore[operator]
    # error: Argument 1 to "__call__" of "ufunc" has incompatible type
    # "Union[Any, ExtensionArray, ndarray]"; expected
    # "Union[Union[int, float, complex, str, bytes, generic],
    # Sequence[Union[int, float, complex, str, bytes, generic]],
    # Sequence[Sequence[Any]], _SupportsArray]"
    has_sub_day = np.logical_and(
        not_nat, off_day_boundary  # type: ignore[arg-type]
    ).any()

    repr_style = "long" if has_sub_day else None

    def _formatter(x):
        if x is None or (is_scalar(x) and isna(x)):
            return nat_rep

        td = x if isinstance(x, Timedelta) else Timedelta(x)

        # Timedelta._repr_base uses string formatting (faster than strftime)
        text = td._repr_base(format=repr_style)
        return f"'{text}'" if box else text

    return _formatter

1902

1903

1904def _make_fixed_width(

1905 strings: list[str],

1906 justify: str = "right",

1907 minimum: int | None = None,

1908 adj: TextAdjustment | None = None,

1909) -> list[str]:

1910

1911 if len(strings) == 0 or justify == "all":

1912 return strings

1913

1914 if adj is None:

1915 adjustment = get_adjustment()

1916 else:

1917 adjustment = adj

1918

1919 max_len = max(adjustment.len(x) for x in strings)

1920

1921 if minimum is not None:

1922 max_len = max(minimum, max_len)

1923

1924 conf_max = get_option("display.max_colwidth")

1925 if conf_max is not None and max_len > conf_max:

1926 max_len = conf_max

1927

1928 def just(x: str) -> str:

1929 if conf_max is not None:

1930 if (conf_max > 3) & (adjustment.len(x) > max_len):

1931 x = x[: max_len - 3] + "..."

1932 return x

1933

1934 strings = [just(x) for x in strings]

1935 result = adjustment.justify(strings, max_len, mode=justify)

1936 return result

1937

1938

1939def _trim_zeros_complex(str_complexes: np.ndarray, decimal: str = ".") -> list[str]:

1940 """

1941 Separates the real and imaginary parts from the complex number, and

1942 executes the _trim_zeros_float method on each of those.

1943 """

1944 trimmed = [

1945 "".join(_trim_zeros_float(re.split(r"([j+-])", x), decimal))

1946 for x in str_complexes

1947 ]

1948

1949 # pad strings to the length of the longest trimmed string for alignment

1950 lengths = [len(s) for s in trimmed]

1951 max_length = max(lengths)

1952 padded = [

1953 s[: -((k - 1) // 2 + 1)] # real part

1954 + (max_length - k) // 2 * "0"

1955 + s[-((k - 1) // 2 + 1) : -((k - 1) // 2)] # + / -

1956 + s[-((k - 1) // 2) : -1] # imaginary part

1957 + (max_length - k) // 2 * "0"

1958 + s[-1]

1959 for s, k in zip(trimmed, lengths)

1960 ]

1961 return padded

1962

1963

1964def _trim_zeros_single_float(str_float: str) -> str:

1965 """

1966 Trims trailing zeros after a decimal point,

1967 leaving just one if necessary.

1968 """

1969 str_float = str_float.rstrip("0")

1970 if str_float.endswith("."):

1971 str_float += "0"

1972

1973 return str_float

1974

1975

1976def _trim_zeros_float(

1977 str_floats: np.ndarray | list[str], decimal: str = "."

1978) -> list[str]:

1979 """

1980 Trims the maximum number of trailing zeros equally from

1981 all numbers containing decimals, leaving just one if

1982 necessary.

1983 """

1984 trimmed = str_floats

1985 number_regex = re.compile(rf"^\s*[\+-]?[0-9]+\{decimal}[0-9]*$")

1986

1987 def is_number_with_decimal(x):

1988 return re.match(number_regex, x) is not None

1989

1990 def should_trim(values: np.ndarray | list[str]) -> bool:

1991 """

1992 Determine if an array of strings should be trimmed.

1993

1994 Returns True if all numbers containing decimals (defined by the

1995 above regular expression) within the array end in a zero, otherwise

1996 returns False.

1997 """

1998 numbers = [x for x in values if is_number_with_decimal(x)]

1999 return len(numbers) > 0 and all(x.endswith("0") for x in numbers)

2000

2001 while should_trim(trimmed):

2002 trimmed = [x[:-1] if is_number_with_decimal(x) else x for x in trimmed]

2003

2004 # leave one 0 after the decimal points if need be.

2005 result = [

2006 x + "0" if is_number_with_decimal(x) and x.endswith(decimal) else x

2007 for x in trimmed

2008 ]

2009 return result

2010

2011

2012def _has_names(index: Index) -> bool:

2013 if isinstance(index, MultiIndex):

2014 return com.any_not_none(*index.names)

2015 else:

2016 return index.name is not None

2017

2018

class EngFormatter:
    """
    Formats float values according to engineering format.

    Based on matplotlib.ticker.EngFormatter

    Parameters
    ----------
    accuracy : int, optional
        Number of digits after the decimal point; ``g`` formatting is used
        when None.
    use_eng_prefix : bool, default False
        Append an SI prefix letter instead of an ``E+XX`` / ``E-XX`` suffix.
    """

    # The SI engineering prefixes
    ENG_PREFIXES = {
        -24: "y",
        -21: "z",
        -18: "a",
        -15: "f",
        -12: "p",
        -9: "n",
        -6: "u",
        -3: "m",
        0: "",
        3: "k",
        6: "M",
        9: "G",
        12: "T",
        15: "P",
        18: "E",
        21: "Z",
        24: "Y",
    }

    def __init__(
        self, accuracy: int | None = None, use_eng_prefix: bool = False
    ) -> None:
        self.accuracy = accuracy
        self.use_eng_prefix = use_eng_prefix

    def __call__(self, num: float) -> str:
        """
        Formats a number in engineering notation, appending a letter
        representing the power of 1000 of the original number. Some examples:
        >>> format_eng = EngFormatter(accuracy=0, use_eng_prefix=True)
        >>> format_eng(0)
        ' 0'
        >>> format_eng = EngFormatter(accuracy=1, use_eng_prefix=True)
        >>> format_eng(1_000_000)
        ' 1.0M'
        >>> format_eng = EngFormatter(accuracy=2, use_eng_prefix=False)
        >>> format_eng("-1e-6")
        '-1.00E-06'

        NaN is rendered as ``'NaN'`` and infinities as ``'inf'`` / ``'-inf'``.

        @param num: the value to represent
        @type num: either a numeric value or a string that can be converted to
                   a numeric value (as per decimal.Decimal constructor)

        @return: engineering formatted string
        """
        dnum = decimal.Decimal(str(num))

        if decimal.Decimal.is_nan(dnum):
            return "NaN"

        if decimal.Decimal.is_infinite(dnum):
            # Keep the sign so negative infinity is not shown as "inf".
            return "-inf" if dnum < 0 else "inf"

        sign = 1

        if dnum < 0:  # pragma: no cover
            sign = -1
            dnum = -dnum

        # Exponent rounded down to a multiple of three.
        if dnum != 0:
            pow10 = decimal.Decimal(int(math.floor(dnum.log10() / 3) * 3))
        else:
            pow10 = decimal.Decimal(0)

        # Clamp to the range covered by the SI prefixes.
        pow10 = pow10.min(max(self.ENG_PREFIXES.keys()))
        pow10 = pow10.max(min(self.ENG_PREFIXES.keys()))
        int_pow10 = int(pow10)

        if self.use_eng_prefix:
            prefix = self.ENG_PREFIXES[int_pow10]
        else:
            if int_pow10 < 0:
                prefix = f"E-{-int_pow10:02d}"
            else:
                prefix = f"E+{int_pow10:02d}"

        mant = sign * dnum / (10**pow10)

        if self.accuracy is None:  # pragma: no cover
            format_str = "{mant: g}{prefix}"
        else:
            format_str = f"{{mant: .{self.accuracy:d}f}}{{prefix}}"

        formatted = format_str.format(mant=mant, prefix=prefix)

        return formatted

2114

2115

def set_eng_float_format(accuracy: int = 3, use_eng_prefix: bool = False) -> None:
    """
    Make engineering notation the default float display format.

    Installs an EngFormatter as the ``display.float_format`` option.

    Parameters
    ----------
    accuracy : int, default 3
        Number of decimal digits after the floating point.
    use_eng_prefix : bool, default False
        Passed through to EngFormatter.

    See also EngFormatter.
    """
    formatter = EngFormatter(accuracy, use_eng_prefix)
    set_option("display.float_format", formatter)

2125

2126

def get_level_lengths(
    levels: Any, sentinel: bool | object | str = ""
) -> list[dict[int, int]]:
    """
    Compute, for every level, how many positions each label spans.

    Parameters
    ----------
    levels : list of lists
        One sequence of values per level.
    sentinel : string, optional
        Value which states that no new label starts at that position.

    Returns
    -------
    list of dict
        One dict per level, mapping the position where a label starts to
        the number of positions it covers.
    """
    if not len(levels):
        return []

    # still_open[pos] stays True only while every level seen so far held
    # the sentinel at ``pos``; once cleared, that position always starts a
    # new span in the remaining levels.
    still_open = [True] * len(levels[0])

    spans_per_level = []
    for level in levels:
        start = 0
        spans = {}

        for pos, label in enumerate(level):
            if still_open[pos] and label == sentinel:
                # Still inside the span that began at ``start``.
                continue
            still_open[pos] = False
            spans[start] = pos - start
            start = pos

        # Close the final span, which runs to the end of the level.
        spans[start] = len(level) - start
        spans_per_level.append(spans)

    return spans_per_level

2168

2169

def buffer_put_lines(buf: WriteBuffer[str], lines: list[str]) -> None:
    """
    Write the given lines to a buffer, joined by newlines.

    Parameters
    ----------
    buf
        The buffer to write to
    lines
        The lines to append. If at least one element is a str, every
        element is converted with ``str`` before joining.
    """
    contains_text = any(isinstance(item, str) for item in lines)
    rendered = list(map(str, lines)) if contains_text else lines
    buf.write("\n".join(rendered))