Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/io/formats/format.py: 14%
907 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1"""
2Internal module for formatting output data in csv, html, xml,
3and latex files. This module also applies to display formatting.
4"""
5from __future__ import annotations
7from contextlib import contextmanager
8from csv import (
9 QUOTE_NONE,
10 QUOTE_NONNUMERIC,
11)
12import decimal
13from functools import partial
14from io import StringIO
15import math
16import re
17from shutil import get_terminal_size
18from typing import (
19 IO,
20 TYPE_CHECKING,
21 Any,
22 Callable,
23 Final,
24 Hashable,
25 Iterable,
26 Iterator,
27 List,
28 Mapping,
29 Sequence,
30 cast,
31)
32from unicodedata import east_asian_width
34import numpy as np
36from pandas._config.config import (
37 get_option,
38 set_option,
39)
41from pandas._libs import lib
42from pandas._libs.missing import NA
43from pandas._libs.tslibs import (
44 NaT,
45 Timedelta,
46 Timestamp,
47 get_unit_from_dtype,
48 iNaT,
49 periods_per_day,
50)
51from pandas._libs.tslibs.nattype import NaTType
52from pandas._typing import (
53 ArrayLike,
54 Axes,
55 ColspaceArgType,
56 ColspaceType,
57 CompressionOptions,
58 FilePath,
59 FloatFormatType,
60 FormattersType,
61 IndexLabel,
62 StorageOptions,
63 WriteBuffer,
64)
65from pandas.util._decorators import deprecate_kwarg
67from pandas.core.dtypes.common import (
68 is_categorical_dtype,
69 is_complex_dtype,
70 is_datetime64_dtype,
71 is_extension_array_dtype,
72 is_float,
73 is_float_dtype,
74 is_integer,
75 is_integer_dtype,
76 is_list_like,
77 is_numeric_dtype,
78 is_scalar,
79 is_timedelta64_dtype,
80)
81from pandas.core.dtypes.dtypes import DatetimeTZDtype
82from pandas.core.dtypes.missing import (
83 isna,
84 notna,
85)
87from pandas.core.arrays import (
88 Categorical,
89 DatetimeArray,
90 TimedeltaArray,
91)
92from pandas.core.base import PandasObject
93import pandas.core.common as com
94from pandas.core.construction import extract_array
95from pandas.core.indexes.api import (
96 Index,
97 MultiIndex,
98 PeriodIndex,
99 ensure_index,
100)
101from pandas.core.indexes.datetimes import DatetimeIndex
102from pandas.core.indexes.timedeltas import TimedeltaIndex
103from pandas.core.reshape.concat import concat
105from pandas.io.common import (
106 check_parent_directory,
107 stringify_path,
108)
109from pandas.io.formats.printing import (
110 adjoin,
111 justify,
112 pprint_thing,
113)
115if TYPE_CHECKING: 115 ↛ 116line 115 didn't jump to line 116, because the condition on line 115 was never true
116 from pandas import (
117 DataFrame,
118 Series,
119 )
122common_docstring: Final = """
123 Parameters
124 ----------
125 buf : str, Path or StringIO-like, optional, default None
126 Buffer to write to. If None, the output is returned as a string.
127 columns : sequence, optional, default None
128 The subset of columns to write. Writes all columns by default.
129 col_space : %(col_space_type)s, optional
130 %(col_space)s.
131 header : %(header_type)s, optional
132 %(header)s.
133 index : bool, optional, default True
134 Whether to print index (row) labels.
135 na_rep : str, optional, default 'NaN'
136 String representation of ``NaN`` to use.
137 formatters : list, tuple or dict of one-param. functions, optional
138 Formatter functions to apply to columns' elements by position or
139 name.
140 The result of each function must be a unicode string.
141 List/tuple must be of length equal to the number of columns.
142 float_format : one-parameter function, optional, default None
143 Formatter function to apply to columns' elements if they are
144 floats. This function must return a unicode string and will be
145 applied only to the non-``NaN`` elements, with ``NaN`` being
146 handled by ``na_rep``.
148 .. versionchanged:: 1.2.0
150 sparsify : bool, optional, default True
151 Set to False for a DataFrame with a hierarchical index to print
152 every multiindex key at each row.
153 index_names : bool, optional, default True
154 Prints the names of the indexes.
155 justify : str, default None
156 How to justify the column labels. If None uses the option from
157 the print configuration (controlled by set_option), 'right' out
158 of the box. Valid values are
160 * left
161 * right
162 * center
163 * justify
164 * justify-all
165 * start
166 * end
167 * inherit
168 * match-parent
169 * initial
170 * unset.
171 max_rows : int, optional
172 Maximum number of rows to display in the console.
173 max_cols : int, optional
174 Maximum number of columns to display in the console.
175 show_dimensions : bool, default False
176 Display DataFrame dimensions (number of rows by number of columns).
177 decimal : str, default '.'
178 Character recognized as decimal separator, e.g. ',' in Europe.
179 """
# Names accepted for column-label justification. The same names are listed
# under ``justify`` in ``common_docstring``.
_VALID_JUSTIFY_PARAMETERS = (
    "left",
    "right",
    "center",
    "justify",
    "justify-all",
    "start",
    "end",
    "inherit",
    "match-parent",
    "initial",
    "unset",
)
195return_docstring: Final = """
196 Returns
197 -------
198 str or None
199 If buf is None, returns the result as a string. Otherwise returns
200 None.
201 """
class CategoricalFormatter:
    """
    Render a ``Categorical`` as ``[v1, v2, ...]`` followed by an optional footer.

    Parameters
    ----------
    categorical : Categorical
        The array to render.
    buf : IO[str], optional
        Stored on the instance; a fresh ``StringIO`` is created when omitted.
    length : bool, default True
        Whether the footer starts with a ``Length: <n>`` line.
    na_rep : str, default "NaN"
        Forwarded to ``format_array`` as the missing-value representation.
    footer : bool, default True
        Whether ``to_string`` appends the footer at all.
    """

    def __init__(
        self,
        categorical: Categorical,
        buf: IO[str] | None = None,
        length: bool = True,
        na_rep: str = "NaN",
        footer: bool = True,
    ) -> None:
        self.categorical = categorical
        self.buf = buf if buf is not None else StringIO("")
        self.na_rep = na_rep
        self.length = length
        self.footer = footer
        self.quoting = QUOTE_NONNUMERIC

    def _get_footer(self) -> str:
        """
        Return the footer text.

        The footer is the categories description, preceded by a
        ``Length: <n>`` line when ``self.length`` is truthy.
        """
        # The categories description always goes on its own line, so the
        # length prefix (when present) carries the newline.
        prefix = f"Length: {len(self.categorical)}\n" if self.length else ""
        return prefix + self.categorical._repr_categories_info()

    def _get_formatted_values(self) -> list[str]:
        """Format the underlying values with ``format_array``."""
        return format_array(
            self.categorical._internal_get_values(),
            None,
            float_format=None,
            na_rep=self.na_rep,
            quoting=self.quoting,
        )

    def to_string(self) -> str:
        """
        Return the rendered categorical.

        An empty categorical renders as just the footer (or an empty string
        when the footer is disabled).
        """
        # ``len(...) == 0`` rather than truthiness: the operand is array-like.
        if len(self.categorical) == 0:
            return self._get_footer() if self.footer else ""

        values = ", ".join(val.strip() for val in self._get_formatted_values())
        lines = ["[" + values + "]"]
        if self.footer:
            footer = self._get_footer()
            if footer:
                lines.append(footer)

        return "\n".join(lines)
class SeriesFormatter:
    """
    Build the text representation of a Series.

    The constructor stores the display options, resolves the float format
    from ``display.float_format`` when none is given, and immediately
    computes the (possibly truncated) series that ``to_string`` renders.
    """

    def __init__(
        self,
        series: Series,
        buf: IO[str] | None = None,
        length: bool | str = True,
        header: bool = True,
        index: bool = True,
        na_rep: str = "NaN",
        name: bool = False,
        float_format: str | None = None,
        dtype: bool = True,
        max_rows: int | None = None,
        min_rows: int | None = None,
    ) -> None:
        self.series = series
        self.buf = StringIO() if buf is None else buf
        self.name = name
        self.na_rep = na_rep
        self.header = header
        self.length = length
        self.index = index
        self.max_rows = max_rows
        self.min_rows = min_rows
        self.float_format = (
            get_option("display.float_format")
            if float_format is None
            else float_format
        )
        self.dtype = dtype
        self.adj = get_adjustment()

        self._chk_truncate()

    def _chk_truncate(self) -> None:
        """
        Decide whether rows must be dropped and store the result.

        Sets ``tr_series`` (the rows to render), ``tr_row_num`` (rows kept
        at each end, or ``None`` when nothing is dropped) and
        ``is_truncated_vertically``.
        """
        self.tr_row_num: int | None

        full = self.series
        limit = self.max_rows
        floor = self.min_rows
        # Truncation is decided by max_rows alone; min_rows only affects how
        # many rows are kept once truncation happens. The value is stored
        # as computed, so it may be None/0 instead of False.
        truncated = limit and (len(full) > limit)
        if not truncated:
            self.tr_row_num = None
            shown = full
        else:
            limit = cast(int, limit)
            if floor:
                # min_rows set (not None or 0): keep the smaller of the two
                limit = min(floor, limit)
            if limit == 1:
                keep = limit
                shown = full.iloc[:limit]
            else:
                keep = limit // 2
                shown = concat((full.iloc[:keep], full.iloc[-keep:]))
            self.tr_row_num = keep
        self.tr_series = shown
        self.is_truncated_vertically = truncated

    def _get_footer(self) -> str:
        """
        Assemble the footer line(s).

        Freq, Name, Length and dtype entries are joined with ``", "``;
        categories info, when the dtype is categorical, follows on a new
        line.
        """
        ser = self.series
        entries: list[str] = []

        if getattr(ser.index, "freq", None) is not None:
            assert isinstance(ser.index, (DatetimeIndex, PeriodIndex, TimedeltaIndex))
            entries.append(f"Freq: {ser.index.freqstr}")

        if self.name is not False and ser.name is not None:
            shown_name = pprint_thing(ser.name, escape_chars=("\t", "\r", "\n"))
            entries.append(f"Name: {shown_name}")

        wants_length = self.length is True or (
            self.length == "truncate" and self.is_truncated_vertically
        )
        if wants_length:
            entries.append(f"Length: {len(ser)}")

        if self.dtype is not False and self.dtype is not None:
            dtype_name = getattr(self.tr_series.dtype, "name", None)
            if dtype_name:
                entries.append(f"dtype: {pprint_thing(dtype_name)}")

        footer = ", ".join(entries)

        # Same layout as for a bare Categorical: level info goes last, on
        # its own line.
        if is_categorical_dtype(self.tr_series.dtype):
            level_info = self.tr_series._values._repr_categories_info()
            if footer:
                footer += "\n"
            footer += level_info

        return str(footer)

    def _get_formatted_index(self) -> tuple[list[str], bool]:
        """Return the formatted index labels and whether a name row exists."""
        idx = self.tr_series.index

        if isinstance(idx, MultiIndex):
            has_name_row = any(name for name in idx.names)
            return idx.format(names=True), has_name_row
        return idx.format(name=True), idx.name is not None

    def _get_formatted_values(self) -> list[str]:
        """Format the values of the truncated series with ``format_array``."""
        return format_array(
            self.tr_series._values,
            None,
            float_format=self.float_format,
            na_rep=self.na_rep,
            leading_space=self.index,
        )

    def to_string(self) -> str:
        """Return the complete text representation, including the footer."""
        footer = self._get_footer()

        if len(self.tr_series) == 0:
            return f"{type(self.series).__name__}([], {footer})"

        index_cells, has_name_row = self._get_formatted_index()
        value_cells = self._get_formatted_values()

        if self.is_truncated_vertically:
            split_at = cast(int, self.tr_row_num)
            cell_width = self.adj.len(value_cells[split_at - 1])
            dots = "..." if cell_width > 3 else ".."
            # Series uses mode=center because it has single value columns
            # DataFrame uses mode=left
            dots = self.adj.justify([dots], cell_width, mode="center")[0]
            value_cells.insert(split_at, dots)
            # index_cells still carries the name row at position 0
            index_cells.insert(split_at + 1, "")

        if self.index:
            text = self.adj.adjoin(3, index_cells[1:], value_cells)
        else:
            text = self.adj.adjoin(3, value_cells)

        if self.header and has_name_row:
            text = index_cells[0] + "\n" + text

        if footer:
            text += "\n" + footer

        return str("".join(text))
class TextAdjustment:
    """
    Width and padding helper that counts one column per character.

    ``justify`` and ``adjoin`` delegate to the module-level printing
    helpers of the same names.
    """

    def __init__(self) -> None:
        self.encoding = get_option("display.encoding")

    def len(self, text: str) -> int:
        """Return the display width of ``text`` (its character count)."""
        return len(text)

    def justify(self, texts: Any, max_len: int, mode: str = "right") -> list[str]:
        """Pad every entry of ``texts`` to ``max_len`` according to ``mode``."""
        return justify(texts, max_len, mode=mode)

    def adjoin(self, space: int, *lists, **kwargs) -> str:
        """Join ``lists`` side by side, measuring and padding with this object."""
        return adjoin(
            space,
            *lists,
            strlen=self.len,
            justfunc=self.justify,
            **kwargs,
        )
class EastAsianTextAdjustment(TextAdjustment):
    """
    Text adjustment that measures strings by Unicode East Asian Width.

    Wide and fullwidth characters count as two columns; characters whose
    width class is not in the map count as ``ambiguous_width``, which is
    taken from ``display.unicode.ambiguous_as_wide``.
    """

    def __init__(self) -> None:
        super().__init__()
        self.ambiguous_width = (
            2 if get_option("display.unicode.ambiguous_as_wide") else 1
        )

        # Definition of East Asian Width
        # https://unicode.org/reports/tr11/
        # Ambiguous width can be changed by option
        self._EAW_MAP = {"Na": 1, "N": 1, "W": 2, "F": 2, "H": 1}

    def len(self, text: str) -> int:
        """
        Calculate display width considering unicode East Asian Width
        """
        if not isinstance(text, str):
            return len(text)

        width_by_class = self._EAW_MAP
        fallback = self.ambiguous_width
        total = 0
        for char in text:
            total += width_by_class.get(east_asian_width(char), fallback)
        return total

    def justify(
        self, texts: Iterable[str], max_len: int, mode: str = "right"
    ) -> list[str]:
        """Pad each string so that its display width becomes ``max_len``."""
        # Any mode other than "left"/"center" right-justifies.
        if mode == "left":
            pad_method = "ljust"
        elif mode == "center":
            pad_method = "center"
        else:
            pad_method = "rjust"

        padded = []
        for item in texts:
            # str padding counts characters, so convert the target display
            # width into a character count for this particular string.
            char_target = max_len - self.len(item) + len(item)
            padded.append(getattr(item, pad_method)(char_target))
        return padded
def get_adjustment() -> TextAdjustment:
    """
    Return the text adjustment matching the current display options.

    ``EastAsianTextAdjustment`` is used when
    ``display.unicode.east_asian_width`` is truthy, ``TextAdjustment``
    otherwise.
    """
    adjustment_cls = (
        EastAsianTextAdjustment
        if get_option("display.unicode.east_asian_width")
        else TextAdjustment
    )
    return adjustment_cls()
def get_dataframe_repr_params() -> dict[str, Any]:
    """Collect the display options that ``repr(DataFrame)`` hands to ``to_string``.

    Passing the returned mapping to ``DataFrame.to_string`` reproduces
    ``repr(DataFrame)``, which makes it a convenient starting point for a
    customised repr.

    .. versionadded:: 1.4.0

    Example
    -------
    >>> import pandas as pd
    >>>
    >>> df = pd.DataFrame([[1, 2], [3, 4]])
    >>> repr_params = pd.io.formats.format.get_dataframe_repr_params()
    >>> repr(df) == df.to_string(**repr_params)
    True
    """
    from pandas.io.formats import console

    # The console width is only looked up when frames may be wrapped.
    line_width = None
    if get_option("display.expand_frame_repr"):
        line_width = console.get_console_size()[0]

    option_names = {
        "max_rows": "display.max_rows",
        "min_rows": "display.min_rows",
        "max_cols": "display.max_columns",
        "max_colwidth": "display.max_colwidth",
        "show_dimensions": "display.show_dimensions",
    }
    params: dict[str, Any] = {
        key: get_option(option) for key, option in option_names.items()
    }
    params["line_width"] = line_width
    return params
def get_series_repr_params() -> dict[str, Any]:
    """Collect the display options that ``repr(Series)`` hands to ``to_string``.

    Passing the returned mapping to ``Series.to_string`` reproduces
    ``repr(series)``, which makes it a convenient starting point for a
    customised repr.

    .. versionadded:: 1.4.0

    Example
    -------
    >>> import pandas as pd
    >>>
    >>> ser = pd.Series([1, 2, 3, 4])
    >>> repr_params = pd.io.formats.format.get_series_repr_params()
    >>> repr(ser) == ser.to_string(**repr_params)
    True
    """
    _, height = get_terminal_size()
    max_rows_option = get_option("display.max_rows")
    # A ``display.max_rows`` of 0 makes both limits follow the terminal height.
    fit_to_terminal = max_rows_option == 0

    if fit_to_terminal:
        max_rows = height
        min_rows = height
    else:
        max_rows = max_rows_option
        min_rows = get_option("display.min_rows")

    return {
        "name": True,
        "dtype": True,
        "min_rows": min_rows,
        "max_rows": max_rows,
        "length": get_option("display.show_dimensions"),
    }
class DataFrameFormatter:
    """Class for processing dataframe formatting options and data."""

    # Append the shared parameter and return sections defined at module level.
    __doc__ = __doc__ if __doc__ else ""
    __doc__ += common_docstring + return_docstring

    def __init__(
        self,
        frame: DataFrame,
        columns: Sequence[Hashable] | None = None,
        col_space: ColspaceArgType | None = None,
        header: bool | Sequence[str] = True,
        index: bool = True,
        na_rep: str = "NaN",
        formatters: FormattersType | None = None,
        justify: str | None = None,
        float_format: FloatFormatType | None = None,
        sparsify: bool | None = None,
        index_names: bool = True,
        max_rows: int | None = None,
        min_rows: int | None = None,
        max_cols: int | None = None,
        show_dimensions: bool | str = False,
        decimal: str = ".",
        bold_rows: bool = False,
        escape: bool = True,
    ) -> None:
        self.frame = frame
        # May replace ``self.frame`` with the requested column subset.
        self.columns = self._initialize_columns(columns)
        self.col_space = self._initialize_colspace(col_space)
        self.header = header
        self.index = index
        self.na_rep = na_rep
        self.formatters = self._initialize_formatters(formatters)
        self.justify = self._initialize_justify(justify)
        self.float_format = float_format
        self.sparsify = self._initialize_sparsify(sparsify)
        self.show_index_names = index_names
        self.decimal = decimal
        self.bold_rows = bold_rows
        self.escape = escape
        self.max_rows = max_rows
        self.min_rows = min_rows
        self.max_cols = max_cols
        self.show_dimensions = show_dimensions

        self.max_cols_fitted = self._calc_max_cols_fitted()
        self.max_rows_fitted = self._calc_max_rows_fitted()

        # ``tr_frame`` is the frame actually rendered; ``truncate`` slices it
        # when the fitted limits are exceeded.
        self.tr_frame = self.frame
        self.truncate()
        self.adj = get_adjustment()

    def get_strcols(self) -> list[list[str]]:
        """
        Render a DataFrame to a list of columns (as lists of strings).
        """
        strcols = self._get_strcols_without_index()

        if self.index:
            str_index = self._get_formatted_index(self.tr_frame)
            strcols.insert(0, str_index)

        return strcols

    @property
    def should_show_dimensions(self) -> bool:
        """Whether the ``[rows x columns]`` line should be emitted."""
        return self.show_dimensions is True or (
            self.show_dimensions == "truncate" and self.is_truncated
        )

    @property
    def is_truncated(self) -> bool:
        """Whether the frame is truncated in either direction."""
        return bool(self.is_truncated_horizontally or self.is_truncated_vertically)

    @property
    def is_truncated_horizontally(self) -> bool:
        """Whether there are more columns than ``max_cols_fitted`` allows."""
        return bool(self.max_cols_fitted and (len(self.columns) > self.max_cols_fitted))

    @property
    def is_truncated_vertically(self) -> bool:
        """Whether there are more rows than ``max_rows_fitted`` allows."""
        return bool(self.max_rows_fitted and (len(self.frame) > self.max_rows_fitted))

    @property
    def dimensions_info(self) -> str:
        """The ``[<n> rows x <m> columns]`` text, preceded by a blank line."""
        return f"\n\n[{len(self.frame)} rows x {len(self.frame.columns)} columns]"

    @property
    def has_index_names(self) -> bool:
        return _has_names(self.frame.index)

    @property
    def has_column_names(self) -> bool:
        return _has_names(self.frame.columns)

    @property
    def show_row_idx_names(self) -> bool:
        """True when index names exist and both index and names are shown."""
        return all((self.has_index_names, self.index, self.show_index_names))

    @property
    def show_col_idx_names(self) -> bool:
        """True when column names exist and both names and header are shown."""
        return all((self.has_column_names, self.show_index_names, self.header))

    @property
    def max_rows_displayed(self) -> int:
        """``max_rows`` capped at the frame length (frame length if unset)."""
        return min(self.max_rows or len(self.frame), len(self.frame))

    def _initialize_sparsify(self, sparsify: bool | None) -> bool:
        """Fall back to ``display.multi_sparse`` when ``sparsify`` is None."""
        if sparsify is None:
            return get_option("display.multi_sparse")
        return sparsify

    def _initialize_formatters(
        self, formatters: FormattersType | None
    ) -> FormattersType:
        """
        Validate ``formatters``.

        Raises
        ------
        ValueError
            If ``formatters`` is not a dict and its length differs from the
            number of columns.
        """
        if formatters is None:
            return {}
        elif len(self.frame.columns) == len(formatters) or isinstance(formatters, dict):
            return formatters
        else:
            raise ValueError(
                f"Formatters length({len(formatters)}) should match "
                f"DataFrame number of columns({len(self.frame.columns)})"
            )

    def _initialize_justify(self, justify: str | None) -> str:
        """Fall back to ``display.colheader_justify`` when ``justify`` is None."""
        if justify is None:
            return get_option("display.colheader_justify")
        else:
            return justify

    def _initialize_columns(self, columns: Sequence[Hashable] | None) -> Index:
        """
        Return the columns to render.

        When ``columns`` is given, ``self.frame`` is replaced by the
        selection of those columns.
        """
        if columns is not None:
            # GH 47231 - columns doesn't have to be `Sequence[str]`
            # Will fix in later PR
            cols = ensure_index(cast(Axes, columns))
            self.frame = self.frame[cols]
            return cols
        else:
            return self.frame.columns

    def _initialize_colspace(self, col_space: ColspaceArgType | None) -> ColspaceType:
        """
        Normalise ``col_space`` to a mapping of column label to width.

        The ``""`` key holds the width used for the index.

        Raises
        ------
        ValueError
            If a mapping names an unknown column, or a sequence has a
            length different from the number of columns.
        """
        result: ColspaceType

        if col_space is None:
            result = {}
        elif isinstance(col_space, (int, str)):
            # A scalar applies to the index ("") and to every column.
            result = {"": col_space}
            result.update({column: col_space for column in self.frame.columns})
        elif isinstance(col_space, Mapping):
            for column in col_space.keys():
                if column not in self.frame.columns and column != "":
                    raise ValueError(
                        f"Col_space is defined for an unknown column: {column}"
                    )
            result = col_space
        else:
            if len(self.frame.columns) != len(col_space):
                raise ValueError(
                    f"Col_space length({len(col_space)}) should match "
                    f"DataFrame number of columns({len(self.frame.columns)})"
                )
            result = dict(zip(self.frame.columns, col_space))
        return result

    def _calc_max_cols_fitted(self) -> int | None:
        """Number of columns fitting the screen."""
        if not self._is_in_terminal():
            return self.max_cols

        width, _ = get_terminal_size()
        if self._is_screen_narrow(width):
            return width
        else:
            return self.max_cols

    def _calc_max_rows_fitted(self) -> int | None:
        """Number of rows with data fitting the screen."""
        max_rows: int | None

        if self._is_in_terminal():
            _, height = get_terminal_size()
            if self.max_rows == 0:
                # rows available to fill with actual data
                return height - self._get_number_of_auxillary_rows()

            if self._is_screen_short(height):
                max_rows = height
            else:
                max_rows = self.max_rows
        else:
            max_rows = self.max_rows

        return self._adjust_max_rows(max_rows)

    def _adjust_max_rows(self, max_rows: int | None) -> int | None:
        """Adjust max_rows using display logic.

        See description here:
        https://pandas.pydata.org/docs/dev/user_guide/options.html#frequently-used-options

        GH #37359
        """
        if max_rows:
            if (len(self.frame) > max_rows) and self.min_rows:
                # if truncated, set max_rows showed to min_rows
                max_rows = min(self.min_rows, max_rows)
        return max_rows

    def _is_in_terminal(self) -> bool:
        """Check if the output is to be shown in terminal."""
        return bool(self.max_cols == 0 or self.max_rows == 0)

    def _is_screen_narrow(self, max_width) -> bool:
        return bool(self.max_cols == 0 and len(self.frame.columns) > max_width)

    def _is_screen_short(self, max_height) -> bool:
        return bool(self.max_rows == 0 and len(self.frame) > max_height)

    def _get_number_of_auxillary_rows(self) -> int:
        """Get number of rows occupied by prompt, dots and dimension info."""
        dot_row = 1
        prompt_row = 1
        num_rows = dot_row + prompt_row

        if self.show_dimensions:
            num_rows += len(self.dimensions_info.splitlines())

        if self.header:
            num_rows += 1

        return num_rows

    def truncate(self) -> None:
        """
        Check whether the frame should be truncated. If so, slice the frame up.
        """
        if self.is_truncated_horizontally:
            self._truncate_horizontally()

        if self.is_truncated_vertically:
            self._truncate_vertically()

    def _truncate_horizontally(self) -> None:
        """Remove columns, which are not to be displayed and adjust formatters.

        Attributes affected:
            - tr_frame
            - formatters
            - tr_col_num
        """
        assert self.max_cols_fitted is not None
        col_num = self.max_cols_fitted // 2
        if col_num >= 1:
            left = self.tr_frame.iloc[:, :col_num]
            right = self.tr_frame.iloc[:, -col_num:]
            self.tr_frame = concat((left, right), axis=1)

            # truncate formatter
            if isinstance(self.formatters, (list, tuple)):
                self.formatters = [
                    *self.formatters[:col_num],
                    *self.formatters[-col_num:],
                ]
        else:
            # Fewer than two columns fit: keep only the leading ones.
            col_num = cast(int, self.max_cols)
            self.tr_frame = self.tr_frame.iloc[:, :col_num]
        self.tr_col_num = col_num

    def _truncate_vertically(self) -> None:
        """Remove rows, which are not to be displayed.

        Attributes affected:
            - tr_frame
            - tr_row_num
        """
        assert self.max_rows_fitted is not None
        row_num = self.max_rows_fitted // 2
        if row_num >= 1:
            head = self.tr_frame.iloc[:row_num, :]
            tail = self.tr_frame.iloc[-row_num:, :]
            self.tr_frame = concat((head, tail))
        else:
            # Fewer than two rows fit: keep only the leading ones.
            row_num = cast(int, self.max_rows)
            self.tr_frame = self.tr_frame.iloc[:row_num, :]
        self.tr_row_num = row_num

    def _get_strcols_without_index(self) -> list[list[str]]:
        """
        Render every column of ``tr_frame`` to a list of strings.

        Raises
        ------
        ValueError
            If ``header`` is list-like and its length differs from the
            number of columns.
        """
        strcols: list[list[str]] = []

        # header=False: values only, no label rows.
        if not is_list_like(self.header) and not self.header:
            for i, c in enumerate(self.tr_frame):
                fmt_values = self.format_col(i)
                fmt_values = _make_fixed_width(
                    strings=fmt_values,
                    justify=self.justify,
                    minimum=int(self.col_space.get(c, 0)),
                    adj=self.adj,
                )
                strcols.append(fmt_values)
            return strcols

        if is_list_like(self.header):
            # cast here since can't be bool if is_list_like
            self.header = cast(List[str], self.header)
            if len(self.header) != len(self.columns):
                raise ValueError(
                    f"Writing {len(self.columns)} cols "
                    f"but got {len(self.header)} aliases"
                )
            str_columns = [[label] for label in self.header]
        else:
            str_columns = self._get_formatted_column_labels(self.tr_frame)

        if self.show_row_idx_names:
            # Extra empty cell so the columns line up with the index-name row.
            for x in str_columns:
                x.append("")

        for i, c in enumerate(self.tr_frame):
            cheader = str_columns[i]
            header_colwidth = max(
                int(self.col_space.get(c, 0)), *(self.adj.len(x) for x in cheader)
            )
            fmt_values = self.format_col(i)
            fmt_values = _make_fixed_width(
                fmt_values, self.justify, minimum=header_colwidth, adj=self.adj
            )

            max_len = max(max(self.adj.len(x) for x in fmt_values), header_colwidth)
            cheader = self.adj.justify(cheader, max_len, mode=self.justify)
            strcols.append(cheader + fmt_values)

        return strcols

    def format_col(self, i: int) -> list[str]:
        """Format the values of the ``i``-th column of ``tr_frame``."""
        frame = self.tr_frame
        formatter = self._get_formatter(i)
        return format_array(
            frame.iloc[:, i]._values,
            formatter,
            float_format=self.float_format,
            na_rep=self.na_rep,
            space=self.col_space.get(frame.columns[i]),
            decimal=self.decimal,
            leading_space=self.index,
        )

    def _get_formatter(self, i: str | int) -> Callable | None:
        """
        Return the formatter registered for column ``i``, or None.

        With list/tuple formatters ``i`` must be an integer position; with
        a mapping, an integer that is not itself a column label is first
        translated to the label at that position.
        """
        if isinstance(self.formatters, (list, tuple)):
            if is_integer(i):
                i = cast(int, i)
                return self.formatters[i]
            else:
                return None
        else:
            # NOTE(review): callers in this class pass positions within
            # ``tr_frame`` while this lookup uses ``self.columns``, which is
            # not truncated -- verify the mapping under horizontal truncation.
            if is_integer(i) and i not in self.columns:
                i = self.columns[i]
            return self.formatters.get(i, None)

    def _get_formatted_column_labels(self, frame: DataFrame) -> list[list[str]]:
        """
        Return, for each column of ``frame``, its header cells.

        Labels of numeric columns without a formatter get a leading space.
        """
        from pandas.core.indexes.multi import sparsify_labels

        columns = frame.columns

        if isinstance(columns, MultiIndex):
            fmt_columns = columns.format(sparsify=False, adjoin=False)
            fmt_columns = list(zip(*fmt_columns))
            # Use the dtypes of the frame being rendered so that they stay
            # positionally aligned with ``columns`` after truncation.
            dtypes = frame.dtypes._values

            # if we have a Float level, they don't use leading space at all
            # NOTE(review): ``level.is_floating`` is referenced without being
            # called; if it is a method this is truthy for every level --
            # confirm the intent before changing it.
            restrict_formatting = any(level.is_floating for level in columns.levels)
            need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes)))

            def space_format(x, y):
                if (
                    y not in self.formatters
                    and need_leadsp[x]
                    and not restrict_formatting
                ):
                    return " " + y
                return y

            str_columns = list(
                zip(*([space_format(x, y) for y in x] for x in fmt_columns))
            )
            if self.sparsify and len(str_columns):
                str_columns = sparsify_labels(str_columns)

            str_columns = [list(x) for x in zip(*str_columns)]
        else:
            fmt_columns = columns.format()
            # Same alignment concern as above: dtypes must come from ``frame``.
            dtypes = frame.dtypes
            need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes)))
            str_columns = [
                [" " + x if not self._get_formatter(i) and need_leadsp[x] else x]
                for i, x in enumerate(fmt_columns)
            ]
        return str_columns

    def _get_formatted_index(self, frame: DataFrame) -> list[str]:
        """Return the index column as strings, padded for the header rows."""
        # Note: this is only used by to_string() and to_latex(), not by
        # to_html(). so safe to cast col_space here.
        col_space = {k: cast(int, v) for k, v in self.col_space.items()}
        index = frame.index
        columns = frame.columns
        fmt = self._get_formatter("__index__")

        if isinstance(index, MultiIndex):
            fmt_index = index.format(
                sparsify=self.sparsify,
                adjoin=False,
                names=self.show_row_idx_names,
                formatter=fmt,
            )
        else:
            fmt_index = [index.format(name=self.show_row_idx_names, formatter=fmt)]

        fmt_index = [
            tuple(
                _make_fixed_width(
                    list(x), justify="left", minimum=col_space.get("", 0), adj=self.adj
                )
            )
            for x in fmt_index
        ]

        adjoined = self.adj.adjoin(1, *fmt_index).split("\n")

        # empty space for columns
        if self.show_col_idx_names:
            col_header = [str(x) for x in self._get_column_name_list()]
        else:
            col_header = [""] * columns.nlevels

        if self.header:
            return col_header + adjoined
        else:
            return adjoined

    def _get_column_name_list(self) -> list[Hashable]:
        """Return the column-axis names, with ``None`` replaced by ``""``."""
        names: list[Hashable] = []
        columns = self.frame.columns
        if isinstance(columns, MultiIndex):
            names.extend("" if name is None else name for name in columns.names)
        else:
            names.append("" if columns.name is None else columns.name)
        return names
1010class DataFrameRenderer:
1011 """Class for creating dataframe output in multiple formats.
1013 Called in pandas.core.generic.NDFrame:
1014 - to_csv
1015 - to_latex
1017 Called in pandas.core.frame.DataFrame:
1018 - to_html
1019 - to_string
1021 Parameters
1022 ----------
1023 fmt : DataFrameFormatter
1024 Formatter with the formatting options.
1025 """
1027 def __init__(self, fmt: DataFrameFormatter) -> None:
1028 self.fmt = fmt
1030 def to_latex(
1031 self,
1032 buf: FilePath | WriteBuffer[str] | None = None,
1033 column_format: str | None = None,
1034 longtable: bool = False,
1035 encoding: str | None = None,
1036 multicolumn: bool = False,
1037 multicolumn_format: str | None = None,
1038 multirow: bool = False,
1039 caption: str | tuple[str, str] | None = None,
1040 label: str | None = None,
1041 position: str | None = None,
1042 ) -> str | None:
1043 """
1044 Render a DataFrame to a LaTeX tabular/longtable environment output.
1045 """
1046 from pandas.io.formats.latex import LatexFormatter
1048 latex_formatter = LatexFormatter(
1049 self.fmt,
1050 longtable=longtable,
1051 column_format=column_format,
1052 multicolumn=multicolumn,
1053 multicolumn_format=multicolumn_format,
1054 multirow=multirow,
1055 caption=caption,
1056 label=label,
1057 position=position,
1058 )
1059 string = latex_formatter.to_string()
1060 return save_to_buffer(string, buf=buf, encoding=encoding)
1062 def to_html(
1063 self,
1064 buf: FilePath | WriteBuffer[str] | None = None,
1065 encoding: str | None = None,
1066 classes: str | list | tuple | None = None,
1067 notebook: bool = False,
1068 border: int | bool | None = None,
1069 table_id: str | None = None,
1070 render_links: bool = False,
1071 ) -> str | None:
1072 """
1073 Render a DataFrame to a html table.
1075 Parameters
1076 ----------
1077 buf : str, path object, file-like object, or None, default None
1078 String, path object (implementing ``os.PathLike[str]``), or file-like
1079 object implementing a string ``write()`` function. If None, the result is
1080 returned as a string.
1081 encoding : str, default “utf-8”
1082 Set character encoding.
1083 classes : str or list-like
1084 classes to include in the `class` attribute of the opening
1085 ``<table>`` tag, in addition to the default "dataframe".
1086 notebook : {True, False}, optional, default False
1087 Whether the generated HTML is for IPython Notebook.
1088 border : int
1089 A ``border=border`` attribute is included in the opening
1090 ``<table>`` tag. Default ``pd.options.display.html.border``.
1091 table_id : str, optional
1092 A css id is included in the opening `<table>` tag if specified.
1093 render_links : bool, default False
1094 Convert URLs to HTML links.
1095 """
1096 from pandas.io.formats.html import (
1097 HTMLFormatter,
1098 NotebookFormatter,
1099 )
1101 Klass = NotebookFormatter if notebook else HTMLFormatter
1103 html_formatter = Klass(
1104 self.fmt,
1105 classes=classes,
1106 border=border,
1107 table_id=table_id,
1108 render_links=render_links,
1109 )
1110 string = html_formatter.to_string()
1111 return save_to_buffer(string, buf=buf, encoding=encoding)
def to_string(
    self,
    buf: FilePath | WriteBuffer[str] | None = None,
    encoding: str | None = None,
    line_width: int | None = None,
) -> str | None:
    """
    Render a DataFrame to a console-friendly tabular output.

    Parameters
    ----------
    buf : str, path object, file-like object, or None, default None
        String, path object (implementing ``os.PathLike[str]``), or file-like
        object implementing a string ``write()`` function. If None, the
        result is returned as a string.
    encoding : str, default "utf-8"
        Set character encoding.
    line_width : int, optional
        Width to wrap a line in characters.

    Returns
    -------
    str or None
        Whatever ``save_to_buffer`` returns for the rendered text.
    """
    # Imported lazily, exactly as the original does.
    from pandas.io.formats.string import StringFormatter

    rendered = StringFormatter(self.fmt, line_width=line_width).to_string()
    return save_to_buffer(rendered, buf=buf, encoding=encoding)
@deprecate_kwarg(old_arg_name="line_terminator", new_arg_name="lineterminator")
def to_csv(
    self,
    path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None,
    encoding: str | None = None,
    sep: str = ",",
    columns: Sequence[Hashable] | None = None,
    index_label: IndexLabel | None = None,
    mode: str = "w",
    compression: CompressionOptions = "infer",
    quoting: int | None = None,
    quotechar: str = '"',
    lineterminator: str | None = None,
    chunksize: int | None = None,
    date_format: str | None = None,
    doublequote: bool = True,
    escapechar: str | None = None,
    errors: str = "strict",
    storage_options: StorageOptions = None,
) -> str | None:
    """
    Render dataframe as comma-separated file.

    All keyword arguments are forwarded to ``CSVFormatter`` together with
    ``self.fmt``. When ``path_or_buf`` is None the output is collected in an
    in-memory ``StringIO`` and returned as a string; otherwise the data is
    written to ``path_or_buf`` and None is returned.
    """
    # Imported lazily, exactly as the original does.
    from pandas.io.formats.csvs import CSVFormatter

    # Remember whether the target buffer is ours, so we know to read it back.
    created_buffer = path_or_buf is None
    if created_buffer:
        path_or_buf = StringIO()

    CSVFormatter(
        path_or_buf=path_or_buf,
        lineterminator=lineterminator,
        sep=sep,
        encoding=encoding,
        errors=errors,
        compression=compression,
        quoting=quoting,
        cols=columns,
        index_label=index_label,
        mode=mode,
        chunksize=chunksize,
        quotechar=quotechar,
        date_format=date_format,
        doublequote=doublequote,
        escapechar=escapechar,
        storage_options=storage_options,
        formatter=self.fmt,
    ).save()

    if not created_buffer:
        return None

    assert isinstance(path_or_buf, StringIO)
    content = path_or_buf.getvalue()
    path_or_buf.close()
    return content
def save_to_buffer(
    string: str,
    buf: FilePath | WriteBuffer[str] | None = None,
    encoding: str | None = None,
) -> str | None:
    """
    Write ``string`` to ``buf``, or hand it back when no buffer was given.

    Parameters
    ----------
    string : str
        The already-rendered text.
    buf : str, path object, file-like object, or None
        Destination, resolved through ``get_buffer``.
    encoding : str, optional
        Forwarded to ``get_buffer``.

    Returns
    -------
    str or None
        The buffer contents when ``buf`` is None, otherwise None.
    """
    with get_buffer(buf, encoding=encoding) as handle:
        handle.write(string)
        # error: "WriteBuffer[str]" has no attribute "getvalue"
        return (
            handle.getvalue()  # type: ignore[attr-defined]
            if buf is None
            else None
        )
@contextmanager
def get_buffer(
    buf: FilePath | WriteBuffer[str] | None, encoding: str | None = None
) -> Iterator[WriteBuffer[str]] | Iterator[StringIO]:
    """
    Context manager yielding a writable text buffer for ``buf``.

    * ``None`` yields a fresh ``StringIO``.
    * An object with a ``write`` attribute is yielded unchanged.
    * A string (after ``stringify_path``) is opened for writing and closed
      on exit.

    Raises
    ------
    ValueError
        If ``encoding`` is given but ``buf`` is not a file name.
    TypeError
        If ``buf`` is neither a file name nor an object with ``write``.
    """
    buf = StringIO() if buf is None else stringify_path(buf)

    if encoding is None:
        encoding = "utf-8"
    elif not isinstance(buf, str):
        raise ValueError("buf is not a file name and encoding is specified.")

    if hasattr(buf, "write"):
        yield buf
        return

    if not isinstance(buf, str):
        raise TypeError("buf is not a file name and it has no write method")

    check_parent_directory(str(buf))
    # GH#30034 open instead of codecs.open prevents a file leak
    # if we have an invalid encoding argument.
    # newline="" is needed to roundtrip correctly on
    # windows test_to_latex_filename
    with open(buf, "w", encoding=encoding, newline="") as handle:
        yield handle
1248# ----------------------------------------------------------------------
1249# Array formatters
def format_array(
    values: Any,
    formatter: Callable | None,
    float_format: FloatFormatType | None = None,
    na_rep: str = "NaN",
    digits: int | None = None,
    space: str | int | None = None,
    justify: str = "right",
    decimal: str = ".",
    leading_space: bool | None = True,
    quoting: int | None = None,
) -> list[str]:
    """
    Format an array for printing.

    The formatter class is picked from ``values.dtype`` and every argument is
    forwarded to it.

    Parameters
    ----------
    values
        Array-like exposing a ``dtype`` attribute.
    formatter
    float_format
        Falls back to the ``display.float_format`` option when None.
    na_rep
    digits
        Falls back to the ``display.precision`` option when None.
    space
        Falls back to 12 when None.
    justify
    decimal
    leading_space : bool, optional, default True
        Whether the array should be formatted with a leading space.
        When an array as a column of a Series or DataFrame, we do want
        the leading space to pad between columns.

        When formatting an Index subclass
        (e.g. IntervalIndex._format_native_types), we don't want the
        leading space since it should be left-aligned.
    quoting

    Returns
    -------
    List[str]
    """
    dtype = values.dtype

    # Order matters: the first matching dtype check wins.
    fmt_klass: type[GenericArrayFormatter]
    if is_datetime64_dtype(dtype):
        fmt_klass = Datetime64Formatter
    elif isinstance(dtype, DatetimeTZDtype):
        fmt_klass = Datetime64TZFormatter
    elif is_timedelta64_dtype(dtype):
        fmt_klass = Timedelta64Formatter
    elif is_extension_array_dtype(dtype):
        fmt_klass = ExtensionArrayFormatter
    elif is_float_dtype(dtype) or is_complex_dtype(dtype):
        fmt_klass = FloatArrayFormatter
    elif is_integer_dtype(dtype):
        fmt_klass = IntArrayFormatter
    else:
        fmt_klass = GenericArrayFormatter

    # Fill in the defaults the caller left unspecified.
    space = 12 if space is None else space
    if float_format is None:
        float_format = get_option("display.float_format")
    if digits is None:
        digits = get_option("display.precision")

    return fmt_klass(
        values,
        digits=digits,
        na_rep=na_rep,
        float_format=float_format,
        formatter=formatter,
        space=space,
        justify=justify,
        decimal=decimal,
        leading_space=leading_space,
        quoting=quoting,
    ).get_result()
class GenericArrayFormatter:
    """
    Base formatter that turns the elements of an array into strings.

    ``get_result`` formats every element via ``_format_strings`` and then
    justifies the strings to a common width with ``_make_fixed_width``.
    Subclasses override ``_format_strings``.
    """

    def __init__(
        self,
        values: Any,
        digits: int = 7,
        formatter: Callable | None = None,
        na_rep: str = "NaN",
        space: str | int = 12,
        float_format: FloatFormatType | None = None,
        justify: str = "right",
        decimal: str = ".",
        quoting: int | None = None,
        fixed_width: bool = True,
        leading_space: bool | None = True,
    ) -> None:
        # Data to format.
        self.values = values
        # Element-level rendering options.
        self.formatter = formatter
        self.float_format = float_format
        self.na_rep = na_rep
        self.digits = digits
        self.decimal = decimal
        self.quoting = quoting
        # Layout options.
        self.space = space
        self.justify = justify
        self.fixed_width = fixed_width
        self.leading_space = leading_space

    def get_result(self) -> list[str]:
        """Return the formatted strings, justified according to ``self.justify``."""
        return _make_fixed_width(self._format_strings(), self.justify)

    def _format_strings(self) -> list[str]:
        """Format each element of ``self.values`` and return the strings."""
        # Resolve the float formatter: instance setting, then the display
        # option, then a default built from ``display.precision``.
        float_format = self.float_format
        if float_format is None:
            float_format = get_option("display.float_format")
            if float_format is None:
                precision = get_option("display.precision")

                def float_format(x):
                    return _trim_zeros_single_float(f"{x: .{precision:d}f}")

        # Resolve the formatter used for non-float elements.
        formatter = self.formatter
        if formatter is None:
            quote_strings = self.quoting is not None and self.quoting != QUOTE_NONE
            formatter = partial(
                pprint_thing,
                escape_chars=("\t", "\r", "\n"),
                quote_strings=quote_strings,
            )

        def _render(x):
            is_missing = self.na_rep is not None and is_scalar(x) and isna(x)
            if not is_missing:
                if isinstance(x, PandasObject):
                    return str(x)
                # object dtype
                return str(formatter(x))

            try:
                # try block for np.isnat specifically
                # determine na_rep if x is None or NaT-like
                if x is None:
                    return "None"
                if x is NA:
                    return str(NA)
                if x is NaT or np.isnat(x):
                    return "NaT"
            except (TypeError, ValueError):
                # np.isnat only handles datetime or timedelta objects
                pass
            return self.na_rep

        vals = extract_array(self.values, extract_numpy=True)
        if not isinstance(vals, np.ndarray):
            raise TypeError(
                "ExtensionArray formatting should use ExtensionArrayFormatter"
            )

        inferred = lib.map_infer(vals, is_float)
        # vals may have 2 or more dimensions
        trailing_axes = tuple(range(1, len(vals.shape)))
        is_float_type = inferred & np.all(notna(vals), axis=trailing_axes)

        leading_space = self.leading_space
        if leading_space is None:
            leading_space = is_float_type.any()

        fmt_values = []
        for i, v in enumerate(vals):
            if is_float_type[i]:
                fmt_values.append(float_format(v))
            elif leading_space:
                fmt_values.append(" " + _render(v))
            else:
                # False specifically, so that any other falsy value still
                # gets the padding space.
                prefix = "" if leading_space is False else " "
                fmt_values.append(prefix + _render(v))

        return fmt_values
class FloatArrayFormatter(GenericArrayFormatter):
    """
    Formatter for float and complex arrays.

    Builds on ``GenericArrayFormatter`` and adds float-specific handling:
    custom decimal separators, chopping of small values, trimming of trailing
    zeros and a fallback to scientific notation.
    """

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)

        # float_format is expected to be a string
        # formatter should be used to pass a function
        if self.float_format is not None and self.formatter is None:
            # GH21625, GH22270
            # An explicit float_format switches off fixed-width handling.
            self.fixed_width = False
            if callable(self.float_format):
                # A callable float_format is treated as the formatter.
                self.formatter = self.float_format
                self.float_format = None

    def _value_formatter(
        self,
        float_format: FloatFormatType | None = None,
        threshold: float | None = None,
    ) -> Callable:
        """
        Returns a function to be applied on each value to format it

        Parameters
        ----------
        float_format : optional
            Called as ``float_format(value=v)``; falls back to
            ``self.float_format`` when None.
        threshold : float, optional
            When given, values whose absolute value does not exceed it are
            formatted as ``0.0``.
        """
        # the float_format parameter supersedes self.float_format
        if float_format is None:
            float_format = self.float_format

        # we are going to compose different functions, to first convert to
        # a string, then replace the decimal symbol, and finally chop according
        # to the threshold

        # when there is no float_format, we use str instead of '%g'
        # because str(0.0) = '0.0' while '%g' % 0.0 = '0'
        if float_format:

            def base_formatter(v):
                assert float_format is not None  # for mypy
                # error: "str" not callable
                # error: Unexpected keyword argument "value" for "__call__" of
                # "EngFormatter"
                return (
                    float_format(value=v)  # type: ignore[operator,call-arg]
                    if notna(v)
                    else self.na_rep
                )

        else:

            def base_formatter(v):
                return str(v) if notna(v) else self.na_rep

        if self.decimal != ".":

            # Swap only the first "." for the configured decimal symbol.
            def decimal_formatter(v):
                return base_formatter(v).replace(".", self.decimal, 1)

        else:
            decimal_formatter = base_formatter

        if threshold is None:
            return decimal_formatter

        def formatter(value):
            if notna(value):
                if abs(value) > threshold:
                    return decimal_formatter(value)
                else:
                    # At or below the threshold: display as zero.
                    return decimal_formatter(0.0)
            else:
                return self.na_rep

        return formatter

    def get_result_as_array(self) -> np.ndarray:
        """
        Returns the float values converted into strings using
        the parameters given at initialisation, as a numpy array
        """

        def format_with_na_rep(values: ArrayLike, formatter: Callable, na_rep: str):
            # Format element-wise on the flattened data, substituting na_rep
            # for missing entries, then restore the original shape.
            mask = isna(values)
            formatted = np.array(
                [
                    formatter(val) if not m else na_rep
                    for val, m in zip(values.ravel(), mask.ravel())
                ]
            ).reshape(values.shape)
            return formatted

        # A user-supplied formatter bypasses all the logic below.
        if self.formatter is not None:
            return format_with_na_rep(self.values, self.formatter, self.na_rep)

        # The chop threshold is only applied in fixed-width mode.
        if self.fixed_width:
            threshold = get_option("display.chop_threshold")
        else:
            threshold = None

        # if we have a fixed_width, we'll need to try different float_format
        def format_values_with(float_format):
            formatter = self._value_formatter(float_format, threshold)

            # default formatter leaves a space to the left when formatting
            # floats, must be consistent for left-justifying NaNs (GH #25061)
            if self.justify == "left":
                na_rep = " " + self.na_rep
            else:
                na_rep = self.na_rep

            # separate the wheat from the chaff
            values = self.values
            is_complex = is_complex_dtype(values)
            values = format_with_na_rep(values, formatter, na_rep)

            if self.fixed_width:
                # Trim trailing zeros so the column shares a common precision.
                if is_complex:
                    result = _trim_zeros_complex(values, self.decimal)
                else:
                    result = _trim_zeros_float(values, self.decimal)
                return np.asarray(result, dtype="object")

            return values

        # There is a special default string when we are fixed-width
        # The default is otherwise to use str instead of a formatting string
        float_format: FloatFormatType | None
        if self.float_format is None:
            if self.fixed_width:
                if self.leading_space is True:
                    fmt_str = "{value: .{digits:d}f}"
                else:
                    fmt_str = "{value:.{digits:d}f}"
                float_format = partial(fmt_str.format, digits=self.digits)
            else:
                float_format = self.float_format
        else:
            # self.float_format is used as a %-style format string here.
            float_format = lambda value: self.float_format % value

        formatted_values = format_values_with(float_format)

        if not self.fixed_width:
            return formatted_values

        # we need to convert to engineering format if some values are too small
        # and would appear as 0, or if some values are too big and take too
        # much space

        if len(formatted_values) > 0:
            maxlen = max(len(x) for x in formatted_values)
            too_long = maxlen > self.digits + 6
        else:
            too_long = False

        # invalid="ignore" silences NumPy's invalid-value warnings in here.
        with np.errstate(invalid="ignore"):
            abs_vals = np.abs(self.values)
            # this is pretty arbitrary for now
            # large values: more than 8 characters including decimal symbol
            # and first digit, hence > 1e6
            has_large_values = (abs_vals > 1e6).any()
            has_small_values = (
                (abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)
            ).any()

        if has_small_values or (too_long and has_large_values):
            # Second pass: re-format everything in scientific notation.
            if self.leading_space is True:
                fmt_str = "{value: .{digits:d}e}"
            else:
                fmt_str = "{value:.{digits:d}e}"
            float_format = partial(fmt_str.format, digits=self.digits)
            formatted_values = format_values_with(float_format)

        return formatted_values

    def _format_strings(self) -> list[str]:
        """Return ``get_result_as_array()`` converted to a list."""
        return list(self.get_result_as_array())
class IntArrayFormatter(GenericArrayFormatter):
    """Formatter for integer arrays."""

    def _format_strings(self) -> list[str]:
        """
        Format every value with ``self.formatter`` if set, otherwise as a
        decimal integer, with a sign-space placeholder unless
        ``leading_space`` is exactly False.
        """
        template = "{x:d}" if self.leading_space is False else "{x: d}"

        def formatter_str(x):
            return template.format(x=x)

        formatter = self.formatter or formatter_str
        return [formatter(x) for x in self.values]
class Datetime64Formatter(GenericArrayFormatter):
    """Formatter for datetime64 values without a timezone."""

    def __init__(
        self,
        values: np.ndarray | Series | DatetimeIndex | DatetimeArray,
        nat_rep: str = "NaT",
        date_format: None = None,
        **kwargs,
    ) -> None:
        super().__init__(values, **kwargs)
        self.date_format = date_format
        self.nat_rep = nat_rep

    def _format_strings(self) -> list[str]:
        """
        Format the values; by definition they do not have a timezone.

        A callable ``self.formatter`` is applied element-wise; otherwise the
        underlying array's ``_format_native_types`` does the work.
        """
        dti = self.values
        if not isinstance(dti, DatetimeIndex):
            dti = DatetimeIndex(dti)

        custom = self.formatter
        if custom is not None and callable(custom):
            return [custom(x) for x in dti]

        native = dti._data._format_native_types(
            na_rep=self.nat_rep, date_format=self.date_format
        )
        return native.tolist()
class ExtensionArrayFormatter(GenericArrayFormatter):
    """Formatter for extension arrays; delegates to ``format_array``."""

    def _format_strings(self) -> list[str]:
        """
        Convert the extension array to a plain array and format that with
        ``format_array``, using the array's own ``_formatter`` when no
        formatter was supplied.
        """
        values = extract_array(self.values, extract_numpy=True)

        formatter = (
            values._formatter(boxed=True)
            if self.formatter is None
            else self.formatter
        )

        if isinstance(values, Categorical):
            # Categorical is special for now, so that we can preserve tzinfo
            array = values._internal_get_values()
        else:
            array = np.asarray(values)

        return format_array(
            array,
            formatter,
            float_format=self.float_format,
            na_rep=self.na_rep,
            digits=self.digits,
            space=self.space,
            justify=self.justify,
            decimal=self.decimal,
            leading_space=self.leading_space,
            quoting=self.quoting,
        )
def format_percentiles(
    percentiles: (np.ndarray | Sequence[float]),
) -> list[str]:
    """
    Outputs rounded and formatted percentiles.

    Parameters
    ----------
    percentiles : list-like, containing floats from interval [0,1]

    Returns
    -------
    formatted : list of strings

    Raises
    ------
    ValueError
        If the input is not numeric or any entry lies outside [0, 1].

    Notes
    -----
    Rounding precision is chosen so that: (1) if any two elements of
    ``percentiles`` differ, they remain different after rounding
    (2) no entry is *rounded* to 0% or 100%.
    Any non-integer is always rounded to at least 1 decimal place.

    Examples
    --------
    Keeps all entries different after rounding:

    >>> format_percentiles([0.01999, 0.02001, 0.5, 0.666666, 0.9999])
    ['1.999%', '2.001%', '50%', '66.667%', '99.99%']

    No element is rounded to 0% or 100% (unless already equal to it).
    Duplicates are allowed:

    >>> format_percentiles([0, 0.5, 0.02001, 0.5, 0.666666, 0.9999])
    ['0%', '50%', '2.0%', '50%', '66.67%', '99.99%']
    """
    percentiles = np.asarray(percentiles)

    # It checks for np.NaN as well
    with np.errstate(invalid="ignore"):
        in_unit_interval = (
            is_numeric_dtype(percentiles)
            and np.all(percentiles >= 0)
            and np.all(percentiles <= 1)
        )
        if not in_unit_interval:
            raise ValueError("percentiles should all be in the interval [0,1]")

    percentiles = 100 * percentiles
    as_int = percentiles.astype(int)
    int_idx = np.isclose(as_int, percentiles)

    # Everything is a whole percent: no decimals needed at all.
    if np.all(int_idx):
        return [label + "%" for label in as_int.astype(str)]

    unique_pcts = np.unique(percentiles)
    lowest, highest = unique_pcts[0], unique_pcts[-1]
    to_begin = lowest if lowest > 0 else None
    to_end = 100 - highest if highest < 100 else None

    # Least precision that keeps percentiles unique after rounding
    gaps = np.ediff1d(unique_pcts, to_begin=to_begin, to_end=to_end)
    prec = max(1, -np.floor(np.log10(np.min(gaps))).astype(int))

    out = np.empty_like(percentiles, dtype=object)
    out[int_idx] = percentiles[int_idx].astype(int).astype(str)
    out[~int_idx] = percentiles[~int_idx].round(prec).astype(str)
    return [label + "%" for label in out]
def is_dates_only(values: np.ndarray | DatetimeArray | Index | DatetimeIndex) -> bool:
    """
    Return True if ``values`` has no timezone and every non-NaT entry falls
    on a whole-day boundary.
    """
    if not isinstance(values, Index):
        values = values.ravel()

    if not isinstance(values, (DatetimeArray, DatetimeIndex)):
        values = DatetimeIndex(values)

    if values.tz is not None:
        return False

    as_i8 = values.asi8
    not_nat = as_i8 != iNaT
    # error: Argument 1 to "py_get_unit_from_dtype" has incompatible type
    # "Union[dtype[Any], ExtensionDtype]"; expected "dtype[Any]"
    reso = get_unit_from_dtype(values.dtype)  # type: ignore[arg-type]
    per_day = periods_per_day(reso)

    # TODO: can we reuse is_date_array_normalized?  would need a skipna kwd
    has_time_part = np.logical_and(not_nat, as_i8 % per_day != 0)
    return bool(has_time_part.sum() == 0)
1771def _format_datetime64(x: NaTType | Timestamp, nat_rep: str = "NaT") -> str:
1772 if x is NaT:
1773 return nat_rep
1775 # Timestamp.__str__ falls back to datetime.datetime.__str__ = isoformat(sep=' ')
1776 # so it already uses string formatting rather than strftime (faster).
1777 return str(x)
1780def _format_datetime64_dateonly(
1781 x: NaTType | Timestamp,
1782 nat_rep: str = "NaT",
1783 date_format: str | None = None,
1784) -> str:
1785 if isinstance(x, NaTType):
1786 return nat_rep
1788 if date_format:
1789 return x.strftime(date_format)
1790 else:
1791 # Timestamp._date_repr relies on string formatting (faster than strftime)
1792 return x._date_repr
def get_format_datetime64(
    is_dates_only: bool, nat_rep: str = "NaT", date_format: str | None = None
) -> Callable:
    """
    Return a formatter callable taking a datetime64 as input and providing
    a string as output.

    With ``is_dates_only`` the callable wraps ``_format_datetime64_dateonly``
    (honouring ``date_format``); otherwise it wraps ``_format_datetime64``.
    """

    def _date_only(x):
        return _format_datetime64_dateonly(
            x, nat_rep=nat_rep, date_format=date_format
        )

    def _full(x):
        return _format_datetime64(x, nat_rep=nat_rep)

    return _date_only if is_dates_only else _full
def get_format_datetime64_from_values(
    values: np.ndarray | DatetimeArray | DatetimeIndex, date_format: str | None
) -> str | None:
    """
    Given values and a date_format, return a string format.

    ``date_format`` is returned as-is, except that date-only values get
    ``"%Y-%m-%d"`` when no format was supplied.
    """
    if isinstance(values, np.ndarray) and values.ndim > 1:
        # We don't actually care about the order of values, and DatetimeIndex
        # only accepts 1D values
        values = values.ravel()

    if not is_dates_only(values):
        return date_format

    # Only dates and no timezone: provide a default format
    return date_format or "%Y-%m-%d"
class Datetime64TZFormatter(Datetime64Formatter):
    """Formatter for timezone-aware datetime values."""

    def _format_strings(self) -> list[str]:
        """
        Format the values; by definition they have a timezone.

        Values are converted to objects and formatted one by one with
        ``self.formatter``, or with the callable from
        ``get_format_datetime64`` when no formatter is set.
        """
        boxed = self.values.astype(object)
        dates_only = is_dates_only(boxed)
        formatter = self.formatter or get_format_datetime64(
            dates_only, date_format=self.date_format
        )
        return [formatter(x) for x in boxed]
class Timedelta64Formatter(GenericArrayFormatter):
    """Formatter for timedelta64 values."""

    def __init__(
        self,
        values: np.ndarray | TimedeltaIndex,
        nat_rep: str = "NaT",
        box: bool = False,
        **kwargs,
    ) -> None:
        super().__init__(values, **kwargs)
        self.box = box
        self.nat_rep = nat_rep

    def _format_strings(self) -> list[str]:
        """
        Format every value with ``self.formatter``, or with the callable from
        ``get_format_timedelta64`` when no formatter is set.
        """
        formatter = self.formatter
        if not formatter:
            formatter = get_format_timedelta64(
                self.values, nat_rep=self.nat_rep, box=self.box
            )
        return [formatter(x) for x in self.values]
def get_format_timedelta64(
    values: np.ndarray | TimedeltaIndex | TimedeltaArray,
    nat_rep: str = "NaT",
    box: bool = False,
) -> Callable:
    """
    Return a formatter function for a range of timedeltas.
    These will all have the same format argument

    If box, then show the return in quotes

    The "long" representation is used as soon as any non-NaT value is not a
    whole multiple of ``86400 * 10**9`` (one day expressed in nanoseconds).
    """
    as_i8 = values.view(np.int64)

    not_nat = as_i8 != iNaT

    nanos_per_day = 86400 * 10**9
    # error: Unsupported operand types for % ("ExtensionArray" and "int")
    has_time_part = as_i8 % nanos_per_day != 0  # type: ignore[operator]
    # error: Argument 1 to "__call__" of "ufunc" has incompatible type
    # "Union[Any, ExtensionArray, ndarray]"; expected
    # "Union[Union[int, float, complex, str, bytes, generic],
    # Sequence[Union[int, float, complex, str, bytes, generic]],
    # Sequence[Sequence[Any]], _SupportsArray]"
    intraday = np.logical_and(not_nat, has_time_part)  # type: ignore[arg-type]

    repr_format = None if intraday.sum() == 0 else "long"

    def _formatter(x):
        if x is None or (is_scalar(x) and isna(x)):
            return nat_rep

        td = x if isinstance(x, Timedelta) else Timedelta(x)

        # Timedelta._repr_base uses string formatting (faster than strftime)
        text = td._repr_base(format=repr_format)
        return f"'{text}'" if box else text

    return _formatter
1904def _make_fixed_width(
1905 strings: list[str],
1906 justify: str = "right",
1907 minimum: int | None = None,
1908 adj: TextAdjustment | None = None,
1909) -> list[str]:
1911 if len(strings) == 0 or justify == "all":
1912 return strings
1914 if adj is None:
1915 adjustment = get_adjustment()
1916 else:
1917 adjustment = adj
1919 max_len = max(adjustment.len(x) for x in strings)
1921 if minimum is not None:
1922 max_len = max(minimum, max_len)
1924 conf_max = get_option("display.max_colwidth")
1925 if conf_max is not None and max_len > conf_max:
1926 max_len = conf_max
1928 def just(x: str) -> str:
1929 if conf_max is not None:
1930 if (conf_max > 3) & (adjustment.len(x) > max_len):
1931 x = x[: max_len - 3] + "..."
1932 return x
1934 strings = [just(x) for x in strings]
1935 result = adjustment.justify(strings, max_len, mode=justify)
1936 return result
1939def _trim_zeros_complex(str_complexes: np.ndarray, decimal: str = ".") -> list[str]:
1940 """
1941 Separates the real and imaginary parts from the complex number, and
1942 executes the _trim_zeros_float method on each of those.
1943 """
1944 trimmed = [
1945 "".join(_trim_zeros_float(re.split(r"([j+-])", x), decimal))
1946 for x in str_complexes
1947 ]
1949 # pad strings to the length of the longest trimmed string for alignment
1950 lengths = [len(s) for s in trimmed]
1951 max_length = max(lengths)
1952 padded = [
1953 s[: -((k - 1) // 2 + 1)] # real part
1954 + (max_length - k) // 2 * "0"
1955 + s[-((k - 1) // 2 + 1) : -((k - 1) // 2)] # + / -
1956 + s[-((k - 1) // 2) : -1] # imaginary part
1957 + (max_length - k) // 2 * "0"
1958 + s[-1]
1959 for s, k in zip(trimmed, lengths)
1960 ]
1961 return padded
1964def _trim_zeros_single_float(str_float: str) -> str:
1965 """
1966 Trims trailing zeros after a decimal point,
1967 leaving just one if necessary.
1968 """
1969 str_float = str_float.rstrip("0")
1970 if str_float.endswith("."):
1971 str_float += "0"
1973 return str_float
1976def _trim_zeros_float(
1977 str_floats: np.ndarray | list[str], decimal: str = "."
1978) -> list[str]:
1979 """
1980 Trims the maximum number of trailing zeros equally from
1981 all numbers containing decimals, leaving just one if
1982 necessary.
1983 """
1984 trimmed = str_floats
1985 number_regex = re.compile(rf"^\s*[\+-]?[0-9]+\{decimal}[0-9]*$")
1987 def is_number_with_decimal(x):
1988 return re.match(number_regex, x) is not None
1990 def should_trim(values: np.ndarray | list[str]) -> bool:
1991 """
1992 Determine if an array of strings should be trimmed.
1994 Returns True if all numbers containing decimals (defined by the
1995 above regular expression) within the array end in a zero, otherwise
1996 returns False.
1997 """
1998 numbers = [x for x in values if is_number_with_decimal(x)]
1999 return len(numbers) > 0 and all(x.endswith("0") for x in numbers)
2001 while should_trim(trimmed):
2002 trimmed = [x[:-1] if is_number_with_decimal(x) else x for x in trimmed]
2004 # leave one 0 after the decimal points if need be.
2005 result = [
2006 x + "0" if is_number_with_decimal(x) and x.endswith(decimal) else x
2007 for x in trimmed
2008 ]
2009 return result
2012def _has_names(index: Index) -> bool:
2013 if isinstance(index, MultiIndex):
2014 return com.any_not_none(*index.names)
2015 else:
2016 return index.name is not None
class EngFormatter:
    """
    Formats float values according to engineering format.

    Based on matplotlib.ticker.EngFormatter
    """

    # The SI engineering prefixes
    ENG_PREFIXES = {
        -24: "y",
        -21: "z",
        -18: "a",
        -15: "f",
        -12: "p",
        -9: "n",
        -6: "u",
        -3: "m",
        0: "",
        3: "k",
        6: "M",
        9: "G",
        12: "T",
        15: "P",
        18: "E",
        21: "Z",
        24: "Y",
    }

    def __init__(
        self, accuracy: int | None = None, use_eng_prefix: bool = False
    ) -> None:
        # Number of digits after the decimal point; None selects "g" format.
        self.accuracy = accuracy
        # True: append an SI prefix letter; False: append "E+NN" / "E-NN".
        self.use_eng_prefix = use_eng_prefix

    def __call__(self, num: float) -> str:
        """
        Formats a number in engineering notation, appending a letter
        representing the power of 1000 of the original number. Some examples:
        >>> format_eng = EngFormatter(accuracy=0, use_eng_prefix=True)
        >>> format_eng(0)
        ' 0'
        >>> format_eng = EngFormatter(accuracy=1, use_eng_prefix=True)
        >>> format_eng(1_000_000)
        ' 1.0M'
        >>> format_eng = EngFormatter(accuracy=2, use_eng_prefix=False)
        >>> format_eng("-1e-6")
        '-1.00E-06'
        >>> format_eng(float("-inf"))
        '-inf'

        @param num: the value to represent
        @type num: either a numeric value or a string that can be converted to
                   a numeric value (as per decimal.Decimal constructor)

        @return: engineering formatted string; "NaN" for NaN and
                 "inf" / "-inf" for the infinities
        """
        dnum = decimal.Decimal(str(num))

        # NaN has to be ruled out first: the ordering comparisons below are
        # not valid for a NaN Decimal.
        if dnum.is_nan():
            return "NaN"

        if dnum.is_infinite():
            # Keep the sign so that -inf is not reported as "inf".
            return "-inf" if dnum < 0 else "inf"

        sign = 1

        if dnum < 0:  # pragma: no cover
            sign = -1
            dnum = -dnum

        if dnum != 0:
            # Largest multiple of 3 not exceeding the decimal exponent.
            pow10 = decimal.Decimal(int(math.floor(dnum.log10() / 3) * 3))
        else:
            pow10 = decimal.Decimal(0)

        # Clamp to the range of exponents that have a prefix.
        pow10 = pow10.min(max(self.ENG_PREFIXES.keys()))
        pow10 = pow10.max(min(self.ENG_PREFIXES.keys()))
        int_pow10 = int(pow10)

        if self.use_eng_prefix:
            prefix = self.ENG_PREFIXES[int_pow10]
        else:
            if int_pow10 < 0:
                prefix = f"E-{-int_pow10:02d}"
            else:
                prefix = f"E+{int_pow10:02d}"

        mant = sign * dnum / (10**pow10)

        if self.accuracy is None:  # pragma: no cover
            format_str = "{mant: g}{prefix}"
        else:
            format_str = f"{{mant: .{self.accuracy:d}f}}{{prefix}}"

        formatted = format_str.format(mant=mant, prefix=prefix)

        return formatted
def set_eng_float_format(accuracy: int = 3, use_eng_prefix: bool = False) -> None:
    """
    Alter default behavior on how float is formatted in DataFrame.
    Format float in engineering format. By accuracy, we mean the number of
    decimal digits after the floating point.

    Installs an ``EngFormatter`` as the ``display.float_format`` option.

    See also EngFormatter.
    """
    eng_formatter = EngFormatter(accuracy, use_eng_prefix)
    set_option("display.float_format", eng_formatter)
def get_level_lengths(
    levels: Any, sentinel: bool | object | str = ""
) -> list[dict[int, int]]:
    """
    For each index in each level the function returns lengths of indexes.

    Parameters
    ----------
    levels : list of lists
        List of values for each level.
    sentinel : string, optional
        Value which states that no new index starts there.

    Returns
    -------
    Returns list of maps. For each level returns map of indexes (key is index
    in row and value is length of index).
    """
    if len(levels) == 0:
        return []

    # Per position: True while every level seen so far held the sentinel
    # there. Once a position has started a new entry in some level, it starts
    # a new entry in all later levels too.
    may_continue = [True] * len(levels[0])

    all_lengths = []
    for level in levels:
        start = 0
        spans = {}
        for pos, key in enumerate(level):
            if may_continue[pos] and key == sentinel:
                # Still part of the entry that began at ``start``.
                continue
            may_continue[pos] = False
            spans[start] = pos - start
            start = pos

        # Close the entry that runs to the end of the level.
        spans[start] = len(level) - start
        all_lengths.append(spans)

    return all_lengths
def buffer_put_lines(buf: WriteBuffer[str], lines: list[str]) -> None:
    """
    Appends lines to a buffer.

    Every item is converted with ``str`` and the items are joined with
    newlines; no trailing newline is written.

    Parameters
    ----------
    buf
        The buffer to write to
    lines
        The lines to append.
    """
    # Always stringify: converting only when some item already was a str left
    # lists made up entirely of non-str items to fail inside str.join.
    buf.write("\n".join(str(x) for x in lines))