Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/io/formats/info.py: 44%
363 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1from __future__ import annotations
3from abc import (
4 ABC,
5 abstractmethod,
6)
7import sys
8from textwrap import dedent
9from typing import (
10 TYPE_CHECKING,
11 Iterable,
12 Iterator,
13 Mapping,
14 Sequence,
15)
17from pandas._config import get_option
19from pandas._typing import (
20 Dtype,
21 WriteBuffer,
22)
24from pandas.io.formats import format as fmt
25from pandas.io.formats.printing import pprint_thing
27if TYPE_CHECKING: 27 ↛ 28line 27 didn't jump to line 28, because the condition on line 27 was never true
28 from pandas import (
29 DataFrame,
30 Index,
31 Series,
32 )
# Docstring fragment describing the ``max_cols`` parameter. It is dedented at
# import time and supplied as the ``max_cols_sub`` entry of ``frame_sub_kwargs``;
# the Series variant passes an empty string for the same key.
35frame_max_cols_sub = dedent(
36 """\
37 max_cols : int, optional
38 When to switch from the verbose to the truncated output. If the
39 DataFrame has more than `max_cols` columns, the truncated output
40 is used. By default, the setting in
41 ``pandas.options.display.max_info_columns`` is used."""
42)
# Docstring fragment describing the ``show_counts`` parameter. The same text is
# used by both ``frame_sub_kwargs`` and ``series_sub_kwargs``.
45show_counts_sub = dedent(
46 """\
47 show_counts : bool, optional
48 Whether to show the non-null counts. By default, this is shown
49 only if the DataFrame is smaller than
50 ``pandas.options.display.max_info_rows`` and
51 ``pandas.options.display.max_info_columns``. A value of True always
52 shows the counts, and False never shows the counts."""
53)
# Docstring fragment for the deprecated ``null_counts`` parameter (DataFrame
# only; ``series_sub_kwargs`` passes an empty string instead).
# Unlike the sibling fragments, the literal has no line-continuation backslash
# after the opening quotes, so it begins with a newline. ``INFO_DOCSTRING``
# places ``{null_counts_sub}`` directly after ``{show_counts_sub}``, and that
# leading newline is what separates the two entries.
55null_counts_sub = dedent(
56 """
57 null_counts : bool, optional
58 .. deprecated:: 1.2.0
59 Use show_counts instead."""
60)
# Doctest-style usage examples for the DataFrame flavour of the info docstring;
# supplied as the ``examples_sub`` entry of ``frame_sub_kwargs``.
# The examples cover verbose and non-verbose output, redirecting output to a
# buffer, and the effect of ``memory_usage='deep'``.
63frame_examples_sub = dedent(
64 """\
65 >>> int_values = [1, 2, 3, 4, 5]
66 >>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon']
67 >>> float_values = [0.0, 0.25, 0.5, 0.75, 1.0]
68 >>> df = pd.DataFrame({"int_col": int_values, "text_col": text_values,
69 ... "float_col": float_values})
70 >>> df
71 int_col text_col float_col
72 0 1 alpha 0.00
73 1 2 beta 0.25
74 2 3 gamma 0.50
75 3 4 delta 0.75
76 4 5 epsilon 1.00
78 Prints information of all columns:
80 >>> df.info(verbose=True)
81 <class 'pandas.core.frame.DataFrame'>
82 RangeIndex: 5 entries, 0 to 4
83 Data columns (total 3 columns):
84 # Column Non-Null Count Dtype
85 --- ------ -------------- -----
86 0 int_col 5 non-null int64
87 1 text_col 5 non-null object
88 2 float_col 5 non-null float64
89 dtypes: float64(1), int64(1), object(1)
90 memory usage: 248.0+ bytes
92 Prints a summary of columns count and its dtypes but not per column
93 information:
95 >>> df.info(verbose=False)
96 <class 'pandas.core.frame.DataFrame'>
97 RangeIndex: 5 entries, 0 to 4
98 Columns: 3 entries, int_col to float_col
99 dtypes: float64(1), int64(1), object(1)
100 memory usage: 248.0+ bytes
102 Pipe output of DataFrame.info to buffer instead of sys.stdout, get
103 buffer content and writes to a text file:
105 >>> import io
106 >>> buffer = io.StringIO()
107 >>> df.info(buf=buffer)
108 >>> s = buffer.getvalue()
109 >>> with open("df_info.txt", "w",
110 ... encoding="utf-8") as f: # doctest: +SKIP
111 ... f.write(s)
112 260
114 The `memory_usage` parameter allows deep introspection mode, specially
115 useful for big DataFrames and fine-tune memory optimization:
117 >>> random_strings_array = np.random.choice(['a', 'b', 'c'], 10 ** 6)
118 >>> df = pd.DataFrame({
119 ... 'column_1': np.random.choice(['a', 'b', 'c'], 10 ** 6),
120 ... 'column_2': np.random.choice(['a', 'b', 'c'], 10 ** 6),
121 ... 'column_3': np.random.choice(['a', 'b', 'c'], 10 ** 6)
122 ... })
123 >>> df.info()
124 <class 'pandas.core.frame.DataFrame'>
125 RangeIndex: 1000000 entries, 0 to 999999
126 Data columns (total 3 columns):
127 # Column Non-Null Count Dtype
128 --- ------ -------------- -----
129 0 column_1 1000000 non-null object
130 1 column_2 1000000 non-null object
131 2 column_3 1000000 non-null object
132 dtypes: object(3)
133 memory usage: 22.9+ MB
135 >>> df.info(memory_usage='deep')
136 <class 'pandas.core.frame.DataFrame'>
137 RangeIndex: 1000000 entries, 0 to 999999
138 Data columns (total 3 columns):
139 # Column Non-Null Count Dtype
140 --- ------ -------------- -----
141 0 column_1 1000000 non-null object
142 1 column_2 1000000 non-null object
143 2 column_3 1000000 non-null object
144 dtypes: object(3)
145 memory usage: 165.9 MB"""
146)
# "See Also" entries for the DataFrame flavour of the info docstring; supplied
# as the ``see_also_sub`` entry of ``frame_sub_kwargs``.
149frame_see_also_sub = dedent(
150 """\
151 DataFrame.describe: Generate descriptive statistics of DataFrame
152 columns.
153 DataFrame.memory_usage: Memory usage of DataFrame columns."""
154)
# Substitution values for the DataFrame flavour of the docstring template. The
# keys match the ``{...}`` placeholders that appear in ``INFO_DOCSTRING``.
# ``version_added_sub`` is empty here, whereas the Series mapping supplies a
# ``versionadded`` directive for the same key.
157frame_sub_kwargs = {
158 "klass": "DataFrame",
159 "type_sub": " and columns",
160 "max_cols_sub": frame_max_cols_sub,
161 "show_counts_sub": show_counts_sub,
162 "null_counts_sub": null_counts_sub,
163 "examples_sub": frame_examples_sub,
164 "see_also_sub": frame_see_also_sub,
165 "version_added_sub": "",
166}
# Doctest-style usage examples for the Series flavour of the info docstring;
# supplied as the ``examples_sub`` entry of ``series_sub_kwargs``.
# NOTE(review): the buffer example rebinds ``s`` to the captured text and shows
# the same ``260`` result and ``df_info.txt`` file name as the DataFrame
# example; the block is marked ``+SKIP`` so it is not executed - confirm the
# displayed value is intended.
169series_examples_sub = dedent(
170 """\
171 >>> int_values = [1, 2, 3, 4, 5]
172 >>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon']
173 >>> s = pd.Series(text_values, index=int_values)
174 >>> s.info()
175 <class 'pandas.core.series.Series'>
176 Int64Index: 5 entries, 1 to 5
177 Series name: None
178 Non-Null Count Dtype
179 -------------- -----
180 5 non-null object
181 dtypes: object(1)
182 memory usage: 80.0+ bytes
184 Prints a summary excluding information about its values:
186 >>> s.info(verbose=False)
187 <class 'pandas.core.series.Series'>
188 Int64Index: 5 entries, 1 to 5
189 dtypes: object(1)
190 memory usage: 80.0+ bytes
192 Pipe output of Series.info to buffer instead of sys.stdout, get
193 buffer content and writes to a text file:
195 >>> import io
196 >>> buffer = io.StringIO()
197 >>> s.info(buf=buffer)
198 >>> s = buffer.getvalue()
199 >>> with open("df_info.txt", "w",
200 ... encoding="utf-8") as f: # doctest: +SKIP
201 ... f.write(s)
202 260
204 The `memory_usage` parameter allows deep introspection mode, specially
205 useful for big Series and fine-tune memory optimization:
207 >>> random_strings_array = np.random.choice(['a', 'b', 'c'], 10 ** 6)
208 >>> s = pd.Series(np.random.choice(['a', 'b', 'c'], 10 ** 6))
209 >>> s.info()
210 <class 'pandas.core.series.Series'>
211 RangeIndex: 1000000 entries, 0 to 999999
212 Series name: None
213 Non-Null Count Dtype
214 -------------- -----
215 1000000 non-null object
216 dtypes: object(1)
217 memory usage: 7.6+ MB
219 >>> s.info(memory_usage='deep')
220 <class 'pandas.core.series.Series'>
221 RangeIndex: 1000000 entries, 0 to 999999
222 Series name: None
223 Non-Null Count Dtype
224 -------------- -----
225 1000000 non-null object
226 dtypes: object(1)
227 memory usage: 55.3 MB"""
228)
# "See Also" entries for the Series flavour of the info docstring; supplied as
# the ``see_also_sub`` entry of ``series_sub_kwargs``.
231series_see_also_sub = dedent(
232 """\
233 Series.describe: Generate descriptive statistics of Series.
234 Series.memory_usage: Memory usage of Series."""
235)
# Substitution values for the Series flavour of the docstring template. The
# keys match the ``{...}`` placeholders that appear in ``INFO_DOCSTRING``.
# ``type_sub``, ``max_cols_sub`` and ``null_counts_sub`` are empty strings, so
# those placeholders render as nothing for Series.
238series_sub_kwargs = {
239 "klass": "Series",
240 "type_sub": "",
241 "max_cols_sub": "",
242 "show_counts_sub": show_counts_sub,
243 "null_counts_sub": "",
244 "examples_sub": series_examples_sub,
245 "see_also_sub": series_see_also_sub,
246 "version_added_sub": "\n.. versionadded:: 1.4.0\n",
247}
# Shared docstring template for the ``info`` method. It contains the
# placeholders ``{klass}``, ``{type_sub}``, ``{version_added_sub}``,
# ``{max_cols_sub}``, ``{show_counts_sub}``, ``{null_counts_sub}``,
# ``{see_also_sub}`` and ``{examples_sub}``, which correspond to the keys of
# ``frame_sub_kwargs`` and ``series_sub_kwargs``.
# The trailing backslashes inside the literal join a line with the following
# placeholder so that an empty substitution leaves no blank line behind.
# NOTE(review): the code that performs the substitution is not in this module
# section - presumably a docstring-substitution decorator on the public
# ``info`` methods; confirm at the call sites.
250INFO_DOCSTRING = dedent(
251 """
252 Print a concise summary of a {klass}.
254 This method prints information about a {klass} including
255 the index dtype{type_sub}, non-null values and memory usage.
256 {version_added_sub}\
258 Parameters
259 ----------
260 verbose : bool, optional
261 Whether to print the full summary. By default, the setting in
262 ``pandas.options.display.max_info_columns`` is followed.
263 buf : writable buffer, defaults to sys.stdout
264 Where to send the output. By default, the output is printed to
265 sys.stdout. Pass a writable buffer if you need to further process
266 the output.\
267 {max_cols_sub}
268 memory_usage : bool, str, optional
269 Specifies whether total memory usage of the {klass}
270 elements (including the index) should be displayed. By default,
271 this follows the ``pandas.options.display.memory_usage`` setting.
273 True always show memory usage. False never shows memory usage.
274 A value of 'deep' is equivalent to "True with deep introspection".
275 Memory usage is shown in human-readable units (base-2
276 representation). Without deep introspection a memory estimation is
277 made based in column dtype and number of rows assuming values
278 consume the same memory amount for corresponding dtypes. With deep
279 memory introspection, a real memory usage calculation is performed
280 at the cost of computational resources. See the
281 :ref:`Frequently Asked Questions <df-memory-usage>` for more
282 details.
283 {show_counts_sub}{null_counts_sub}
285 Returns
286 -------
287 None
288 This method prints a summary of a {klass} and returns None.
290 See Also
291 --------
292 {see_also_sub}
294 Examples
295 --------
296 {examples_sub}
297 """
298)
301def _put_str(s: str | Dtype, space: int) -> str:
302 """
303 Make string of specified length, padding to the right if necessary.
305 Parameters
306 ----------
307 s : Union[str, Dtype]
308 String to be formatted.
309 space : int
310 Length to force string to be of.
312 Returns
313 -------
314 str
315 String coerced to given length.
317 Examples
318 --------
319 >>> pd.io.formats.info._put_str("panda", 6)
320 'panda '
321 >>> pd.io.formats.info._put_str("panda", 4)
322 'pand'
323 """
324 return str(s)[:space].ljust(space)
327def _sizeof_fmt(num: float, size_qualifier: str) -> str:
328 """
329 Return size in human readable format.
331 Parameters
332 ----------
333 num : int
334 Size in bytes.
335 size_qualifier : str
336 Either empty, or '+' (if lower bound).
338 Returns
339 -------
340 str
341 Size in human readable format.
343 Examples
344 --------
345 >>> _sizeof_fmt(23028, '')
346 '22.5 KB'
348 >>> _sizeof_fmt(23028, '+')
349 '22.5+ KB'
350 """
351 for x in ["bytes", "KB", "MB", "GB", "TB"]:
352 if num < 1024.0:
353 return f"{num:3.1f}{size_qualifier} {x}"
354 num /= 1024.0
355 return f"{num:3.1f}{size_qualifier} PB"
358def _initialize_memory_usage(
359 memory_usage: bool | str | None = None,
360) -> bool | str:
361 """Get memory usage based on inputs and display options."""
362 if memory_usage is None:
363 memory_usage = get_option("display.memory_usage")
364 return memory_usage
class BaseInfo(ABC):
    """
    Common interface shared by DataFrameInfo and SeriesInfo.

    Parameters
    ----------
    data : DataFrame or Series
        Object being summarized.
    memory_usage : bool or str, optional
        If "deep", introspect the data deeply by interrogating object dtypes
        for system-level memory consumption, and include it in the returned
        values.
    """

    data: DataFrame | Series
    memory_usage: bool | str

    @property
    @abstractmethod
    def dtypes(self) -> Iterable[Dtype]:
        """
        Dtypes.

        Returns
        -------
        dtypes : sequence
            Dtype of each of the DataFrame's columns (or one series column).
        """

    @property
    @abstractmethod
    def dtype_counts(self) -> Mapping[str, int]:
        """Mapping from dtype name to the number of occurrences."""

    @property
    @abstractmethod
    def non_null_counts(self) -> Sequence[int]:
        """Non-null counts for every column (a single entry for a series)."""

    @property
    @abstractmethod
    def memory_usage_bytes(self) -> int:
        """
        Memory usage in bytes.

        Returns
        -------
        memory_usage_bytes : int
            Object's total memory usage in bytes.
        """

    @property
    def memory_usage_string(self) -> str:
        """Human-readable memory usage, terminated by a newline."""
        formatted = _sizeof_fmt(self.memory_usage_bytes, self.size_qualifier)
        return f"{formatted}\n"

    @property
    def size_qualifier(self) -> str:
        """
        Suffix marking the reported memory usage as a lower bound.

        Returns
        -------
        str
            "+" when memory usage is shown without deep introspection and
            either an "object" dtype is present or the index reports that
            its memory usage is qualified; otherwise an empty string.
        """
        if not self.memory_usage or self.memory_usage == "deep":
            return ""
        # The qualifier is only a best effort; it is not guaranteed to catch
        # all cases (e.g., it misses categorical data even with object
        # categories).
        has_object_dtype = "object" in self.dtype_counts
        if has_object_dtype or self.data.index._is_memory_usage_qualified():
            return "+"
        return ""

    @abstractmethod
    def render(
        self,
        *,
        buf: WriteBuffer[str] | None,
        max_cols: int | None,
        verbose: bool | None,
        show_counts: bool | None,
    ) -> None:
        """Write the summary to ``buf``; implemented by subclasses."""
class DataFrameInfo(BaseInfo):
    """
    Summary information extracted from a DataFrame.
    """

    def __init__(
        self,
        data: DataFrame,
        memory_usage: bool | str | None = None,
    ) -> None:
        self.data: DataFrame = data
        self.memory_usage = _initialize_memory_usage(memory_usage)

    @property
    def dtype_counts(self) -> Mapping[str, int]:
        """Mapping from dtype name to the number of columns of that dtype."""
        return _get_dataframe_dtype_counts(self.data)

    @property
    def dtypes(self) -> Iterable[Dtype]:
        """
        Dtypes.

        Returns
        -------
        dtypes
            Dtype of each of the DataFrame's columns.
        """
        return self.data.dtypes

    @property
    def ids(self) -> Index:
        """
        Column names.

        Returns
        -------
        ids : Index
            DataFrame's column names.
        """
        return self.data.columns

    @property
    def col_count(self) -> int:
        """Number of columns to be summarized."""
        return len(self.ids)

    @property
    def non_null_counts(self) -> Sequence[int]:
        """Non-null count of every column, as returned by ``data.count()``."""
        return self.data.count()

    @property
    def memory_usage_bytes(self) -> int:
        """Total memory usage in bytes, index included."""
        # Deep introspection is requested only by the literal value "deep".
        deep = self.memory_usage == "deep"
        per_column = self.data.memory_usage(index=True, deep=deep)
        return per_column.sum()

    def render(
        self,
        *,
        buf: WriteBuffer[str] | None,
        max_cols: int | None,
        verbose: bool | None,
        show_counts: bool | None,
    ) -> None:
        """Build a DataFrameInfoPrinter for this info and write it to ``buf``."""
        DataFrameInfoPrinter(
            info=self,
            max_cols=max_cols,
            verbose=verbose,
            show_counts=show_counts,
        ).to_buffer(buf)
class SeriesInfo(BaseInfo):
    """
    Summary information extracted from a Series.
    """

    def __init__(
        self,
        data: Series,
        memory_usage: bool | str | None = None,
    ) -> None:
        self.data: Series = data
        self.memory_usage = _initialize_memory_usage(memory_usage)

    def render(
        self,
        *,
        buf: WriteBuffer[str] | None = None,
        max_cols: int | None = None,
        verbose: bool | None = None,
        show_counts: bool | None = None,
    ) -> None:
        """
        Build a SeriesInfoPrinter for this info and write it to ``buf``.

        Raises
        ------
        ValueError
            If ``max_cols`` is given; it only applies to DataFrame.info.
        """
        if max_cols is not None:
            message = (
                "Argument `max_cols` can only be passed "
                "in DataFrame.info, not Series.info"
            )
            raise ValueError(message)
        SeriesInfoPrinter(
            info=self,
            verbose=verbose,
            show_counts=show_counts,
        ).to_buffer(buf)

    @property
    def non_null_counts(self) -> Sequence[int]:
        """Single-element list holding the series' non-null count."""
        non_null = self.data.count()
        return [non_null]

    @property
    def dtypes(self) -> Iterable[Dtype]:
        """Single-element list holding the series' dtype."""
        series_dtype = self.data.dtypes
        return [series_dtype]

    @property
    def dtype_counts(self) -> Mapping[str, int]:
        """Mapping from dtype name to count, computed via a one-column frame."""
        from pandas.core.frame import DataFrame

        as_frame = DataFrame(self.data)
        return _get_dataframe_dtype_counts(as_frame)

    @property
    def memory_usage_bytes(self) -> int:
        """Memory usage in bytes.

        Returns
        -------
        memory_usage_bytes : int
            Object's total memory usage in bytes.
        """
        # Deep introspection is requested only by the literal value "deep".
        deep = self.memory_usage == "deep"
        return self.data.memory_usage(index=True, deep=deep)
class InfoPrinterAbstract(ABC):
    """
    Class for printing dataframe or series info.

    Concrete printers implement ``_create_table_builder``; ``to_buffer`` then
    writes the lines produced by that builder. The class derives from ``ABC``
    so that the ``@abstractmethod`` marker is actually enforced and the base
    class cannot be instantiated directly.
    """

    def to_buffer(self, buf: WriteBuffer[str] | None = None) -> None:
        """
        Save dataframe or series info into buffer.

        Parameters
        ----------
        buf : writable buffer, optional
            Destination of the output; ``sys.stdout`` is used when None.
        """
        table_builder = self._create_table_builder()
        lines = table_builder.get_lines()
        if buf is None:  # pragma: no cover
            buf = sys.stdout
        fmt.buffer_put_lines(buf, lines)

    @abstractmethod
    def _create_table_builder(self) -> TableBuilderAbstract:
        """Create instance of table builder."""
class DataFrameInfoPrinter(InfoPrinterAbstract):
    """
    Printer that renders the info summary of a DataFrame.

    Parameters
    ----------
    info : DataFrameInfo
        Instance of DataFrameInfo.
    max_cols : int, optional
        When to switch from the verbose to the truncated output.
    verbose : bool, optional
        Whether to print the full summary.
    show_counts : bool, optional
        Whether to show the non-null counts.
    """

    def __init__(
        self,
        info: DataFrameInfo,
        max_cols: int | None = None,
        verbose: bool | None = None,
        show_counts: bool | None = None,
    ) -> None:
        self.info = info
        self.data = info.data
        self.verbose = verbose
        # max_cols has to be resolved first: the default for show_counts
        # depends on it through exceeds_info_cols.
        self.max_cols = self._initialize_max_cols(max_cols)
        self.show_counts = self._initialize_show_counts(show_counts)

    @property
    def max_rows(self) -> int:
        """Maximum info rows to be displayed."""
        # NOTE(review): the second positional argument is handed to get_option
        # unchanged; confirm that it really acts as a fallback value.
        row_limit_fallback = len(self.data) + 1
        return get_option("display.max_info_rows", row_limit_fallback)

    @property
    def exceeds_info_cols(self) -> bool:
        """Whether the number of columns is greater than ``max_cols``."""
        too_many_columns = self.col_count > self.max_cols
        return bool(too_many_columns)

    @property
    def exceeds_info_rows(self) -> bool:
        """Whether the number of rows is greater than ``max_rows``."""
        too_many_rows = len(self.data) > self.max_rows
        return bool(too_many_rows)

    @property
    def col_count(self) -> int:
        """Number of columns to be summarized."""
        return self.info.col_count

    def _initialize_max_cols(self, max_cols: int | None) -> int:
        """Return ``max_cols``, or the display option when it is None."""
        if max_cols is not None:
            return max_cols
        return get_option("display.max_info_columns", self.col_count + 1)

    def _initialize_show_counts(self, show_counts: bool | None) -> bool:
        """Return ``show_counts``; by default counts are shown only if no limit is exceeded."""
        if show_counts is not None:
            return show_counts
        return bool(not (self.exceeds_info_cols or self.exceeds_info_rows))

    def _create_table_builder(self) -> DataFrameTableBuilder:
        """
        Create instance of table builder based on verbosity and display settings.
        """
        requested = self.verbose
        if requested:
            use_verbose = True
        elif requested is False:
            # Explicitly False, as opposed to None / unspecified.
            use_verbose = False
        else:
            # No explicit choice: decide from the column limit.
            use_verbose = not self.exceeds_info_cols

        if not use_verbose:
            return DataFrameTableBuilderNonVerbose(info=self.info)
        return DataFrameTableBuilderVerbose(
            info=self.info,
            with_counts=self.show_counts,
        )
class SeriesInfoPrinter(InfoPrinterAbstract):
    """Printer that renders the info summary of a Series.

    Parameters
    ----------
    info : SeriesInfo
        Instance of SeriesInfo.
    verbose : bool, optional
        Whether to print the full summary.
    show_counts : bool, optional
        Whether to show the non-null counts.
    """

    def __init__(
        self,
        info: SeriesInfo,
        verbose: bool | None = None,
        show_counts: bool | None = None,
    ) -> None:
        self.info = info
        self.data = info.data
        self.verbose = verbose
        self.show_counts = self._initialize_show_counts(show_counts)

    def _create_table_builder(self) -> SeriesTableBuilder:
        """
        Create instance of table builder based on verbosity.
        """
        # An unspecified verbosity (None) is treated like verbose output.
        if not (self.verbose or self.verbose is None):
            return SeriesTableBuilderNonVerbose(info=self.info)
        return SeriesTableBuilderVerbose(
            info=self.info,
            with_counts=self.show_counts,
        )

    def _initialize_show_counts(self, show_counts: bool | None) -> bool:
        """Return ``show_counts``, defaulting to True when it is None."""
        if show_counts is not None:
            return show_counts
        return True
class TableBuilderAbstract(ABC):
    """
    Base class of the builders that assemble the info table line by line.
    """

    _lines: list[str]
    info: BaseInfo

    @abstractmethod
    def get_lines(self) -> list[str]:
        """Product in a form of list of lines (strings)."""

    @property
    def data(self) -> DataFrame | Series:
        """Object being summarized, taken from ``info``."""
        return self.info.data

    @property
    def dtypes(self) -> Iterable[Dtype]:
        """Dtypes of each of the DataFrame's columns."""
        return self.info.dtypes

    @property
    def dtype_counts(self) -> Mapping[str, int]:
        """Mapping dtype - number of counts."""
        return self.info.dtype_counts

    @property
    def display_memory_usage(self) -> bool:
        """Whether to display memory usage."""
        return bool(self.info.memory_usage)

    @property
    def memory_usage_string(self) -> str:
        """Memory usage string with proper size qualifier."""
        return self.info.memory_usage_string

    @property
    def non_null_counts(self) -> Sequence[int]:
        """Non-null counts, taken from ``info``."""
        return self.info.non_null_counts

    def add_object_type_line(self) -> None:
        """Add line with string representation of the data's type to the table."""
        type_repr = str(type(self.data))
        self._lines.append(type_repr)

    def add_index_range_line(self) -> None:
        """Add line with range of indices to the table."""
        index_summary = self.data.index._summary()
        self._lines.append(index_summary)

    def add_dtypes_line(self) -> None:
        """Add summary line with dtypes present in dataframe."""
        entries = []
        # Entries are listed in sorted order of the (name, count) pairs.
        for dtype_name, occurrences in sorted(self.dtype_counts.items()):
            entries.append(f"{dtype_name}({occurrences:d})")
        self._lines.append(f"dtypes: {', '.join(entries)}")
class DataFrameTableBuilder(TableBuilderAbstract):
    """
    Base class of the builders producing the info table of a dataframe.

    Parameters
    ----------
    info : DataFrameInfo.
        Instance of DataFrameInfo.
    """

    def __init__(self, *, info: DataFrameInfo) -> None:
        self.info: DataFrameInfo = info

    def get_lines(self) -> list[str]:
        """Build the table from scratch and return its lines."""
        self._lines = []
        has_no_columns = self.col_count == 0
        fill = self._fill_empty_info if has_no_columns else self._fill_non_empty_info
        fill()
        return self._lines

    def _fill_empty_info(self) -> None:
        """Add lines to the info table, pertaining to empty dataframe."""
        self.add_object_type_line()
        self.add_index_range_line()
        class_name = type(self.data).__name__
        self._lines.append(f"Empty {class_name}\n")

    @abstractmethod
    def _fill_non_empty_info(self) -> None:
        """Add lines to the info table, pertaining to non-empty dataframe."""

    @property
    def data(self) -> DataFrame:
        """DataFrame."""
        return self.info.data

    @property
    def ids(self) -> Index:
        """Dataframe columns."""
        return self.info.ids

    @property
    def col_count(self) -> int:
        """Number of dataframe columns to be summarized."""
        return self.info.col_count

    def add_memory_usage_line(self) -> None:
        """Add line containing memory usage."""
        usage = self.memory_usage_string
        self._lines.append(f"memory usage: {usage}")
class DataFrameTableBuilderNonVerbose(DataFrameTableBuilder):
    """
    Builder of the condensed (non-verbose) dataframe info table.
    """

    def _fill_non_empty_info(self) -> None:
        """Add lines to the info table, pertaining to non-empty dataframe."""
        steps = (
            self.add_object_type_line,
            self.add_index_range_line,
            self.add_columns_summary_line,
            self.add_dtypes_line,
        )
        for add_line in steps:
            add_line()
        # The memory line is optional and always comes last.
        if self.display_memory_usage:
            self.add_memory_usage_line()

    def add_columns_summary_line(self) -> None:
        """Add the one-line summary of the columns index."""
        columns_summary = self.ids._summary(name="Columns")
        self._lines.append(columns_summary)
class TableBuilderVerboseMixin(TableBuilderAbstract):
    """
    Shared pieces of the verbose info output: header, separator and body rows.
    """

    SPACING: str = " " * 2
    strrows: Sequence[Sequence[str]]
    gross_column_widths: Sequence[int]
    with_counts: bool

    @property
    @abstractmethod
    def headers(self) -> Sequence[str]:
        """Headers names of the columns in verbose table."""

    @property
    def header_column_widths(self) -> Sequence[int]:
        """Widths of header columns (only titles)."""
        return list(map(len, self.headers))

    def _get_gross_column_widths(self) -> Sequence[int]:
        """Get widths of columns containing both headers and actual content."""
        body_widths = self._get_body_column_widths()
        header_widths = self.header_column_widths
        return [
            max(header_width, body_width)
            for header_width, body_width in zip(header_widths, body_widths)
        ]

    def _get_body_column_widths(self) -> Sequence[int]:
        """Get widths of table content columns."""
        # Transpose the rows so that each tuple holds the cells of one column.
        columns: Sequence[Sequence[str]] = list(zip(*self.strrows))
        widths = []
        for column in columns:
            widths.append(max(len(cell) for cell in column))
        return widths

    def _gen_rows(self) -> Iterator[Sequence[str]]:
        """
        Return the iterator producing the body rows.

        Each element represents a row comprising a sequence of strings.
        """
        if not self.with_counts:
            return self._gen_rows_without_counts()
        return self._gen_rows_with_counts()

    @abstractmethod
    def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data with counts."""

    @abstractmethod
    def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data without counts."""

    def add_header_line(self) -> None:
        """Add the line holding the column titles, padded to the column widths."""
        cells = []
        for title, width in zip(self.headers, self.gross_column_widths):
            cells.append(_put_str(title, width))
        self._lines.append(self.SPACING.join(cells))

    def add_separator_line(self) -> None:
        """Add the dashed line below the titles; dashes span the title width only."""
        cells = []
        for title_width, width in zip(
            self.header_column_widths, self.gross_column_widths
        ):
            cells.append(_put_str("-" * title_width, width))
        self._lines.append(self.SPACING.join(cells))

    def add_body_lines(self) -> None:
        """Add one line per body row, each cell padded to its column width."""
        widths = self.gross_column_widths
        for row in self.strrows:
            cells = [_put_str(cell, width) for cell, width in zip(row, widths)]
            self._lines.append(self.SPACING.join(cells))

    def _gen_non_null_counts(self) -> Iterator[str]:
        """Iterator with string representation of non-null counts."""
        yield from (f"{count} non-null" for count in self.non_null_counts)

    def _gen_dtypes(self) -> Iterator[str]:
        """Iterator with string representation of column dtypes."""
        yield from (pprint_thing(dtype) for dtype in self.dtypes)
class DataFrameTableBuilderVerbose(DataFrameTableBuilder, TableBuilderVerboseMixin):
    """
    Builder of the verbose (per-column) dataframe info table.
    """

    def __init__(
        self,
        *,
        info: DataFrameInfo,
        with_counts: bool,
    ) -> None:
        self.info = info
        self.with_counts = with_counts
        # The rows are materialized once; the column widths are derived from them.
        self.strrows: Sequence[Sequence[str]] = list(self._gen_rows())
        self.gross_column_widths: Sequence[int] = self._get_gross_column_widths()

    def _fill_non_empty_info(self) -> None:
        """Add lines to the info table, pertaining to non-empty dataframe."""
        steps = (
            self.add_object_type_line,
            self.add_index_range_line,
            self.add_columns_summary_line,
            self.add_header_line,
            self.add_separator_line,
            self.add_body_lines,
            self.add_dtypes_line,
        )
        for add_line in steps:
            add_line()
        # The memory line is optional and always comes last.
        if self.display_memory_usage:
            self.add_memory_usage_line()

    @property
    def headers(self) -> Sequence[str]:
        """Headers names of the columns in verbose table."""
        if not self.with_counts:
            return [" # ", "Column", "Dtype"]
        return [" # ", "Column", "Non-Null Count", "Dtype"]

    def add_columns_summary_line(self) -> None:
        """Add the line stating the total number of columns."""
        total = self.col_count
        self._lines.append(f"Data columns (total {total} columns):")

    def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data without counts."""
        numbers = self._gen_line_numbers()
        names = self._gen_columns()
        dtypes = self._gen_dtypes()
        yield from zip(numbers, names, dtypes)

    def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data with counts."""
        numbers = self._gen_line_numbers()
        names = self._gen_columns()
        counts = self._gen_non_null_counts()
        dtypes = self._gen_dtypes()
        yield from zip(numbers, names, counts, dtypes)

    def _gen_line_numbers(self) -> Iterator[str]:
        """Iterator with string representation of column numbers."""
        yield from (f" {position}" for position, _ in enumerate(self.ids))

    def _gen_columns(self) -> Iterator[str]:
        """Iterator with string representation of column names."""
        yield from (pprint_thing(label) for label in self.ids)
class SeriesTableBuilder(TableBuilderAbstract):
    """
    Base class of the builders producing the info table of a series.

    Parameters
    ----------
    info : SeriesInfo.
        Instance of SeriesInfo.
    """

    def __init__(self, *, info: SeriesInfo) -> None:
        self.info: SeriesInfo = info

    def get_lines(self) -> list[str]:
        """Build the table from scratch and return its lines."""
        lines: list[str] = []
        # The fill methods append to ``self._lines``, so bind it before filling.
        self._lines = lines
        self._fill_non_empty_info()
        return self._lines

    @property
    def data(self) -> Series:
        """Series."""
        return self.info.data

    def add_memory_usage_line(self) -> None:
        """Add line containing memory usage."""
        usage = self.memory_usage_string
        self._lines.append(f"memory usage: {usage}")

    @abstractmethod
    def _fill_non_empty_info(self) -> None:
        """Add lines to the info table, pertaining to non-empty series."""
class SeriesTableBuilderNonVerbose(SeriesTableBuilder):
    """
    Builder of the condensed (non-verbose) series info table.
    """

    def _fill_non_empty_info(self) -> None:
        """Add lines to the info table, pertaining to non-empty series."""
        steps = (
            self.add_object_type_line,
            self.add_index_range_line,
            self.add_dtypes_line,
        )
        for add_line in steps:
            add_line()
        # The memory line is optional and always comes last.
        if self.display_memory_usage:
            self.add_memory_usage_line()
class SeriesTableBuilderVerbose(SeriesTableBuilder, TableBuilderVerboseMixin):
    """
    Series info table builder for verbose output.

    Parameters
    ----------
    info : SeriesInfo
        Instance of SeriesInfo.
    with_counts : bool
        Whether the table includes the "Non-Null Count" column.
    """

    def __init__(
        self,
        *,
        info: SeriesInfo,
        with_counts: bool,
    ) -> None:
        self.info = info
        self.with_counts = with_counts
        # The rows are materialized once; the column widths are derived from them.
        self.strrows: Sequence[Sequence[str]] = list(self._gen_rows())
        self.gross_column_widths: Sequence[int] = self._get_gross_column_widths()

    def _fill_non_empty_info(self) -> None:
        """Add lines to the info table, pertaining to non-empty series."""
        self.add_object_type_line()
        self.add_index_range_line()
        self.add_series_name_line()
        self.add_header_line()
        self.add_separator_line()
        self.add_body_lines()
        self.add_dtypes_line()
        if self.display_memory_usage:
            self.add_memory_usage_line()

    def add_series_name_line(self) -> None:
        """Add line with the name of the series."""
        self._lines.append(f"Series name: {self.data.name}")

    @property
    def headers(self) -> Sequence[str]:
        """Headers names of the columns in verbose table."""
        if self.with_counts:
            return ["Non-Null Count", "Dtype"]
        return ["Dtype"]

    def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data without counts."""
        # Each row must be a sequence of cells. Yielding the bare dtype string
        # would make the table code treat every character as a separate cell,
        # and only the first character would end up in the single "Dtype"
        # column. Wrap each dtype in a one-element tuple instead.
        for dtype in self._gen_dtypes():
            yield (dtype,)

    def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data with counts."""
        yield from zip(
            self._gen_non_null_counts(),
            self._gen_dtypes(),
        )
1111def _get_dataframe_dtype_counts(df: DataFrame) -> Mapping[str, int]:
1112 """
1113 Create mapping between datatypes and their number of occurrences.
1114 """
1115 # groupby dtype.name to collect e.g. Categorical columns
1116 return df.dtypes.value_counts().groupby(lambda x: x.name).sum()