Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/io/formats/info.py: 44%

1from __future__ import annotations

3from abc import (

4 ABC,

5 abstractmethod,

7import sys

8from textwrap import dedent

9from typing import (

10 TYPE_CHECKING,

11 Iterable,

12 Iterator,

13 Mapping,

14 Sequence,

15)

17from pandas._config import get_option

19from pandas._typing import (

20 Dtype,

21 WriteBuffer,

22)

24from pandas.io.formats import format as fmt

25from pandas.io.formats.printing import pprint_thing

27if TYPE_CHECKING: 27 ↛ 28line 27 didn't jump to line 28, because the condition on line 27 was never true

28 from pandas import (

29 DataFrame,

30 Index,

31 Series,

32 )

35frame_max_cols_sub = dedent(

36 """\

37 max_cols : int, optional

38 When to switch from the verbose to the truncated output. If the

39 DataFrame has more than `max_cols` columns, the truncated output

40 is used. By default, the setting in

41 ``pandas.options.display.max_info_columns`` is used."""

42)

45show_counts_sub = dedent(

46 """\

47 show_counts : bool, optional

48 Whether to show the non-null counts. By default, this is shown

49 only if the DataFrame is smaller than

50 ``pandas.options.display.max_info_rows`` and

51 ``pandas.options.display.max_info_columns``. A value of True always

52 shows the counts, and False never shows the counts."""

53)

55null_counts_sub = dedent(

56 """

57 null_counts : bool, optional

58 .. deprecated:: 1.2.0

59 Use show_counts instead."""

60)

63frame_examples_sub = dedent(

64 """\

65 >>> int_values = [1, 2, 3, 4, 5]

66 >>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon']

67 >>> float_values = [0.0, 0.25, 0.5, 0.75, 1.0]

68 >>> df = pd.DataFrame({"int_col": int_values, "text_col": text_values,

69 ... "float_col": float_values})

70 >>> df

71 int_col text_col float_col

72 0 1 alpha 0.00

73 1 2 beta 0.25

74 2 3 gamma 0.50

75 3 4 delta 0.75

76 4 5 epsilon 1.00

78 Prints information of all columns:

80 >>> df.info(verbose=True)

81 <class 'pandas.core.frame.DataFrame'>

82 RangeIndex: 5 entries, 0 to 4

83 Data columns (total 3 columns):

84 # Column Non-Null Count Dtype

85 --- ------ -------------- -----

86 0 int_col 5 non-null int64

87 1 text_col 5 non-null object

88 2 float_col 5 non-null float64

89 dtypes: float64(1), int64(1), object(1)

90 memory usage: 248.0+ bytes

92 Prints a summary of columns count and its dtypes but not per column

93 information:

95 >>> df.info(verbose=False)

96 <class 'pandas.core.frame.DataFrame'>

97 RangeIndex: 5 entries, 0 to 4

98 Columns: 3 entries, int_col to float_col

99 dtypes: float64(1), int64(1), object(1)

100 memory usage: 248.0+ bytes

101

102 Pipe output of DataFrame.info to a buffer instead of sys.stdout, get the

103 buffer content and write it to a text file:

104

105 >>> import io

106 >>> buffer = io.StringIO()

107 >>> df.info(buf=buffer)

108 >>> s = buffer.getvalue()

109 >>> with open("df_info.txt", "w",

110 ... encoding="utf-8") as f: # doctest: +SKIP

111 ... f.write(s)

112 260

113

114 The `memory_usage` parameter allows deep introspection mode, especially

115 useful for big DataFrames and for fine-tuning memory optimization:

116

117 >>> random_strings_array = np.random.choice(['a', 'b', 'c'], 10 ** 6)

118 >>> df = pd.DataFrame({

119 ... 'column_1': np.random.choice(['a', 'b', 'c'], 10 ** 6),

120 ... 'column_2': np.random.choice(['a', 'b', 'c'], 10 ** 6),

121 ... 'column_3': np.random.choice(['a', 'b', 'c'], 10 ** 6)

122 ... })

123 >>> df.info()

124 <class 'pandas.core.frame.DataFrame'>

125 RangeIndex: 1000000 entries, 0 to 999999

126 Data columns (total 3 columns):

127 # Column Non-Null Count Dtype

128 --- ------ -------------- -----

129 0 column_1 1000000 non-null object

130 1 column_2 1000000 non-null object

131 2 column_3 1000000 non-null object

132 dtypes: object(3)

133 memory usage: 22.9+ MB

134

135 >>> df.info(memory_usage='deep')

136 <class 'pandas.core.frame.DataFrame'>

137 RangeIndex: 1000000 entries, 0 to 999999

138 Data columns (total 3 columns):

139 # Column Non-Null Count Dtype

140 --- ------ -------------- -----

141 0 column_1 1000000 non-null object

142 1 column_2 1000000 non-null object

143 2 column_3 1000000 non-null object

144 dtypes: object(3)

145 memory usage: 165.9 MB"""

146)

147

148

149frame_see_also_sub = dedent(

150 """\

151 DataFrame.describe: Generate descriptive statistics of DataFrame

152 columns.

153 DataFrame.memory_usage: Memory usage of DataFrame columns."""

154)

155

156

# Substitutions that specialize INFO_DOCSTRING for DataFrame.info.
frame_sub_kwargs = dict(
    klass="DataFrame",
    type_sub=" and columns",
    max_cols_sub=frame_max_cols_sub,
    show_counts_sub=show_counts_sub,
    null_counts_sub=null_counts_sub,
    examples_sub=frame_examples_sub,
    see_also_sub=frame_see_also_sub,
    version_added_sub="",
)

167

168

169series_examples_sub = dedent(

170 """\

171 >>> int_values = [1, 2, 3, 4, 5]

172 >>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon']

173 >>> s = pd.Series(text_values, index=int_values)

174 >>> s.info()

175 <class 'pandas.core.series.Series'>

176 Int64Index: 5 entries, 1 to 5

177 Series name: None

178 Non-Null Count Dtype

179 -------------- -----

180 5 non-null object

181 dtypes: object(1)

182 memory usage: 80.0+ bytes

183

184 Prints a summary excluding information about its values:

185

186 >>> s.info(verbose=False)

187 <class 'pandas.core.series.Series'>

188 Int64Index: 5 entries, 1 to 5

189 dtypes: object(1)

190 memory usage: 80.0+ bytes

191

192 Pipe output of Series.info to a buffer instead of sys.stdout, get the

193 buffer content and write it to a text file:

194

195 >>> import io

196 >>> buffer = io.StringIO()

197 >>> s.info(buf=buffer)

198 >>> s = buffer.getvalue()

199 >>> with open("df_info.txt", "w",

200 ... encoding="utf-8") as f: # doctest: +SKIP

201 ... f.write(s)

202 260

203

204 The `memory_usage` parameter allows deep introspection mode, especially

205 useful for big Series and for fine-tuning memory optimization:

206

207 >>> random_strings_array = np.random.choice(['a', 'b', 'c'], 10 ** 6)

208 >>> s = pd.Series(np.random.choice(['a', 'b', 'c'], 10 ** 6))

209 >>> s.info()

210 <class 'pandas.core.series.Series'>

211 RangeIndex: 1000000 entries, 0 to 999999

212 Series name: None

213 Non-Null Count Dtype

214 -------------- -----

215 1000000 non-null object

216 dtypes: object(1)

217 memory usage: 7.6+ MB

218

219 >>> s.info(memory_usage='deep')

220 <class 'pandas.core.series.Series'>

221 RangeIndex: 1000000 entries, 0 to 999999

222 Series name: None

223 Non-Null Count Dtype

224 -------------- -----

225 1000000 non-null object

226 dtypes: object(1)

227 memory usage: 55.3 MB"""

228)

229

230

231series_see_also_sub = dedent(

232 """\

233 Series.describe: Generate descriptive statistics of Series.

234 Series.memory_usage: Memory usage of Series."""

235)

236

237

# Substitutions that specialize INFO_DOCSTRING for Series.info.
series_sub_kwargs = dict(
    klass="Series",
    type_sub="",
    max_cols_sub="",
    show_counts_sub=show_counts_sub,
    null_counts_sub="",
    examples_sub=series_examples_sub,
    see_also_sub=series_see_also_sub,
    version_added_sub="\n.. versionadded:: 1.4.0\n",
)

248

249

250INFO_DOCSTRING = dedent(

251 """

252 Print a concise summary of a {klass}.

253

254 This method prints information about a {klass} including

255 the index dtype{type_sub}, non-null values and memory usage.

256 {version_added_sub}\

257

258 Parameters

259 ----------

260 verbose : bool, optional

261 Whether to print the full summary. By default, the setting in

262 ``pandas.options.display.max_info_columns`` is followed.

263 buf : writable buffer, defaults to sys.stdout

264 Where to send the output. By default, the output is printed to

265 sys.stdout. Pass a writable buffer if you need to further process

266 the output.\

267 {max_cols_sub}

268 memory_usage : bool, str, optional

269 Specifies whether total memory usage of the {klass}

270 elements (including the index) should be displayed. By default,

271 this follows the ``pandas.options.display.memory_usage`` setting.

272

273 True always shows memory usage. False never shows memory usage.

274 A value of 'deep' is equivalent to "True with deep introspection".

275 Memory usage is shown in human-readable units (base-2

276 representation). Without deep introspection a memory estimation is

277 made based on column dtype and number of rows assuming values

278 consume the same memory amount for corresponding dtypes. With deep

279 memory introspection, a real memory usage calculation is performed

280 at the cost of computational resources. See the

281 :ref:`Frequently Asked Questions <df-memory-usage>` for more

282 details.

283 {show_counts_sub}{null_counts_sub}

284

285 Returns

286 -------

287 None

288 This method prints a summary of a {klass} and returns None.

289

290 See Also

291 --------

292 {see_also_sub}

293

294 Examples

295 --------

296 {examples_sub}

297 """

298)

299

300

def _put_str(s: str | Dtype, space: int) -> str:
    """
    Render ``s`` as text occupying exactly ``space`` characters.

    Text longer than ``space`` is cut off on the right; shorter text is
    padded on the right with blanks.

    Parameters
    ----------
    s : Union[str, Dtype]
        Value to render; it is converted with ``str`` first.
    space : int
        Width of the resulting string.

    Returns
    -------
    str
        The truncated or right-padded text.

    Examples
    --------
    >>> pd.io.formats.info._put_str("panda", 6)
    'panda '
    >>> pd.io.formats.info._put_str("panda", 4)
    'pand'
    """
    text = str(s)
    clipped = text[:space]
    return clipped.ljust(space)

325

326

def _sizeof_fmt(num: float, size_qualifier: str) -> str:
    """
    Format a byte count using base-2 units.

    The value is divided by 1024 until it drops below 1024 or the largest
    unit ("PB") is reached.

    Parameters
    ----------
    num : int
        Size in bytes.
    size_qualifier : str
        Text placed right after the number: either empty, or '+' when the
        size is only a lower bound.

    Returns
    -------
    str
        Size with one decimal place, the qualifier and the unit.

    Examples
    --------
    >>> _sizeof_fmt(23028, '')
    '22.5 KB'

    >>> _sizeof_fmt(23028, '+')
    '22.5+ KB'
    """
    units = ("bytes", "KB", "MB", "GB", "TB")
    position = 0
    while position < len(units):
        if num < 1024.0:
            return f"{num:3.1f}{size_qualifier} {units[position]}"
        num /= 1024.0
        position += 1
    # Anything that is still >= 1024 TB is reported in petabytes.
    return f"{num:3.1f}{size_qualifier} PB"

356

357

def _initialize_memory_usage(
    memory_usage: bool | str | None = None,
) -> bool | str:
    """
    Resolve the effective memory usage setting.

    An explicit ``memory_usage`` is returned unchanged; ``None`` falls back
    to the ``display.memory_usage`` option.
    """
    return get_option("display.memory_usage") if memory_usage is None else memory_usage

365

366

class BaseInfo(ABC):
    """
    Common interface of DataFrameInfo and SeriesInfo.

    Parameters
    ----------
    data : DataFrame or Series
        Object being summarized.
    memory_usage : bool or str, optional
        If "deep", introspect the data deeply by interrogating object dtypes
        for system-level memory consumption, and include it in the returned
        values.
    """

    data: DataFrame | Series
    memory_usage: bool | str

    @property
    @abstractmethod
    def dtypes(self) -> Iterable[Dtype]:
        """
        Dtypes.

        Returns
        -------
        dtypes : sequence
            Dtype of each of the DataFrame's columns (or one series column).
        """

    @property
    @abstractmethod
    def dtype_counts(self) -> Mapping[str, int]:
        """Mapping from dtype name to the number of occurrences."""

    @property
    @abstractmethod
    def non_null_counts(self) -> Sequence[int]:
        """Non-null counts of every column (a single entry for a series)."""

    @property
    @abstractmethod
    def memory_usage_bytes(self) -> int:
        """
        Memory usage in bytes.

        Returns
        -------
        memory_usage_bytes : int
            Object's total memory usage in bytes.
        """

    @property
    def memory_usage_string(self) -> str:
        """Human readable memory usage, terminated by a newline."""
        formatted = _sizeof_fmt(self.memory_usage_bytes, self.size_qualifier)
        return f"{formatted}\n"

    @property
    def size_qualifier(self) -> str:
        """Return "+" when the reported size is only a lower bound, else ""."""
        if not self.memory_usage or self.memory_usage == "deep":
            return ""
        # size_qualifier is just a best effort; not guaranteed to catch
        # all cases (e.g., it misses categorical data even with object
        # categories)
        if "object" in self.dtype_counts:
            return "+"
        if self.data.index._is_memory_usage_qualified():
            return "+"
        return ""

    @abstractmethod
    def render(
        self,
        *,
        buf: WriteBuffer[str] | None,
        max_cols: int | None,
        verbose: bool | None,
        show_counts: bool | None,
    ) -> None:
        """Write the summary to ``buf``; implemented by subclasses."""

448

449

class DataFrameInfo(BaseInfo):
    """
    Summary information about a DataFrame.

    Parameters
    ----------
    data : DataFrame
        Frame to summarize.
    memory_usage : bool or str, optional
        Memory usage setting; ``None`` is resolved by
        ``_initialize_memory_usage``.
    """

    def __init__(
        self,
        data: DataFrame,
        memory_usage: bool | str | None = None,
    ) -> None:
        self.data: DataFrame = data
        self.memory_usage = _initialize_memory_usage(memory_usage)

    @property
    def ids(self) -> Index:
        """
        Column names.

        Returns
        -------
        ids : Index
            DataFrame's column names.
        """
        return self.data.columns

    @property
    def col_count(self) -> int:
        """Number of columns to be summarized."""
        return len(self.ids)

    @property
    def dtypes(self) -> Iterable[Dtype]:
        """
        Dtypes.

        Returns
        -------
        dtypes
            Dtype of each of the DataFrame's columns.
        """
        return self.data.dtypes

    @property
    def dtype_counts(self) -> Mapping[str, int]:
        """Number of columns per dtype name."""
        return _get_dataframe_dtype_counts(self.data)

    @property
    def non_null_counts(self) -> Sequence[int]:
        """Non-null count of every column."""
        return self.data.count()

    @property
    def memory_usage_bytes(self) -> int:
        """Total memory usage in bytes, index included."""
        introspect_deeply = self.memory_usage == "deep"
        per_column_bytes = self.data.memory_usage(index=True, deep=introspect_deeply)
        return per_column_bytes.sum()

    def render(
        self,
        *,
        buf: WriteBuffer[str] | None,
        max_cols: int | None,
        verbose: bool | None,
        show_counts: bool | None,
    ) -> None:
        """Write the frame summary to ``buf`` through a DataFrameInfoPrinter."""
        DataFrameInfoPrinter(
            info=self,
            max_cols=max_cols,
            verbose=verbose,
            show_counts=show_counts,
        ).to_buffer(buf)

524

525

class SeriesInfo(BaseInfo):
    """
    Summary information about a Series.

    Parameters
    ----------
    data : Series
        Series to summarize.
    memory_usage : bool or str, optional
        Memory usage setting; ``None`` is resolved by
        ``_initialize_memory_usage``.
    """

    def __init__(
        self,
        data: Series,
        memory_usage: bool | str | None = None,
    ) -> None:
        self.data: Series = data
        self.memory_usage = _initialize_memory_usage(memory_usage)

    def render(
        self,
        *,
        buf: WriteBuffer[str] | None = None,
        max_cols: int | None = None,
        verbose: bool | None = None,
        show_counts: bool | None = None,
    ) -> None:
        """
        Write the series summary to ``buf`` through a SeriesInfoPrinter.

        Raises
        ------
        ValueError
            If ``max_cols`` is given; it is only meaningful for DataFrame.info.
        """
        if max_cols is not None:
            message = (
                "Argument `max_cols` can only be passed "
                "in DataFrame.info, not Series.info"
            )
            raise ValueError(message)
        SeriesInfoPrinter(
            info=self,
            verbose=verbose,
            show_counts=show_counts,
        ).to_buffer(buf)

    @property
    def dtypes(self) -> Iterable[Dtype]:
        """Single-element list holding the series dtype."""
        return [self.data.dtypes]

    @property
    def dtype_counts(self) -> Mapping[str, int]:
        """Number of occurrences per dtype name."""
        from pandas.core.frame import DataFrame

        as_frame = DataFrame(self.data)
        return _get_dataframe_dtype_counts(as_frame)

    @property
    def non_null_counts(self) -> Sequence[int]:
        """Single-element list holding the series' non-null count."""
        return [self.data.count()]

    @property
    def memory_usage_bytes(self) -> int:
        """Memory usage in bytes.

        Returns
        -------
        memory_usage_bytes : int
            Object's total memory usage in bytes.
        """
        introspect_deeply = self.memory_usage == "deep"
        return self.data.memory_usage(index=True, deep=introspect_deeply)

587

588

class InfoPrinterAbstract(ABC):
    """
    Class for printing dataframe or series info.

    Subclasses must implement ``_create_table_builder``. Deriving from
    ``ABC`` is what makes the ``@abstractmethod`` marker effective: without
    it an incomplete printer could be instantiated and would only fail once
    ``to_buffer`` was called.
    """

    def to_buffer(self, buf: WriteBuffer[str] | None = None) -> None:
        """
        Write the info lines into a buffer.

        Parameters
        ----------
        buf : writable buffer, optional
            Destination of the output; ``sys.stdout`` is used when None.
        """
        table_builder = self._create_table_builder()
        lines = table_builder.get_lines()
        if buf is None:  # pragma: no cover
            buf = sys.stdout
        fmt.buffer_put_lines(buf, lines)

    @abstractmethod
    def _create_table_builder(self) -> TableBuilderAbstract:
        """Create instance of table builder."""

605

606

class DataFrameInfoPrinter(InfoPrinterAbstract):
    """
    Class for printing dataframe info.

    Parameters
    ----------
    info : DataFrameInfo
        Instance of DataFrameInfo.
    max_cols : int, optional
        When to switch from the verbose to the truncated output.
    verbose : bool, optional
        Whether to print the full summary.
    show_counts : bool, optional
        Whether to show the non-null counts.
    """

    def __init__(
        self,
        info: DataFrameInfo,
        max_cols: int | None = None,
        verbose: bool | None = None,
        show_counts: bool | None = None,
    ) -> None:
        self.info = info
        self.data = info.data
        self.verbose = verbose
        self.max_cols = self._initialize_max_cols(max_cols)
        self.show_counts = self._initialize_show_counts(show_counts)

    @property
    def max_rows(self) -> int:
        """
        Maximum number of rows for which non-null counts are shown by default.

        Read from ``display.max_info_rows``. That option may be set to None;
        in that case a value one larger than the frame length is returned so
        that ``exceeds_info_rows`` is False instead of raising ``TypeError``
        on an ``int > None`` comparison.
        """
        # NOTE: the second positional argument of get_option is ``silent``,
        # not a fallback value, so the None case is handled explicitly here.
        limit = get_option("display.max_info_rows")
        if limit is None:
            return len(self.data) + 1
        return limit

    @property
    def exceeds_info_cols(self) -> bool:
        """Whether the number of columns is greater than ``max_cols``."""
        return bool(self.col_count > self.max_cols)

    @property
    def exceeds_info_rows(self) -> bool:
        """Whether the number of rows is greater than ``max_rows``."""
        return bool(len(self.data) > self.max_rows)

    @property
    def col_count(self) -> int:
        """Number of columns to be summarized."""
        return self.info.col_count

    def _initialize_max_cols(self, max_cols: int | None) -> int:
        """Return ``max_cols``, or ``display.max_info_columns`` when it is None."""
        if max_cols is None:
            return get_option("display.max_info_columns")
        return max_cols

    def _initialize_show_counts(self, show_counts: bool | None) -> bool:
        """
        Return ``show_counts``; when it is None, counts are shown only if the
        frame exceeds neither the column nor the row limit.
        """
        if show_counts is None:
            return bool(not self.exceeds_info_cols and not self.exceeds_info_rows)
        return show_counts

    def _create_table_builder(self) -> DataFrameTableBuilder:
        """
        Create instance of table builder based on verbosity and display settings.
        """
        if self.verbose:
            wants_verbose = True
        elif self.verbose is False:  # specifically set to False, not necessarily None
            wants_verbose = False
        else:
            # verbosity not requested either way: decide by the column limit
            wants_verbose = not self.exceeds_info_cols

        if wants_verbose:
            return DataFrameTableBuilderVerbose(
                info=self.info,
                with_counts=self.show_counts,
            )
        return DataFrameTableBuilderNonVerbose(info=self.info)

686

687

class SeriesInfoPrinter(InfoPrinterAbstract):
    """Printer of series info.

    Parameters
    ----------
    info : SeriesInfo
        Instance of SeriesInfo.
    verbose : bool, optional
        Whether to print the full summary.
    show_counts : bool, optional
        Whether to show the non-null counts.
    """

    def __init__(
        self,
        info: SeriesInfo,
        verbose: bool | None = None,
        show_counts: bool | None = None,
    ) -> None:
        self.info = info
        self.data = info.data
        self.verbose = verbose
        self.show_counts = self._initialize_show_counts(show_counts)

    def _initialize_show_counts(self, show_counts: bool | None) -> bool:
        """Return ``show_counts``, treating None as True."""
        return True if show_counts is None else show_counts

    def _create_table_builder(self) -> SeriesTableBuilder:
        """
        Create instance of table builder based on verbosity.

        The non-verbose builder is used only for a falsy ``verbose`` that is
        not None; None and truthy values give the verbose builder.
        """
        if not self.verbose and self.verbose is not None:
            return SeriesTableBuilderNonVerbose(info=self.info)
        return SeriesTableBuilderVerbose(
            info=self.info,
            with_counts=self.show_counts,
        )

729

730

class TableBuilderAbstract(ABC):
    """
    Abstract builder of the info table.

    Concrete builders collect output lines in ``_lines`` and read everything
    they print from ``info``.
    """

    _lines: list[str]
    info: BaseInfo

    @abstractmethod
    def get_lines(self) -> list[str]:
        """Product in a form of list of lines (strings)."""

    @property
    def data(self) -> DataFrame | Series:
        """Object being summarized."""
        return self.info.data

    @property
    def dtypes(self) -> Iterable[Dtype]:
        """Dtypes of each of the DataFrame's columns."""
        return self.info.dtypes

    @property
    def dtype_counts(self) -> Mapping[str, int]:
        """Mapping dtype - number of counts."""
        return self.info.dtype_counts

    @property
    def non_null_counts(self) -> Sequence[int]:
        """Non-null counts as provided by ``info``."""
        return self.info.non_null_counts

    @property
    def display_memory_usage(self) -> bool:
        """Whether to display memory usage."""
        return bool(self.info.memory_usage)

    @property
    def memory_usage_string(self) -> str:
        """Memory usage string with proper size qualifier."""
        return self.info.memory_usage_string

    def add_object_type_line(self) -> None:
        """Add line with the string representation of the data's type."""
        type_repr = str(type(self.data))
        self._lines.append(type_repr)

    def add_index_range_line(self) -> None:
        """Add line with range of indices to the table."""
        index_summary = self.data.index._summary()
        self._lines.append(index_summary)

    def add_dtypes_line(self) -> None:
        """Add summary line listing each dtype with its count, sorted by name."""
        parts = []
        for name, count in sorted(self.dtype_counts.items()):
            parts.append(f"{name}({count:d})")
        self._lines.append(f"dtypes: {', '.join(parts)}")

785

786

class DataFrameTableBuilder(TableBuilderAbstract):
    """
    Abstract builder of the dataframe info table.

    Parameters
    ----------
    info : DataFrameInfo.
        Instance of DataFrameInfo.
    """

    def __init__(self, *, info: DataFrameInfo) -> None:
        self.info: DataFrameInfo = info

    @property
    def data(self) -> DataFrame:
        """DataFrame."""
        return self.info.data

    @property
    def ids(self) -> Index:
        """Dataframe columns."""
        return self.info.ids

    @property
    def col_count(self) -> int:
        """Number of dataframe columns to be summarized."""
        return self.info.col_count

    def get_lines(self) -> list[str]:
        """Build the table lines from scratch and return them."""
        self._lines = []
        has_no_columns = self.col_count == 0
        fill = self._fill_empty_info if has_no_columns else self._fill_non_empty_info
        fill()
        return self._lines

    def _fill_empty_info(self) -> None:
        """Add lines to the info table, pertaining to empty dataframe."""
        self.add_object_type_line()
        self.add_index_range_line()
        class_name = type(self.data).__name__
        self._lines.append(f"Empty {class_name}\n")

    @abstractmethod
    def _fill_non_empty_info(self) -> None:
        """Add lines to the info table, pertaining to non-empty dataframe."""

    def add_memory_usage_line(self) -> None:
        """Add line containing memory usage."""
        usage = self.memory_usage_string
        self._lines.append(f"memory usage: {usage}")

836

837

class DataFrameTableBuilderNonVerbose(DataFrameTableBuilder):
    """
    Builder of the truncated (non-verbose) dataframe info table.
    """

    def add_columns_summary_line(self) -> None:
        """Add the one-line summary of the columns index."""
        columns_summary = self.ids._summary(name="Columns")
        self._lines.append(columns_summary)

    def _fill_non_empty_info(self) -> None:
        """Add lines to the info table, pertaining to non-empty dataframe."""
        self.add_object_type_line()
        self.add_index_range_line()
        self.add_columns_summary_line()
        self.add_dtypes_line()
        if not self.display_memory_usage:
            return
        self.add_memory_usage_line()

854

855

class TableBuilderVerboseMixin(TableBuilderAbstract):
    """
    Shared table-rendering logic for verbose info output.
    """

    SPACING: str = " " * 2
    strrows: Sequence[Sequence[str]]
    gross_column_widths: Sequence[int]
    with_counts: bool

    @property
    @abstractmethod
    def headers(self) -> Sequence[str]:
        """Headers names of the columns in verbose table."""

    @property
    def header_column_widths(self) -> Sequence[int]:
        """Widths of header columns (only titles)."""
        return [len(title) for title in self.headers]

    def _get_gross_column_widths(self) -> Sequence[int]:
        """Get per-column widths: the larger of header width and body width."""
        body_widths = self._get_body_column_widths()
        header_widths = self.header_column_widths
        return [max(header, body) for header, body in zip(header_widths, body_widths)]

    def _get_body_column_widths(self) -> Sequence[int]:
        """Get widths of table content columns."""
        # transpose the rows so that every tuple holds the cells of one column
        return [max(map(len, column)) for column in zip(*self.strrows)]

    def _gen_rows(self) -> Iterator[Sequence[str]]:
        """
        Return the iterator over the rows content.

        Each element represents a row comprising a sequence of strings.
        """
        make_rows = (
            self._gen_rows_with_counts
            if self.with_counts
            else self._gen_rows_without_counts
        )
        return make_rows()

    @abstractmethod
    def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data with counts."""

    @abstractmethod
    def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data without counts."""

    def add_header_line(self) -> None:
        """Add the line with the column titles, each padded to its column width."""
        cells = [
            _put_str(title, width)
            for title, width in zip(self.headers, self.gross_column_widths)
        ]
        self._lines.append(self.SPACING.join(cells))

    def add_separator_line(self) -> None:
        """Add the line of dashes; each run of dashes is as long as its title."""
        width_pairs = zip(self.header_column_widths, self.gross_column_widths)
        cells = [
            _put_str("-" * title_width, column_width)
            for title_width, column_width in width_pairs
        ]
        self._lines.append(self.SPACING.join(cells))

    def add_body_lines(self) -> None:
        """Add one line per row of ``strrows``, cells padded to column width."""
        for row in self.strrows:
            cells = [
                _put_str(cell, width)
                for cell, width in zip(row, self.gross_column_widths)
            ]
            self._lines.append(self.SPACING.join(cells))

    def _gen_non_null_counts(self) -> Iterator[str]:
        """Iterator with string representation of non-null counts."""
        yield from (f"{count} non-null" for count in self.non_null_counts)

    def _gen_dtypes(self) -> Iterator[str]:
        """Iterator with string representation of column dtypes."""
        yield from map(pprint_thing, self.dtypes)

947

948

class DataFrameTableBuilderVerbose(DataFrameTableBuilder, TableBuilderVerboseMixin):
    """
    Builder of the full (verbose) dataframe info table.

    Parameters
    ----------
    info : DataFrameInfo
        Instance of DataFrameInfo.
    with_counts : bool
        Whether the table has a "Non-Null Count" column.
    """

    def __init__(
        self,
        *,
        info: DataFrameInfo,
        with_counts: bool,
    ) -> None:
        self.info = info
        self.with_counts = with_counts
        # rows are materialized first because the column widths depend on them
        self.strrows: Sequence[Sequence[str]] = list(self._gen_rows())
        self.gross_column_widths: Sequence[int] = self._get_gross_column_widths()

    @property
    def headers(self) -> Sequence[str]:
        """Headers names of the columns in verbose table."""
        titles = [" # ", "Column", "Dtype"]
        if self.with_counts:
            titles.insert(2, "Non-Null Count")
        return titles

    def add_columns_summary_line(self) -> None:
        """Add the line announcing the total number of columns."""
        total = self.col_count
        self._lines.append(f"Data columns (total {total} columns):")

    def _fill_non_empty_info(self) -> None:
        """Add lines to the info table, pertaining to non-empty dataframe."""
        self.add_object_type_line()
        self.add_index_range_line()
        self.add_columns_summary_line()
        self.add_header_line()
        self.add_separator_line()
        self.add_body_lines()
        self.add_dtypes_line()
        if not self.display_memory_usage:
            return
        self.add_memory_usage_line()

    def _gen_line_numbers(self) -> Iterator[str]:
        """Iterator with string representation of column numbers."""
        yield from (f" {position}" for position, _ in enumerate(self.ids))

    def _gen_columns(self) -> Iterator[str]:
        """Iterator with string representation of column names."""
        yield from map(pprint_thing, self.ids)

    def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data with counts."""
        numbers = self._gen_line_numbers()
        names = self._gen_columns()
        counts = self._gen_non_null_counts()
        dtypes = self._gen_dtypes()
        yield from zip(numbers, names, counts, dtypes)

    def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data without counts."""
        numbers = self._gen_line_numbers()
        names = self._gen_columns()
        dtypes = self._gen_dtypes()
        yield from zip(numbers, names, dtypes)

1013

1014

class SeriesTableBuilder(TableBuilderAbstract):
    """
    Abstract builder of the series info table.

    Parameters
    ----------
    info : SeriesInfo.
        Instance of SeriesInfo.
    """

    def __init__(self, *, info: SeriesInfo) -> None:
        self.info: SeriesInfo = info

    @property
    def data(self) -> Series:
        """Series."""
        return self.info.data

    def get_lines(self) -> list[str]:
        """Build the table lines from scratch and return them."""
        self._lines = []
        self._fill_non_empty_info()
        return self._lines

    @abstractmethod
    def _fill_non_empty_info(self) -> None:
        """Add lines to the info table, pertaining to non-empty series."""

    def add_memory_usage_line(self) -> None:
        """Add line containing memory usage."""
        usage = self.memory_usage_string
        self._lines.append(f"memory usage: {usage}")

1045

1046

class SeriesTableBuilderNonVerbose(SeriesTableBuilder):
    """
    Builder of the truncated (non-verbose) series info table.
    """

    def _fill_non_empty_info(self) -> None:
        """Add lines to the info table, pertaining to non-empty series."""
        self.add_object_type_line()
        self.add_index_range_line()
        self.add_dtypes_line()
        if not self.display_memory_usage:
            return
        self.add_memory_usage_line()

1059

1060

class SeriesTableBuilderVerbose(SeriesTableBuilder, TableBuilderVerboseMixin):
    """
    Series info table builder for verbose output.

    Parameters
    ----------
    info : SeriesInfo
        Instance of SeriesInfo.
    with_counts : bool
        Whether the table has a "Non-Null Count" column.
    """

    def __init__(
        self,
        *,
        info: SeriesInfo,
        with_counts: bool,
    ) -> None:
        self.info = info
        self.with_counts = with_counts
        # rows are materialized first because the column widths depend on them
        self.strrows: Sequence[Sequence[str]] = list(self._gen_rows())
        self.gross_column_widths: Sequence[int] = self._get_gross_column_widths()

    def _fill_non_empty_info(self) -> None:
        """Add lines to the info table, pertaining to non-empty series."""
        self.add_object_type_line()
        self.add_index_range_line()
        self.add_series_name_line()
        self.add_header_line()
        self.add_separator_line()
        self.add_body_lines()
        self.add_dtypes_line()
        if self.display_memory_usage:
            self.add_memory_usage_line()

    def add_series_name_line(self) -> None:
        """Add the line showing the name of the series."""
        self._lines.append(f"Series name: {self.data.name}")

    @property
    def headers(self) -> Sequence[str]:
        """Headers names of the columns in verbose table."""
        if self.with_counts:
            return ["Non-Null Count", "Dtype"]
        return ["Dtype"]

    def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data without counts."""
        # Every row must be a sequence of cells. Yielding the bare dtype
        # string would make the mixin treat each character as a separate
        # cell and print only the first character in the "Dtype" column.
        for dtype in self._gen_dtypes():
            yield (dtype,)

    def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data with counts."""
        yield from zip(
            self._gen_non_null_counts(),
            self._gen_dtypes(),
        )

1109

1110

def _get_dataframe_dtype_counts(df: DataFrame) -> Mapping[str, int]:
    """
    Count how many columns of ``df`` there are per dtype name.
    """
    per_dtype = df.dtypes.value_counts()
    # groupby dtype.name to collect e.g. Categorical columns
    by_name = per_dtype.groupby(lambda dtype: dtype.name)
    return by_name.sum()