Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/io/formats/info.py: 44%

363 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1from __future__ import annotations 

2 

3from abc import ( 

4 ABC, 

5 abstractmethod, 

6) 

7import sys 

8from textwrap import dedent 

9from typing import ( 

10 TYPE_CHECKING, 

11 Iterable, 

12 Iterator, 

13 Mapping, 

14 Sequence, 

15) 

16 

17from pandas._config import get_option 

18 

19from pandas._typing import ( 

20 Dtype, 

21 WriteBuffer, 

22) 

23 

24from pandas.io.formats import format as fmt 

25from pandas.io.formats.printing import pprint_thing 

26 

27if TYPE_CHECKING: 27 ↛ 28line 27 didn't jump to line 28, because the condition on line 27 was never true

28 from pandas import ( 

29 DataFrame, 

30 Index, 

31 Series, 

32 ) 

33 

34 

# Building blocks for the ``info`` docstrings of DataFrame and Series.
# ``INFO_DOCSTRING`` is the shared template; ``frame_sub_kwargs`` and
# ``series_sub_kwargs`` hold the values substituted into its ``{...}`` fields.

# ``max_cols`` parameter entry (only meaningful for DataFrame.info).
frame_max_cols_sub = dedent(
    """\
    max_cols : int, optional
        When to switch from the verbose to the truncated output. If the
        DataFrame has more than `max_cols` columns, the truncated output
        is used. By default, the setting in
        ``pandas.options.display.max_info_columns`` is used."""
)


# ``show_counts`` parameter entry, shared by DataFrame and Series.
show_counts_sub = dedent(
    """\
    show_counts : bool, optional
        Whether to show the non-null counts. By default, this is shown
        only if the DataFrame is smaller than
        ``pandas.options.display.max_info_rows`` and
        ``pandas.options.display.max_info_columns``. A value of True always
        shows the counts, and False never shows the counts."""
)

# Deprecated ``null_counts`` parameter entry. It starts with a newline so it
# can be appended directly after ``show_counts_sub`` in the template.
null_counts_sub = dedent(
    """
    null_counts : bool, optional
        .. deprecated:: 1.2.0
            Use show_counts instead."""
)


# "Examples" section for DataFrame.info.
frame_examples_sub = dedent(
    """\
    >>> int_values = [1, 2, 3, 4, 5]
    >>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon']
    >>> float_values = [0.0, 0.25, 0.5, 0.75, 1.0]
    >>> df = pd.DataFrame({"int_col": int_values, "text_col": text_values,
    ...                    "float_col": float_values})
    >>> df
        int_col text_col  float_col
    0        1    alpha       0.00
    1        2     beta       0.25
    2        3    gamma       0.50
    3        4    delta       0.75
    4        5  epsilon       1.00

    Prints information of all columns:

    >>> df.info(verbose=True)
    <class 'pandas.core.frame.DataFrame'>
    RangeIndex: 5 entries, 0 to 4
    Data columns (total 3 columns):
     #   Column     Non-Null Count  Dtype
    ---  ------     --------------  -----
     0   int_col    5 non-null      int64
     1   text_col   5 non-null      object
     2   float_col  5 non-null      float64
    dtypes: float64(1), int64(1), object(1)
    memory usage: 248.0+ bytes

    Prints a summary of columns count and its dtypes but not per column
    information:

    >>> df.info(verbose=False)
    <class 'pandas.core.frame.DataFrame'>
    RangeIndex: 5 entries, 0 to 4
    Columns: 3 entries, int_col to float_col
    dtypes: float64(1), int64(1), object(1)
    memory usage: 248.0+ bytes

    Pipe output of DataFrame.info to buffer instead of sys.stdout, get
    buffer content and writes to a text file:

    >>> import io
    >>> buffer = io.StringIO()
    >>> df.info(buf=buffer)
    >>> s = buffer.getvalue()
    >>> with open("df_info.txt", "w",
    ...           encoding="utf-8") as f:  # doctest: +SKIP
    ...     f.write(s)
    260

    The `memory_usage` parameter allows deep introspection mode, especially
    useful for big DataFrames and fine-tune memory optimization:

    >>> df = pd.DataFrame({
    ...     'column_1': np.random.choice(['a', 'b', 'c'], 10 ** 6),
    ...     'column_2': np.random.choice(['a', 'b', 'c'], 10 ** 6),
    ...     'column_3': np.random.choice(['a', 'b', 'c'], 10 ** 6)
    ... })
    >>> df.info()
    <class 'pandas.core.frame.DataFrame'>
    RangeIndex: 1000000 entries, 0 to 999999
    Data columns (total 3 columns):
     #   Column    Non-Null Count    Dtype
    ---  ------    --------------    -----
     0   column_1  1000000 non-null  object
     1   column_2  1000000 non-null  object
     2   column_3  1000000 non-null  object
    dtypes: object(3)
    memory usage: 22.9+ MB

    >>> df.info(memory_usage='deep')
    <class 'pandas.core.frame.DataFrame'>
    RangeIndex: 1000000 entries, 0 to 999999
    Data columns (total 3 columns):
     #   Column    Non-Null Count    Dtype
    ---  ------    --------------    -----
     0   column_1  1000000 non-null  object
     1   column_2  1000000 non-null  object
     2   column_3  1000000 non-null  object
    dtypes: object(3)
    memory usage: 165.9 MB"""
)


# "See Also" section for DataFrame.info.
frame_see_also_sub = dedent(
    """\
    DataFrame.describe: Generate descriptive statistics of DataFrame
        columns.
    DataFrame.memory_usage: Memory usage of DataFrame columns."""
)


frame_sub_kwargs = {
    "klass": "DataFrame",
    "type_sub": " and columns",
    # The template places ``{max_cols_sub}`` directly after the ``buf``
    # description, so the entry must bring its own leading newline to start
    # a new parameter (the Series variant substitutes an empty string).
    "max_cols_sub": "\n" + frame_max_cols_sub,
    "show_counts_sub": show_counts_sub,
    "null_counts_sub": null_counts_sub,
    "examples_sub": frame_examples_sub,
    "see_also_sub": frame_see_also_sub,
    "version_added_sub": "",
}


# "Examples" section for Series.info.
series_examples_sub = dedent(
    """\
    >>> int_values = [1, 2, 3, 4, 5]
    >>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon']
    >>> s = pd.Series(text_values, index=int_values)
    >>> s.info()
    <class 'pandas.core.series.Series'>
    Int64Index: 5 entries, 1 to 5
    Series name: None
    Non-Null Count  Dtype
    --------------  -----
    5 non-null      object
    dtypes: object(1)
    memory usage: 80.0+ bytes

    Prints a summary excluding information about its values:

    >>> s.info(verbose=False)
    <class 'pandas.core.series.Series'>
    Int64Index: 5 entries, 1 to 5
    dtypes: object(1)
    memory usage: 80.0+ bytes

    Pipe output of Series.info to buffer instead of sys.stdout, get
    buffer content and writes to a text file:

    >>> import io
    >>> buffer = io.StringIO()
    >>> s.info(buf=buffer)
    >>> s = buffer.getvalue()
    >>> with open("df_info.txt", "w",
    ...           encoding="utf-8") as f:  # doctest: +SKIP
    ...     f.write(s)
    260

    The `memory_usage` parameter allows deep introspection mode, especially
    useful for big Series and fine-tune memory optimization:

    >>> s = pd.Series(np.random.choice(['a', 'b', 'c'], 10 ** 6))
    >>> s.info()
    <class 'pandas.core.series.Series'>
    RangeIndex: 1000000 entries, 0 to 999999
    Series name: None
    Non-Null Count    Dtype
    --------------    -----
    1000000 non-null  object
    dtypes: object(1)
    memory usage: 7.6+ MB

    >>> s.info(memory_usage='deep')
    <class 'pandas.core.series.Series'>
    RangeIndex: 1000000 entries, 0 to 999999
    Series name: None
    Non-Null Count    Dtype
    --------------    -----
    1000000 non-null  object
    dtypes: object(1)
    memory usage: 55.3 MB"""
)


# "See Also" section for Series.info.
series_see_also_sub = dedent(
    """\
    Series.describe: Generate descriptive statistics of Series.
    Series.memory_usage: Memory usage of Series."""
)


series_sub_kwargs = {
    "klass": "Series",
    "type_sub": "",
    "max_cols_sub": "",
    "show_counts_sub": show_counts_sub,
    "null_counts_sub": "",
    "examples_sub": series_examples_sub,
    "see_also_sub": series_see_also_sub,
    "version_added_sub": "\n.. versionadded:: 1.4.0\n",
}


# Shared docstring template. ``{max_cols_sub}`` is substituted inline right
# after the ``buf`` description: a trailing-backslash continuation here would
# glue the ``max_cols`` entry onto the end of that description.
INFO_DOCSTRING = dedent(
    """
    Print a concise summary of a {klass}.

    This method prints information about a {klass} including
    the index dtype{type_sub}, non-null values and memory usage.
    {version_added_sub}\

    Parameters
    ----------
    verbose : bool, optional
        Whether to print the full summary. By default, the setting in
        ``pandas.options.display.max_info_columns`` is followed.
    buf : writable buffer, defaults to sys.stdout
        Where to send the output. By default, the output is printed to
        sys.stdout. Pass a writable buffer if you need to further process
        the output.{max_cols_sub}
    memory_usage : bool, str, optional
        Specifies whether total memory usage of the {klass}
        elements (including the index) should be displayed. By default,
        this follows the ``pandas.options.display.memory_usage`` setting.

        True always shows memory usage. False never shows memory usage.
        A value of 'deep' is equivalent to "True with deep introspection".
        Memory usage is shown in human-readable units (base-2
        representation). Without deep introspection a memory estimation is
        made based on column dtype and number of rows assuming values
        consume the same memory amount for corresponding dtypes. With deep
        memory introspection, a real memory usage calculation is performed
        at the cost of computational resources. See the
        :ref:`Frequently Asked Questions <df-memory-usage>` for more
        details.
    {show_counts_sub}{null_counts_sub}

    Returns
    -------
    None
        This method prints a summary of a {klass} and returns None.

    See Also
    --------
    {see_also_sub}

    Examples
    --------
    {examples_sub}
    """
)

299 

300 

301def _put_str(s: str | Dtype, space: int) -> str: 

302 """ 

303 Make string of specified length, padding to the right if necessary. 

304 

305 Parameters 

306 ---------- 

307 s : Union[str, Dtype] 

308 String to be formatted. 

309 space : int 

310 Length to force string to be of. 

311 

312 Returns 

313 ------- 

314 str 

315 String coerced to given length. 

316 

317 Examples 

318 -------- 

319 >>> pd.io.formats.info._put_str("panda", 6) 

320 'panda ' 

321 >>> pd.io.formats.info._put_str("panda", 4) 

322 'pand' 

323 """ 

324 return str(s)[:space].ljust(space) 

325 

326 

327def _sizeof_fmt(num: float, size_qualifier: str) -> str: 

328 """ 

329 Return size in human readable format. 

330 

331 Parameters 

332 ---------- 

333 num : int 

334 Size in bytes. 

335 size_qualifier : str 

336 Either empty, or '+' (if lower bound). 

337 

338 Returns 

339 ------- 

340 str 

341 Size in human readable format. 

342 

343 Examples 

344 -------- 

345 >>> _sizeof_fmt(23028, '') 

346 '22.5 KB' 

347 

348 >>> _sizeof_fmt(23028, '+') 

349 '22.5+ KB' 

350 """ 

351 for x in ["bytes", "KB", "MB", "GB", "TB"]: 

352 if num < 1024.0: 

353 return f"{num:3.1f}{size_qualifier} {x}" 

354 num /= 1024.0 

355 return f"{num:3.1f}{size_qualifier} PB" 

356 

357 

358def _initialize_memory_usage( 

359 memory_usage: bool | str | None = None, 

360) -> bool | str: 

361 """Get memory usage based on inputs and display options.""" 

362 if memory_usage is None: 

363 memory_usage = get_option("display.memory_usage") 

364 return memory_usage 

365 

366 

class BaseInfo(ABC):
    """
    Common interface of DataFrameInfo and SeriesInfo.

    Parameters
    ----------
    data : DataFrame or Series
        Object to be summarized.
    memory_usage : bool or str, optional
        If "deep", introspect the data deeply by interrogating object dtypes
        for system-level memory consumption, and include it in the returned
        values.
    """

    data: DataFrame | Series
    memory_usage: bool | str

    @property
    @abstractmethod
    def dtypes(self) -> Iterable[Dtype]:
        """
        Dtypes.

        Returns
        -------
        dtypes : sequence
            Dtype of each of the DataFrame's columns (or one series column).
        """

    @property
    @abstractmethod
    def dtype_counts(self) -> Mapping[str, int]:
        """Mapping from dtype name to the number of times it occurs."""

    @property
    @abstractmethod
    def non_null_counts(self) -> Sequence[int]:
        """Non-null counts, one per column (a single one for a series)."""

    @property
    @abstractmethod
    def memory_usage_bytes(self) -> int:
        """
        Memory usage in bytes.

        Returns
        -------
        memory_usage_bytes : int
            Object's total memory usage in bytes.
        """

    @property
    def memory_usage_string(self) -> str:
        """Human-readable memory usage, terminated by a newline."""
        formatted = _sizeof_fmt(self.memory_usage_bytes, self.size_qualifier)
        return f"{formatted}\n"

    @property
    def size_qualifier(self) -> str:
        """'+' when the reported memory usage is only a lower bound, else ''."""
        if not self.memory_usage or self.memory_usage == "deep":
            return ""
        # size_qualifier is just a best effort; not guaranteed to catch
        # all cases (e.g., it misses categorical data even with object
        # categories)
        lower_bound_only = (
            "object" in self.dtype_counts
            or self.data.index._is_memory_usage_qualified()
        )
        return "+" if lower_bound_only else ""

    @abstractmethod
    def render(
        self,
        *,
        buf: WriteBuffer[str] | None,
        max_cols: int | None,
        verbose: bool | None,
        show_counts: bool | None,
    ) -> None:
        pass

448 

449 

class DataFrameInfo(BaseInfo):
    """
    Info container for a DataFrame.
    """

    def __init__(
        self,
        data: DataFrame,
        memory_usage: bool | str | None = None,
    ) -> None:
        self.data: DataFrame = data
        self.memory_usage = _initialize_memory_usage(memory_usage)

    @property
    def dtype_counts(self) -> Mapping[str, int]:
        """Mapping from dtype name to the number of columns of that dtype."""
        return _get_dataframe_dtype_counts(self.data)

    @property
    def dtypes(self) -> Iterable[Dtype]:
        """
        Dtypes.

        Returns
        -------
        dtypes
            Dtype of each of the DataFrame's columns.
        """
        return self.data.dtypes

    @property
    def ids(self) -> Index:
        """
        Column names.

        Returns
        -------
        ids : Index
            DataFrame's column names.
        """
        return self.data.columns

    @property
    def col_count(self) -> int:
        """Number of columns to be summarized."""
        return len(self.ids)

    @property
    def non_null_counts(self) -> Sequence[int]:
        """Non-null counts as returned by ``DataFrame.count()``."""
        return self.data.count()

    @property
    def memory_usage_bytes(self) -> int:
        """Total memory usage in bytes, index included."""
        # Only the literal "deep" triggers deep introspection.
        deep = self.memory_usage == "deep"
        per_column = self.data.memory_usage(index=True, deep=deep)
        return per_column.sum()

    def render(
        self,
        *,
        buf: WriteBuffer[str] | None,
        max_cols: int | None,
        verbose: bool | None,
        show_counts: bool | None,
    ) -> None:
        """Build a DataFrameInfoPrinter for this info and write it to ``buf``."""
        DataFrameInfoPrinter(
            info=self,
            max_cols=max_cols,
            verbose=verbose,
            show_counts=show_counts,
        ).to_buffer(buf)

524 

525 

class SeriesInfo(BaseInfo):
    """
    Info container for a Series.
    """

    def __init__(
        self,
        data: Series,
        memory_usage: bool | str | None = None,
    ) -> None:
        self.data: Series = data
        self.memory_usage = _initialize_memory_usage(memory_usage)

    def render(
        self,
        *,
        buf: WriteBuffer[str] | None = None,
        max_cols: int | None = None,
        verbose: bool | None = None,
        show_counts: bool | None = None,
    ) -> None:
        """
        Build a SeriesInfoPrinter for this info and write it to ``buf``.

        Raises
        ------
        ValueError
            If ``max_cols`` is given; it only applies to DataFrame.info.
        """
        if max_cols is not None:
            raise ValueError(
                "Argument `max_cols` can only be passed "
                "in DataFrame.info, not Series.info"
            )
        SeriesInfoPrinter(
            info=self,
            verbose=verbose,
            show_counts=show_counts,
        ).to_buffer(buf)

    @property
    def non_null_counts(self) -> Sequence[int]:
        """Single-element list holding the series' non-null count."""
        return [self.data.count()]

    @property
    def dtypes(self) -> Iterable[Dtype]:
        """Single-element list holding the series' dtype."""
        return [self.data.dtypes]

    @property
    def dtype_counts(self) -> Mapping[str, int]:
        """Dtype counts, computed on the series wrapped in a DataFrame."""
        from pandas.core.frame import DataFrame

        as_frame = DataFrame(self.data)
        return _get_dataframe_dtype_counts(as_frame)

    @property
    def memory_usage_bytes(self) -> int:
        """Memory usage in bytes.

        Returns
        -------
        memory_usage_bytes : int
            Object's total memory usage in bytes.
        """
        # Only the literal "deep" triggers deep introspection.
        deep = self.memory_usage == "deep"
        return self.data.memory_usage(index=True, deep=deep)

587 

588 

class InfoPrinterAbstract(ABC):
    """
    Class for printing dataframe or series info.

    Derives from ``ABC`` so that ``@abstractmethod`` on
    ``_create_table_builder`` is actually enforced: the base class itself
    cannot be instantiated, only subclasses that implement the method.
    """

    def to_buffer(self, buf: WriteBuffer[str] | None = None) -> None:
        """
        Write the info lines into a buffer.

        Parameters
        ----------
        buf : writable buffer, optional
            Destination of the output; ``sys.stdout`` is used when None.
        """
        table_builder = self._create_table_builder()
        lines = table_builder.get_lines()
        if buf is None:  # pragma: no cover
            buf = sys.stdout
        fmt.buffer_put_lines(buf, lines)

    @abstractmethod
    def _create_table_builder(self) -> TableBuilderAbstract:
        """Create instance of table builder."""

605 

606 

class DataFrameInfoPrinter(InfoPrinterAbstract):
    """
    Class for printing dataframe info.

    Parameters
    ----------
    info : DataFrameInfo
        Instance of DataFrameInfo.
    max_cols : int, optional
        When to switch from the verbose to the truncated output.
    verbose : bool, optional
        Whether to print the full summary.
    show_counts : bool, optional
        Whether to show the non-null counts.
    """

    def __init__(
        self,
        info: DataFrameInfo,
        max_cols: int | None = None,
        verbose: bool | None = None,
        show_counts: bool | None = None,
    ) -> None:
        self.info = info
        self.data = info.data
        self.verbose = verbose
        # Order matters: show_counts is derived from max_cols.
        self.max_cols = self._initialize_max_cols(max_cols)
        self.show_counts = self._initialize_show_counts(show_counts)

    @property
    def max_rows(self) -> int:
        """Maximum info rows to be displayed."""
        # The second positional argument of ``get_option`` is ``silent``, not
        # a default value, so the fallback has to be applied explicitly.
        limit = get_option("display.max_info_rows")
        if limit is None:
            # No limit configured: pick one the frame can never exceed.
            return len(self.data) + 1
        return limit

    @property
    def exceeds_info_cols(self) -> bool:
        """Check if number of columns to be summarized exceeds maximum."""
        return bool(self.col_count > self.max_cols)

    @property
    def exceeds_info_rows(self) -> bool:
        """Check if number of rows to be summarized exceeds maximum."""
        return bool(len(self.data) > self.max_rows)

    @property
    def col_count(self) -> int:
        """Number of columns to be summarized."""
        return self.info.col_count

    def _initialize_max_cols(self, max_cols: int | None) -> int:
        """Return ``max_cols``, or the ``display.max_info_columns`` option if None."""
        if max_cols is not None:
            return max_cols
        # See ``max_rows``: ``get_option`` takes no default value.
        limit = get_option("display.max_info_columns")
        if limit is None:
            return self.col_count + 1
        return limit

    def _initialize_show_counts(self, show_counts: bool | None) -> bool:
        """Return ``show_counts``; if None, show counts unless a limit is exceeded."""
        if show_counts is None:
            return bool(not self.exceeds_info_cols and not self.exceeds_info_rows)
        return show_counts

    def _create_table_builder(self) -> DataFrameTableBuilder:
        """
        Create instance of table builder based on verbosity and display settings.
        """
        if self.verbose is None:
            # Verbosity not requested explicitly: decided by the column limit.
            use_verbose = not self.exceeds_info_cols
        else:
            use_verbose = bool(self.verbose)

        if use_verbose:
            return DataFrameTableBuilderVerbose(
                info=self.info,
                with_counts=self.show_counts,
            )
        return DataFrameTableBuilderNonVerbose(info=self.info)

686 

687 

class SeriesInfoPrinter(InfoPrinterAbstract):
    """Class for printing series info.

    Parameters
    ----------
    info : SeriesInfo
        Instance of SeriesInfo.
    verbose : bool, optional
        Whether to print the full summary.
    show_counts : bool, optional
        Whether to show the non-null counts.
    """

    def __init__(
        self,
        info: SeriesInfo,
        verbose: bool | None = None,
        show_counts: bool | None = None,
    ) -> None:
        self.info = info
        self.data = info.data
        self.verbose = verbose
        self.show_counts = self._initialize_show_counts(show_counts)

    def _create_table_builder(self) -> SeriesTableBuilder:
        """
        Create instance of table builder based on verbosity.
        """
        # Only an explicit falsy ``verbose`` selects the short output;
        # None counts as verbose.
        if self.verbose is not None and not self.verbose:
            return SeriesTableBuilderNonVerbose(info=self.info)
        return SeriesTableBuilderVerbose(
            info=self.info,
            with_counts=self.show_counts,
        )

    def _initialize_show_counts(self, show_counts: bool | None) -> bool:
        """Return ``show_counts``, defaulting to True when it is None."""
        return True if show_counts is None else show_counts

729 

730 

class TableBuilderAbstract(ABC):
    """
    Abstract builder for info table.
    """

    # Output lines accumulated by the ``add_*`` methods.
    _lines: list[str]
    # Info object the properties below delegate to.
    info: BaseInfo

    @abstractmethod
    def get_lines(self) -> list[str]:
        """Product in a form of list of lines (strings)."""

    @property
    def data(self) -> DataFrame | Series:
        """Object being summarized."""
        return self.info.data

    @property
    def dtypes(self) -> Iterable[Dtype]:
        """Dtypes as reported by the info object."""
        return self.info.dtypes

    @property
    def dtype_counts(self) -> Mapping[str, int]:
        """Mapping dtype - number of counts."""
        return self.info.dtype_counts

    @property
    def display_memory_usage(self) -> bool:
        """Whether to display memory usage."""
        return bool(self.info.memory_usage)

    @property
    def memory_usage_string(self) -> str:
        """Memory usage string with proper size qualifier."""
        return self.info.memory_usage_string

    @property
    def non_null_counts(self) -> Sequence[int]:
        """Non-null counts as reported by the info object."""
        return self.info.non_null_counts

    def add_object_type_line(self) -> None:
        """Add line with the string form of the data's type to the table."""
        type_line = str(type(self.data))
        self._lines.append(type_line)

    def add_index_range_line(self) -> None:
        """Add line with range of indices to the table."""
        index_line = self.data.index._summary()
        self._lines.append(index_line)

    def add_dtypes_line(self) -> None:
        """Add summary line with the dtypes present, sorted by dtype name."""
        summary = ", ".join(
            f"{name}({count:d})" for name, count in sorted(self.dtype_counts.items())
        )
        self._lines.append(f"dtypes: {summary}")

785 

786 

class DataFrameTableBuilder(TableBuilderAbstract):
    """
    Abstract builder for dataframe info table.

    Parameters
    ----------
    info : DataFrameInfo.
        Instance of DataFrameInfo.
    """

    def __init__(self, *, info: DataFrameInfo) -> None:
        self.info: DataFrameInfo = info

    def get_lines(self) -> list[str]:
        """Build the info lines from scratch and return them."""
        self._lines = []
        # A frame without columns gets the short "empty" layout.
        fill = (
            self._fill_empty_info if self.col_count == 0 else self._fill_non_empty_info
        )
        fill()
        return self._lines

    def _fill_empty_info(self) -> None:
        """Add lines to the info table, pertaining to empty dataframe."""
        self.add_object_type_line()
        self.add_index_range_line()
        class_name = type(self.data).__name__
        self._lines.append(f"Empty {class_name}\n")

    @abstractmethod
    def _fill_non_empty_info(self) -> None:
        """Add lines to the info table, pertaining to non-empty dataframe."""

    @property
    def data(self) -> DataFrame:
        """DataFrame."""
        return self.info.data

    @property
    def ids(self) -> Index:
        """Dataframe columns."""
        return self.info.ids

    @property
    def col_count(self) -> int:
        """Number of dataframe columns to be summarized."""
        return self.info.col_count

    def add_memory_usage_line(self) -> None:
        """Add line containing memory usage."""
        usage = self.memory_usage_string
        self._lines.append(f"memory usage: {usage}")

836 

837 

class DataFrameTableBuilderNonVerbose(DataFrameTableBuilder):
    """
    Dataframe info table builder for non-verbose output.
    """

    def _fill_non_empty_info(self) -> None:
        """Add lines to the info table, pertaining to non-empty dataframe."""
        self.add_object_type_line()
        self.add_index_range_line()
        self.add_columns_summary_line()
        self.add_dtypes_line()
        if not self.display_memory_usage:
            return
        self.add_memory_usage_line()

    def add_columns_summary_line(self) -> None:
        """Add the one-line summary of the columns index."""
        columns_line = self.ids._summary(name="Columns")
        self._lines.append(columns_line)

854 

855 

class TableBuilderVerboseMixin(TableBuilderAbstract):
    """
    Mixin for verbose info output.
    """

    # Separator placed between adjacent table columns.
    SPACING: str = " " * 2
    # Body rows of the table, one sequence of cell strings per row.
    strrows: Sequence[Sequence[str]]
    # Final width of every column (header and body taken into account).
    gross_column_widths: Sequence[int]
    # Whether the table includes the non-null counts column.
    with_counts: bool

    @property
    @abstractmethod
    def headers(self) -> Sequence[str]:
        """Headers names of the columns in verbose table."""

    @property
    def header_column_widths(self) -> Sequence[int]:
        """Widths of header columns (only titles)."""
        return [len(title) for title in self.headers]

    def _get_gross_column_widths(self) -> Sequence[int]:
        """Get widths of columns containing both headers and actual content."""
        body_widths = self._get_body_column_widths()
        return [
            max(header_width, body_width)
            for header_width, body_width in zip(
                self.header_column_widths, body_widths
            )
        ]

    def _get_body_column_widths(self) -> Sequence[int]:
        """Get widths of table content columns."""
        # Transpose the rows so that each entry holds the cells of one column.
        columns: Sequence[Sequence[str]] = list(zip(*self.strrows))
        return [max(len(cell) for cell in column) for column in columns]

    def _gen_rows(self) -> Iterator[Sequence[str]]:
        """
        Generator function yielding rows content.

        Each element represents a row comprising a sequence of strings.
        """
        if not self.with_counts:
            return self._gen_rows_without_counts()
        return self._gen_rows_with_counts()

    @abstractmethod
    def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data with counts."""

    @abstractmethod
    def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data without counts."""

    def add_header_line(self) -> None:
        """Add the line with the column titles, padded to the column widths."""
        cells = [
            _put_str(title, width)
            for title, width in zip(self.headers, self.gross_column_widths)
        ]
        self._lines.append(self.SPACING.join(cells))

    def add_separator_line(self) -> None:
        """Add the line of dashes; each run is as long as its column title."""
        cells = [
            _put_str("-" * title_width, width)
            for title_width, width in zip(
                self.header_column_widths, self.gross_column_widths
            )
        ]
        self._lines.append(self.SPACING.join(cells))

    def add_body_lines(self) -> None:
        """Add one line per body row, cells padded to the column widths."""
        for row in self.strrows:
            cells = [
                _put_str(cell, width)
                for cell, width in zip(row, self.gross_column_widths)
            ]
            self._lines.append(self.SPACING.join(cells))

    def _gen_non_null_counts(self) -> Iterator[str]:
        """Iterator with string representation of non-null counts."""
        for non_null in self.non_null_counts:
            yield f"{non_null} non-null"

    def _gen_dtypes(self) -> Iterator[str]:
        """Iterator with string representation of column dtypes."""
        for column_dtype in self.dtypes:
            yield pprint_thing(column_dtype)

947 

948 

class DataFrameTableBuilderVerbose(DataFrameTableBuilder, TableBuilderVerboseMixin):
    """
    Dataframe info table builder for verbose output.
    """

    def __init__(
        self,
        *,
        info: DataFrameInfo,
        with_counts: bool,
    ) -> None:
        self.info = info
        self.with_counts = with_counts
        # The rows are materialized first because the widths derive from them.
        self.strrows: Sequence[Sequence[str]] = list(self._gen_rows())
        self.gross_column_widths: Sequence[int] = self._get_gross_column_widths()

    def _fill_non_empty_info(self) -> None:
        """Add lines to the info table, pertaining to non-empty dataframe."""
        self.add_object_type_line()
        self.add_index_range_line()
        self.add_columns_summary_line()
        self.add_header_line()
        self.add_separator_line()
        self.add_body_lines()
        self.add_dtypes_line()
        if not self.display_memory_usage:
            return
        self.add_memory_usage_line()

    @property
    def headers(self) -> Sequence[str]:
        """Headers names of the columns in verbose table."""
        if not self.with_counts:
            return [" # ", "Column", "Dtype"]
        return [" # ", "Column", "Non-Null Count", "Dtype"]

    def add_columns_summary_line(self) -> None:
        """Add the line stating the total number of columns."""
        total = self.col_count
        self._lines.append(f"Data columns (total {total} columns):")

    def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data without counts."""
        rows = zip(
            self._gen_line_numbers(),
            self._gen_columns(),
            self._gen_dtypes(),
        )
        yield from rows

    def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data with counts."""
        rows = zip(
            self._gen_line_numbers(),
            self._gen_columns(),
            self._gen_non_null_counts(),
            self._gen_dtypes(),
        )
        yield from rows

    def _gen_line_numbers(self) -> Iterator[str]:
        """Iterator with string representation of column numbers."""
        for position, _ in enumerate(self.ids):
            yield f" {position}"

    def _gen_columns(self) -> Iterator[str]:
        """Iterator with string representation of column names."""
        for column_name in self.ids:
            yield pprint_thing(column_name)

1013 

1014 

class SeriesTableBuilder(TableBuilderAbstract):
    """
    Abstract builder for series info table.

    Parameters
    ----------
    info : SeriesInfo.
        Instance of SeriesInfo.
    """

    def __init__(self, *, info: SeriesInfo) -> None:
        self.info: SeriesInfo = info

    def get_lines(self) -> list[str]:
        """Build the info lines from scratch and return them."""
        self._lines = []
        self._fill_non_empty_info()
        return self._lines

    @property
    def data(self) -> Series:
        """Series."""
        return self.info.data

    def add_memory_usage_line(self) -> None:
        """Add line containing memory usage."""
        usage = self.memory_usage_string
        self._lines.append(f"memory usage: {usage}")

    @abstractmethod
    def _fill_non_empty_info(self) -> None:
        """Add lines to the info table, pertaining to non-empty series."""

1045 

1046 

class SeriesTableBuilderNonVerbose(SeriesTableBuilder):
    """
    Series info table builder for non-verbose output.
    """

    def _fill_non_empty_info(self) -> None:
        """Add lines to the info table, pertaining to non-empty series."""
        self.add_object_type_line()
        self.add_index_range_line()
        self.add_dtypes_line()
        if not self.display_memory_usage:
            return
        self.add_memory_usage_line()

1059 

1060 

class SeriesTableBuilderVerbose(SeriesTableBuilder, TableBuilderVerboseMixin):
    """
    Series info table builder for verbose output.
    """

    def __init__(
        self,
        *,
        info: SeriesInfo,
        with_counts: bool,
    ) -> None:
        self.info = info
        self.with_counts = with_counts
        # The rows are materialized first because the widths derive from them.
        self.strrows: Sequence[Sequence[str]] = list(self._gen_rows())
        self.gross_column_widths: Sequence[int] = self._get_gross_column_widths()

    def _fill_non_empty_info(self) -> None:
        """Add lines to the info table, pertaining to non-empty series."""
        self.add_object_type_line()
        self.add_index_range_line()
        self.add_series_name_line()
        self.add_header_line()
        self.add_separator_line()
        self.add_body_lines()
        self.add_dtypes_line()
        if self.display_memory_usage:
            self.add_memory_usage_line()

    def add_series_name_line(self) -> None:
        """Add line with the name of the series."""
        self._lines.append(f"Series name: {self.data.name}")

    @property
    def headers(self) -> Sequence[str]:
        """Headers names of the columns in verbose table."""
        if self.with_counts:
            return ["Non-Null Count", "Dtype"]
        return ["Dtype"]

    def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data without counts."""
        # Each row must be a sequence of cells. Yielding the bare dtype string
        # would make the row be treated as a sequence of characters, so only
        # its first character would be printed. zip() wraps every dtype in a
        # 1-tuple.
        yield from zip(self._gen_dtypes())

    def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data with counts."""
        yield from zip(
            self._gen_non_null_counts(),
            self._gen_dtypes(),
        )

1109 

1110 

1111def _get_dataframe_dtype_counts(df: DataFrame) -> Mapping[str, int]: 

1112 """ 

1113 Create mapping between datatypes and their number of occurrences. 

1114 """ 

1115 # groupby dtype.name to collect e.g. Categorical columns 

1116 return df.dtypes.value_counts().groupby(lambda x: x.name).sum()