Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/plotting/_core.py: 21%

206 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1from __future__ import annotations 

2 

3import importlib 

4import itertools 

5import types 

6from typing import ( 

7 TYPE_CHECKING, 

8 Sequence, 

9) 

10import warnings 

11 

12from pandas._config import get_option 

13 

14from pandas._typing import IndexLabel 

15from pandas.util._decorators import ( 

16 Appender, 

17 Substitution, 

18) 

19from pandas.util._exceptions import find_stack_level 

20 

21from pandas.core.dtypes.common import ( 

22 is_integer, 

23 is_list_like, 

24) 

25from pandas.core.dtypes.generic import ( 

26 ABCDataFrame, 

27 ABCSeries, 

28) 

29 

30from pandas.core.base import PandasObject 

31 

32if TYPE_CHECKING: 32 ↛ 33line 32 didn't jump to line 33, because the condition on line 32 was never true

33 from matplotlib.axes import Axes 

34 

35 from pandas import DataFrame 

36 

37 

def hist_series(
    self,
    by=None,
    ax=None,
    grid: bool = True,
    xlabelsize: int | None = None,
    xrot: float | None = None,
    ylabelsize: int | None = None,
    yrot: float | None = None,
    figsize: tuple[int, int] | None = None,
    bins: int | Sequence[int] = 10,
    backend: str | None = None,
    legend: bool = False,
    **kwargs,
):
    """
    Plot a histogram of the Series through the selected plotting backend.

    The backend is looked up from ``backend`` and its ``hist_series``
    function receives every other argument unchanged.

    Parameters
    ----------
    by : object, optional
        Grouping key; when passed, histograms are formed for separate
        groups.
    ax : matplotlib axis object
        Axis to draw on. If not passed, uses gca().
    grid : bool, default True
        Whether axis grid lines are shown.
    xlabelsize : int, default None
        When specified, changes the x-axis label size.
    xrot : float, default None
        Rotation of the x-axis labels.
    ylabelsize : int, default None
        When specified, changes the y-axis label size.
    yrot : float, default None
        Rotation of the y-axis labels.
    figsize : tuple, default None
        Figure size, in inches.
    bins : int or sequence, default 10
        Number of histogram bins to use. For an integer, ``bins + 1`` bin
        edges are calculated and returned. For a sequence, the values are
        the bin edges (left edge of the first bin through right edge of
        the last bin) and ``bins`` is returned unmodified.
    backend : str, default None
        Backend to use in place of the one configured in the option
        ``plotting.backend``, for instance 'matplotlib'. To choose the
        backend for the whole session instead, set
        ``pd.options.plotting.backend``.

        .. versionadded:: 1.0.0

    legend : bool, default False
        Whether to show the legend.

        .. versionadded:: 1.1.0

    **kwargs
        Passed on to the actual plotting function.

    Returns
    -------
    matplotlib.AxesSubplot
        A histogram plot.

    See Also
    --------
    matplotlib.axes.Axes.hist : Plot a histogram using matplotlib.
    """
    engine = _get_plot_backend(backend)
    # ``backend`` only selects the engine; it is not forwarded to it.
    hist_options = {
        "by": by,
        "ax": ax,
        "grid": grid,
        "xlabelsize": xlabelsize,
        "xrot": xrot,
        "ylabelsize": ylabelsize,
        "yrot": yrot,
        "figsize": figsize,
        "bins": bins,
        "legend": legend,
    }
    return engine.hist_series(self, **hist_options, **kwargs)

119 

120 

def hist_frame(
    data: DataFrame,
    column: IndexLabel = None,
    by=None,
    grid: bool = True,
    xlabelsize: int | None = None,
    xrot: float | None = None,
    ylabelsize: int | None = None,
    yrot: float | None = None,
    ax=None,
    sharex: bool = False,
    sharey: bool = False,
    figsize: tuple[int, int] | None = None,
    layout: tuple[int, int] | None = None,
    bins: int | Sequence[int] = 10,
    backend: str | None = None,
    legend: bool = False,
    **kwargs,
):
    """
    Make a histogram of the DataFrame's columns.

    A `histogram`_ represents the distribution of data. This function
    calls :meth:`matplotlib.pyplot.hist` on each series in the DataFrame,
    which gives one histogram per column.

    .. _histogram: https://en.wikipedia.org/wiki/Histogram

    Parameters
    ----------
    data : DataFrame
        The pandas object holding the data.
    column : str or sequence, optional
        When passed, limits the data to this subset of columns.
    by : object, optional
        When passed, histograms are formed for separate groups.
    grid : bool, default True
        Whether axis grid lines are shown.
    xlabelsize : int, default None
        When specified, changes the x-axis label size.
    xrot : float, default None
        Rotation of the x-axis labels. A value of 90, for example, shows
        the x labels rotated 90 degrees clockwise.
    ylabelsize : int, default None
        When specified, changes the y-axis label size.
    yrot : float, default None
        Rotation of the y-axis labels. A value of 90, for example, shows
        the y labels rotated 90 degrees clockwise.
    ax : Matplotlib axes object, default None
        The axes to plot the histogram on.
    sharex : bool, default False
        In case subplots=True, share the x axis and set some x axis labels
        to invisible.
        Note that passing in both an ax and sharex=True will alter all x
        axis labels for all subplots in a figure.
    sharey : bool, default False
        In case subplots=True, share the y axis and set some y axis labels
        to invisible.
    figsize : tuple, optional
        Size in inches of the figure to create. The value in
        `matplotlib.rcParams` is used by default.
    layout : tuple, optional
        Tuple of (rows, columns) for the layout of the histograms.
    bins : int or sequence, default 10
        Number of histogram bins to use. For an integer, ``bins + 1`` bin
        edges are calculated and returned. For a sequence, the values are
        the bin edges (left edge of the first bin through right edge of
        the last bin) and ``bins`` is returned unmodified.

    backend : str, default None
        Backend to use in place of the one configured in the option
        ``plotting.backend``, for instance 'matplotlib'. To choose the
        backend for the whole session instead, set
        ``pd.options.plotting.backend``.

        .. versionadded:: 1.0.0

    legend : bool, default False
        Whether to show the legend.

        .. versionadded:: 1.1.0

    **kwargs
        All other plotting keyword arguments, passed on to
        :meth:`matplotlib.pyplot.hist`.

    Returns
    -------
    matplotlib.AxesSubplot or numpy.ndarray of them

    See Also
    --------
    matplotlib.pyplot.hist : Plot a histogram using matplotlib.

    Examples
    --------
    This example draws a histogram based on the length and width of
    some animals, displayed in three bins

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({
        ...     'length': [1.5, 0.5, 1.2, 0.9, 3],
        ...     'width': [0.7, 0.2, 0.15, 0.2, 1.1]
        ...     }, index=['pig', 'rabbit', 'duck', 'chicken', 'horse'])
        >>> hist = df.hist(bins=3)
    """
    engine = _get_plot_backend(backend)
    # ``backend`` only selects the engine; it is not forwarded to it.
    hist_options = {
        "column": column,
        "by": by,
        "grid": grid,
        "xlabelsize": xlabelsize,
        "xrot": xrot,
        "ylabelsize": ylabelsize,
        "yrot": yrot,
        "ax": ax,
        "sharex": sharex,
        "sharey": sharey,
        "figsize": figsize,
        "layout": layout,
        "legend": legend,
        "bins": bins,
    }
    return engine.hist_frame(data, **hist_options, **kwargs)

249 

250 

# Docstring template shared by ``boxplot`` and ``boxplot_frame``. The
# ``%(backend)s`` placeholder is filled in by their ``Substitution``
# decorators; the trailing backslash after it joins the placeholder line with
# the following blank line so an empty substitution leaves no stray gap.
_boxplot_doc = """
Make a box plot from DataFrame columns.

Make a box-and-whisker plot from DataFrame columns, optionally grouped
by some other columns. A box plot is a method for graphically depicting
groups of numerical data through their quartiles.
The box extends from the Q1 to Q3 quartile values of the data,
with a line at the median (Q2). The whiskers extend from the edges
of box to show the range of the data. By default, they extend no more than
`1.5 * IQR (IQR = Q3 - Q1)` from the edges of the box, ending at the farthest
data point within that interval. Outliers are plotted as separate dots.

For further details see
Wikipedia's entry for `boxplot <https://en.wikipedia.org/wiki/Box_plot>`_.

Parameters
----------
column : str or list of str, optional
    Column name or list of names, or vector.
    Can be any valid input to :meth:`pandas.DataFrame.groupby`.
by : str or array-like, optional
    Column in the DataFrame to :meth:`pandas.DataFrame.groupby`.
    One box-plot will be done per value of columns in `by`.
ax : object of class matplotlib.axes.Axes, optional
    The matplotlib axes to be used by boxplot.
fontsize : float or str
    Tick label font size in points or as a string (e.g., `large`).
rot : int or float, default 0
    The rotation angle of labels (in degrees)
    with respect to the screen coordinate system.
grid : bool, default True
    Setting this to True will show the grid.
figsize : A tuple (width, height) in inches
    The size of the figure to create in matplotlib.
layout : tuple (rows, columns), optional
    For example, (3, 5) will display the subplots
    using 3 rows and 5 columns, starting from the top-left.
return_type : {'axes', 'dict', 'both'} or None, default 'axes'
    The kind of object to return. The default is ``axes``.

    * 'axes' returns the matplotlib axes the boxplot is drawn on.
    * 'dict' returns a dictionary whose values are the matplotlib
      Lines of the boxplot.
    * 'both' returns a namedtuple with the axes and dict.
    * when grouping with ``by``, a Series mapping columns to
      ``return_type`` is returned.

      If ``return_type`` is `None`, a NumPy array
      of axes with the same shape as ``layout`` is returned.
%(backend)s\

**kwargs
    All other plotting keyword arguments to be passed to
    :func:`matplotlib.pyplot.boxplot`.

Returns
-------
result
    See Notes.

See Also
--------
Series.plot.hist: Make a histogram.
matplotlib.pyplot.boxplot : Matplotlib equivalent plot.

Notes
-----
The return type depends on the `return_type` parameter:

* 'axes' : object of class matplotlib.axes.Axes
* 'dict' : dict of matplotlib.lines.Line2D objects
* 'both' : a namedtuple with structure (ax, lines)

For data grouped with ``by``, return a Series of the above or a numpy
array:

* :class:`~pandas.Series`
* :class:`~numpy.array` (for ``return_type = None``)

Use ``return_type='dict'`` when you want to tweak the appearance
of the lines after plotting. In this case a dict containing the Lines
making up the boxes, caps, fliers, medians, and whiskers is returned.

Examples
--------

Boxplots can be created for every column in the dataframe
by ``df.boxplot()`` or indicating the columns to be used:

.. plot::
    :context: close-figs

    >>> np.random.seed(1234)
    >>> df = pd.DataFrame(np.random.randn(10, 4),
    ...                   columns=['Col1', 'Col2', 'Col3', 'Col4'])
    >>> boxplot = df.boxplot(column=['Col1', 'Col2', 'Col3'])  # doctest: +SKIP

Boxplots of variables distributions grouped by the values of a third
variable can be created using the option ``by``. For instance:

.. plot::
    :context: close-figs

    >>> df = pd.DataFrame(np.random.randn(10, 2),
    ...                   columns=['Col1', 'Col2'])
    >>> df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A',
    ...                      'B', 'B', 'B', 'B', 'B'])
    >>> boxplot = df.boxplot(by='X')

A list of strings (i.e. ``['X', 'Y']``) can be passed to boxplot
in order to group the data by combination of the variables in the x-axis:

.. plot::
    :context: close-figs

    >>> df = pd.DataFrame(np.random.randn(10, 3),
    ...                   columns=['Col1', 'Col2', 'Col3'])
    >>> df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A',
    ...                      'B', 'B', 'B', 'B', 'B'])
    >>> df['Y'] = pd.Series(['A', 'B', 'A', 'B', 'A',
    ...                      'B', 'A', 'B', 'A', 'B'])
    >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by=['X', 'Y'])

The layout of boxplot can be adjusted giving a tuple to ``layout``:

.. plot::
    :context: close-figs

    >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X',
    ...                      layout=(2, 1))

Additional formatting can be done to the boxplot, like suppressing the grid
(``grid=False``), rotating the labels in the x-axis (i.e. ``rot=45``)
or changing the fontsize (i.e. ``fontsize=15``):

.. plot::
    :context: close-figs

    >>> boxplot = df.boxplot(grid=False, rot=45, fontsize=15)  # doctest: +SKIP

The parameter ``return_type`` can be used to select the type of element
returned by `boxplot`. When ``return_type='axes'`` is selected,
the matplotlib axes on which the boxplot is drawn are returned:

    >>> boxplot = df.boxplot(column=['Col1', 'Col2'], return_type='axes')
    >>> type(boxplot)
    <class 'matplotlib.axes._subplots.AxesSubplot'>

When grouping with ``by``, a Series mapping columns to ``return_type``
is returned:

    >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X',
    ...                      return_type='axes')
    >>> type(boxplot)
    <class 'pandas.core.series.Series'>

If ``return_type`` is `None`, a NumPy array of axes with the same shape
as ``layout`` is returned:

    >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X',
    ...                      return_type=None)
    >>> type(boxplot)
    <class 'numpy.ndarray'>
"""

415 

# Parameter entry describing ``backend``. ``boxplot_frame`` passes it to
# ``Substitution`` as the value for the ``%(backend)s`` placeholder in
# ``_boxplot_doc``. The backslash after the opening quotes suppresses the
# leading newline so the entry starts directly at the placeholder position.
_backend_doc = """\
backend : str, default None
    Backend to use instead of the backend specified in the option
    ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
    specify the ``plotting.backend`` for the whole session, set
    ``pd.options.plotting.backend``.

    .. versionadded:: 1.0.0
"""

425 

426 

# Docstring fragment with the Parameters/Returns sections common to bar-like
# and line plots. ``%(kind)s`` is a placeholder for the plot kind name; the
# code that fills it in is not visible in this part of the file.
# NOTE(review): the text is pre-indented, presumably so it can be appended to
# a method docstring - confirm against the call sites.
_bar_or_line_doc = """
        Parameters
        ----------
        x : label or position, optional
            Allows plotting of one column versus another. If not specified,
            the index of the DataFrame is used.
        y : label or position, optional
            Allows plotting of one column versus another. If not specified,
            all numerical columns are used.
        color : str, array-like, or dict, optional
            The color for each of the DataFrame's columns. Possible values are:

            - A single color string referred to by name, RGB or RGBA code,
                for instance 'red' or '#a98d19'.

            - A sequence of color strings referred to by name, RGB or RGBA
                code, which will be used for each column recursively. For
                instance ['green','yellow'] each column's %(kind)s will be filled in
                green or yellow, alternatively. If there is only a single column to
                be plotted, then only the first color from the color list will be
                used.

            - A dict of the form {column name : color}, so that each column will be
                colored accordingly. For example, if your columns are called `a` and
                `b`, then passing {'a': 'green', 'b': 'red'} will color %(kind)ss for
                column `a` in green and %(kind)ss for column `b` in red.

            .. versionadded:: 1.1.0

        **kwargs
            Additional keyword arguments are documented in
            :meth:`DataFrame.plot`.

        Returns
        -------
        matplotlib.axes.Axes or np.ndarray of them
            An ndarray is returned with one :class:`matplotlib.axes.Axes`
            per column when ``subplots=True``.
"""

466 

467 

@Substitution(backend="")
@Appender(_boxplot_doc)
def boxplot(
    data: DataFrame,
    column: str | list[str] | None = None,
    by: str | list[str] | None = None,
    ax: Axes | None = None,
    fontsize: float | str | None = None,
    rot: int = 0,
    grid: bool = True,
    figsize: tuple[float, float] | None = None,
    layout: tuple[int, int] | None = None,
    return_type: str | None = None,
    **kwargs,
):
    # Deliberately no literal docstring: the documentation text is supplied
    # by the decorators above from ``_boxplot_doc``, with its ``backend``
    # placeholder replaced by an empty string.
    # This function has no ``backend`` parameter; it always asks for the
    # "matplotlib" backend.
    engine = _get_plot_backend("matplotlib")
    box_options = {
        "column": column,
        "by": by,
        "ax": ax,
        "fontsize": fontsize,
        "rot": rot,
        "grid": grid,
        "figsize": figsize,
        "layout": layout,
        "return_type": return_type,
    }
    return engine.boxplot(data, **box_options, **kwargs)

497 

498 

@Substitution(backend=_backend_doc)
@Appender(_boxplot_doc)
def boxplot_frame(
    self,
    column=None,
    by=None,
    ax=None,
    fontsize=None,
    rot: int = 0,
    grid: bool = True,
    figsize=None,
    layout=None,
    return_type=None,
    backend=None,
    **kwargs,
):
    # Deliberately no literal docstring: the documentation text is supplied
    # by the decorators above from ``_boxplot_doc``, with its ``backend``
    # placeholder replaced by ``_backend_doc``.
    engine = _get_plot_backend(backend)
    # ``backend`` only selects the engine; it is not forwarded to it.
    box_options = {
        "column": column,
        "by": by,
        "ax": ax,
        "fontsize": fontsize,
        "rot": rot,
        "grid": grid,
        "figsize": figsize,
        "layout": layout,
        "return_type": return_type,
    }
    return engine.boxplot_frame(self, **box_options, **kwargs)

529 

530 

def boxplot_frame_groupby(
    grouped,
    subplots: bool = True,
    column=None,
    fontsize=None,
    rot: int = 0,
    grid: bool = True,
    ax=None,
    figsize=None,
    layout=None,
    sharex: bool = False,
    sharey: bool = True,
    backend=None,
    **kwargs,
):
    """
    Make box plots from DataFrameGroupBy data.

    The backend is looked up from ``backend`` and its
    ``boxplot_frame_groupby`` function receives every other argument
    unchanged.

    Parameters
    ----------
    grouped : Grouped DataFrame
    subplots : bool
        * ``False`` - no subplots will be used
        * ``True`` - create a subplot for each group.

    column : column name or list of names, or vector
        Can be any valid input to groupby.
    fontsize : int or str
    rot : label rotation angle
    grid : Setting this to True will show the grid
    ax : Matplotlib axis object, default None
    figsize : A tuple (width, height) in inches
    layout : tuple (optional)
        Layout of the plot, given as (rows, columns).
    sharex : bool, default False
        Whether the subplots share their x-axes.
    sharey : bool, default True
        Whether the subplots share their y-axes.
    backend : str, default None
        Backend to use in place of the one configured in the option
        ``plotting.backend``, for instance 'matplotlib'. To choose the
        backend for the whole session instead, set
        ``pd.options.plotting.backend``.

        .. versionadded:: 1.0.0

    **kwargs
        All other plotting keyword arguments, passed on to
        matplotlib's boxplot function.

    Returns
    -------
    dict of key/value = group key/DataFrame.boxplot return value
    or DataFrame.boxplot return value in case subplots=figures=False

    Examples
    --------
    You can create boxplots for grouped data and show them as separate subplots:

    .. plot::
        :context: close-figs

        >>> import itertools
        >>> tuples = [t for t in itertools.product(range(1000), range(4))]
        >>> index = pd.MultiIndex.from_tuples(tuples, names=['lvl0', 'lvl1'])
        >>> data = np.random.randn(len(index),4)
        >>> df = pd.DataFrame(data, columns=list('ABCD'), index=index)
        >>> grouped = df.groupby(level='lvl1')
        >>> grouped.boxplot(rot=45, fontsize=12, figsize=(8,10))  # doctest: +SKIP

    The ``subplots=False`` option shows the boxplots in a single figure.

    .. plot::
        :context: close-figs

        >>> grouped.boxplot(subplots=False, rot=45, fontsize=12)  # doctest: +SKIP
    """
    engine = _get_plot_backend(backend)
    # ``backend`` only selects the engine; it is not forwarded to it.
    box_options = {
        "subplots": subplots,
        "column": column,
        "fontsize": fontsize,
        "rot": rot,
        "grid": grid,
        "ax": ax,
        "figsize": figsize,
        "layout": layout,
        "sharex": sharex,
        "sharey": sharey,
    }
    return engine.boxplot_frame_groupby(grouped, **box_options, **kwargs)

623 

624 

625class PlotAccessor(PandasObject): 

626 """ 

627 Make plots of Series or DataFrame. 

628 

629 Uses the backend specified by the 

630 option ``plotting.backend``. By default, matplotlib is used. 

631 

632 Parameters 

633 ---------- 

634 data : Series or DataFrame 

635 The object for which the method is called. 

636 x : label or position, default None 

637 Only used if data is a DataFrame. 

638 y : label, position or list of label, positions, default None 

639 Allows plotting of one column versus another. Only used if data is a 

640 DataFrame. 

641 kind : str 

642 The kind of plot to produce: 

643 

644 - 'line' : line plot (default) 

645 - 'bar' : vertical bar plot 

646 - 'barh' : horizontal bar plot 

647 - 'hist' : histogram 

648 - 'box' : boxplot 

649 - 'kde' : Kernel Density Estimation plot 

650 - 'density' : same as 'kde' 

651 - 'area' : area plot 

652 - 'pie' : pie plot 

653 - 'scatter' : scatter plot (DataFrame only) 

654 - 'hexbin' : hexbin plot (DataFrame only) 

655 ax : matplotlib axes object, default None 

656 An axes of the current figure. 

657 subplots : bool or sequence of iterables, default False 

658 Whether to group columns into subplots: 

659 

660 - ``False`` : No subplots will be used 

661 - ``True`` : Make separate subplots for each column. 

662 - sequence of iterables of column labels: Create a subplot for each 

663 group of columns. For example `[('a', 'c'), ('b', 'd')]` will 

664 create 2 subplots: one with columns 'a' and 'c', and one 

665 with columns 'b' and 'd'. Remaining columns that aren't specified 

666 will be plotted in additional subplots (one per column). 

667 .. versionadded:: 1.5.0 

668 

669 sharex : bool, default True if ax is None else False 

670 In case ``subplots=True``, share x axis and set some x axis labels 

671 to invisible; defaults to True if ax is None otherwise False if 

672 an ax is passed in; Be aware, that passing in both an ax and 

673 ``sharex=True`` will alter all x axis labels for all axis in a figure. 

674 sharey : bool, default False 

675 In case ``subplots=True``, share y axis and set some y axis labels to invisible. 

676 layout : tuple, optional 

677 (rows, columns) for the layout of subplots. 

678 figsize : a tuple (width, height) in inches 

679 Size of a figure object. 

680 use_index : bool, default True 

681 Use index as ticks for x axis. 

682 title : str or list 

683 Title to use for the plot. If a string is passed, print the string 

684 at the top of the figure. If a list is passed and `subplots` is 

685 True, print each item in the list above the corresponding subplot. 

686 grid : bool, default None (matlab style default) 

687 Axis grid lines. 

688 legend : bool or {'reverse'} 

689 Place legend on axis subplots. 

690 style : list or dict 

691 The matplotlib line style per column. 

692 logx : bool or 'sym', default False 

693 Use log scaling or symlog scaling on x axis. 

694 .. versionchanged:: 0.25.0 

695 

696 logy : bool or 'sym' default False 

697 Use log scaling or symlog scaling on y axis. 

698 .. versionchanged:: 0.25.0 

699 

700 loglog : bool or 'sym', default False 

701 Use log scaling or symlog scaling on both x and y axes. 

702 .. versionchanged:: 0.25.0 

703 

704 xticks : sequence 

705 Values to use for the xticks. 

706 yticks : sequence 

707 Values to use for the yticks. 

708 xlim : 2-tuple/list 

709 Set the x limits of the current axes. 

710 ylim : 2-tuple/list 

711 Set the y limits of the current axes. 

712 xlabel : label, optional 

713 Name to use for the xlabel on x-axis. Default uses index name as xlabel, or the 

714 x-column name for planar plots. 

715 

716 .. versionadded:: 1.1.0 

717 

718 .. versionchanged:: 1.2.0 

719 

720 Now applicable to planar plots (`scatter`, `hexbin`). 

721 

722 ylabel : label, optional 

723 Name to use for the ylabel on y-axis. Default will show no ylabel, or the 

724 y-column name for planar plots. 

725 

726 .. versionadded:: 1.1.0 

727 

728 .. versionchanged:: 1.2.0 

729 

730 Now applicable to planar plots (`scatter`, `hexbin`). 

731 

732 rot : int, default None 

733 Rotation for ticks (xticks for vertical, yticks for horizontal 

734 plots). 

735 fontsize : int, default None 

736 Font size for xticks and yticks. 

737 colormap : str or matplotlib colormap object, default None 

738 Colormap to select colors from. If string, load colormap with that 

739 name from matplotlib. 

740 colorbar : bool, optional 

741 If True, plot colorbar (only relevant for 'scatter' and 'hexbin' 

742 plots). 

743 position : float 

744 Specify relative alignments for bar plot layout. 

745 From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 

746 (center). 

747 table : bool, Series or DataFrame, default False 

748 If True, draw a table using the data in the DataFrame and the data 

749 will be transposed to meet matplotlib's default layout. 

750 If a Series or DataFrame is passed, use passed data to draw a 

751 table. 

752 yerr : DataFrame, Series, array-like, dict and str 

753 See :ref:`Plotting with Error Bars <visualization.errorbars>` for 

754 detail. 

755 xerr : DataFrame, Series, array-like, dict and str 

756 Equivalent to yerr. 

757 stacked : bool, default False in line and bar plots, and True in area plot 

758 If True, create stacked plot. 

759 sort_columns : bool, default False 

760 Sort column names to determine plot ordering. 

761 

762 .. deprecated:: 1.5.0 

763 The `sort_columns` arguments is deprecated and will be removed in a 

764 future version. 

765 

766 secondary_y : bool or sequence, default False 

767 Whether to plot on the secondary y-axis if a list/tuple, which 

768 columns to plot on secondary y-axis. 

769 mark_right : bool, default True 

770 When using a secondary_y axis, automatically mark the column 

771 labels with "(right)" in the legend. 

772 include_bool : bool, default is False 

773 If True, boolean values can be plotted. 

774 backend : str, default None 

775 Backend to use instead of the backend specified in the option 

776 ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to 

777 specify the ``plotting.backend`` for the whole session, set 

778 ``pd.options.plotting.backend``. 

779 

780 .. versionadded:: 1.0.0 

781 

782 **kwargs 

783 Options to pass to matplotlib plotting method. 

784 

785 Returns 

786 ------- 

787 :class:`matplotlib.axes.Axes` or numpy.ndarray of them 

788 If the backend is not the default matplotlib one, the return value 

789 will be the object returned by the backend. 

790 

791 Notes 

792 ----- 

793 - See matplotlib documentation online for more on this subject 

794 - If `kind` = 'bar' or 'barh', you can specify relative alignments 

795 for bar plot layout by `position` keyword. 

796 From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 

797 (center) 

798 """ 

799 

800 _common_kinds = ("line", "bar", "barh", "kde", "density", "area", "hist", "box") 

801 _series_kinds = ("pie",) 

802 _dataframe_kinds = ("scatter", "hexbin") 

803 _kind_aliases = {"density": "kde"} 

804 _all_kinds = _common_kinds + _series_kinds + _dataframe_kinds 

805 

def __init__(self, data) -> None:
    """
    Bind the accessor to the object it plots.

    Parameters
    ----------
    data : Series or DataFrame
        The object for which the accessor was created. It is stored as
        ``_parent`` without being copied or validated here.
    """
    self._parent = data

808 

809 @staticmethod 

810 def _get_call_args(backend_name, data, args, kwargs): 

811 """ 

812 This function makes calls to this accessor `__call__` method compatible 

813 with the previous `SeriesPlotMethods.__call__` and 

814 `DataFramePlotMethods.__call__`. Those had slightly different 

815 signatures, since `DataFramePlotMethods` accepted `x` and `y` 

816 parameters. 

817 """ 

818 if isinstance(data, ABCSeries): 

819 arg_def = [ 

820 ("kind", "line"), 

821 ("ax", None), 

822 ("figsize", None), 

823 ("use_index", True), 

824 ("title", None), 

825 ("grid", None), 

826 ("legend", False), 

827 ("style", None), 

828 ("logx", False), 

829 ("logy", False), 

830 ("loglog", False), 

831 ("xticks", None), 

832 ("yticks", None), 

833 ("xlim", None), 

834 ("ylim", None), 

835 ("rot", None), 

836 ("fontsize", None), 

837 ("colormap", None), 

838 ("table", False), 

839 ("yerr", None), 

840 ("xerr", None), 

841 ("label", None), 

842 ("secondary_y", False), 

843 ("xlabel", None), 

844 ("ylabel", None), 

845 ] 

846 elif isinstance(data, ABCDataFrame): 

847 arg_def = [ 

848 ("x", None), 

849 ("y", None), 

850 ("kind", "line"), 

851 ("ax", None), 

852 ("subplots", False), 

853 ("sharex", None), 

854 ("sharey", False), 

855 ("layout", None), 

856 ("figsize", None), 

857 ("use_index", True), 

858 ("title", None), 

859 ("grid", None), 

860 ("legend", True), 

861 ("style", None), 

862 ("logx", False), 

863 ("logy", False), 

864 ("loglog", False), 

865 ("xticks", None), 

866 ("yticks", None), 

867 ("xlim", None), 

868 ("ylim", None), 

869 ("rot", None), 

870 ("fontsize", None), 

871 ("colormap", None), 

872 ("table", False), 

873 ("yerr", None), 

874 ("xerr", None), 

875 ("secondary_y", False), 

876 ("sort_columns", False), 

877 ("xlabel", None), 

878 ("ylabel", None), 

879 ] 

880 else: 

881 raise TypeError( 

882 f"Called plot accessor for type {type(data).__name__}, " 

883 "expected Series or DataFrame" 

884 ) 

885 

886 if "sort_columns" in itertools.chain(args, kwargs.keys()): 

887 warnings.warn( 

888 "`sort_columns` is deprecated and will be removed in a future " 

889 "version.", 

890 FutureWarning, 

891 stacklevel=find_stack_level(), 

892 ) 

893 

894 if args and isinstance(data, ABCSeries): 

895 positional_args = str(args)[1:-1] 

896 keyword_args = ", ".join( 

897 [f"{name}={repr(value)}" for (name, _), value in zip(arg_def, args)] 

898 ) 

899 msg = ( 

900 "`Series.plot()` should not be called with positional " 

901 "arguments, only keyword arguments. The order of " 

902 "positional arguments will change in the future. " 

903 f"Use `Series.plot({keyword_args})` instead of " 

904 f"`Series.plot({positional_args})`." 

905 ) 

906 raise TypeError(msg) 

907 

908 pos_args = {name: value for (name, _), value in zip(arg_def, args)} 

909 if backend_name == "pandas.plotting._matplotlib": 

910 kwargs = dict(arg_def, **pos_args, **kwargs) 

911 else: 

912 kwargs = dict(pos_args, **kwargs) 

913 

914 x = kwargs.pop("x", None) 

915 y = kwargs.pop("y", None) 

916 kind = kwargs.pop("kind", "line") 

917 return x, y, kind, kwargs 

918 

def __call__(self, *args, **kwargs):
    # Entry point for ``obj.plot(...)``: resolve the backend, normalise the
    # call arguments, reshape the parent data for the requested ``kind`` and
    # delegate to ``plot_backend.plot``. No inline docstring is written here
    # because the statement following this method assigns ``__doc__`` of the
    # enclosing scope to ``__call__.__doc__``.
    plot_backend = _get_plot_backend(kwargs.pop("backend", None))

    x, y, kind, kwargs = self._get_call_args(
        plot_backend.__name__, self._parent, args, kwargs
    )

    kind = self._kind_aliases.get(kind, kind)

    # when using another backend, get out of the way
    if plot_backend.__name__ != "pandas.plotting._matplotlib":
        return plot_backend.plot(self._parent, x=x, y=y, kind=kind, **kwargs)

    if kind not in self._all_kinds:
        raise ValueError(f"{kind} is not a valid plot kind")

    # The original data structure can be transformed before being passed to
    # the backend. For example, for DataFrame it is common to set the index as
    # the `x` parameter, and return a Series with the parameter `y` as values.
    data = self._parent.copy()

    if isinstance(data, ABCSeries):
        kwargs["reuse_plot"] = True

    if kind in self._dataframe_kinds:
        # These kinds receive x/y untouched, but only make sense for frames.
        if not isinstance(data, ABCDataFrame):
            raise ValueError(f"plot kind {kind} can only be used for data frames")
        return plot_backend.plot(data, x=x, y=y, kind=kind, **kwargs)

    if kind in self._series_kinds:
        if isinstance(data, ABCDataFrame):
            if y is None and kwargs.get("subplots") is False:
                raise ValueError(
                    f"{kind} requires either y column or 'subplots=True'"
                )
            elif y is not None:
                # An integer `y` is a position unless the columns are
                # themselves integer labels.
                if is_integer(y) and not data.columns.holds_integer():
                    y = data.columns[y]
                # converted to series actually. copy to not modify
                data = data[y].copy()
                data.index.name = y
    elif isinstance(data, ABCDataFrame):
        # Positions given for x / y refer to the columns as they are *before*
        # `x` is moved into the index, hence the snapshot.
        data_cols = data.columns
        if x is not None:
            if is_integer(x) and not data.columns.holds_integer():
                x = data_cols[x]
            elif not isinstance(data[x], ABCSeries):
                raise ValueError("x must be a label or position")
            data = data.set_index(x)
        if y is not None:
            # check if we have y as int or list of ints
            int_ylist = is_list_like(y) and all(is_integer(c) for c in y)
            int_y_arg = is_integer(y) or int_ylist
            if int_y_arg and not data.columns.holds_integer():
                y = data_cols[y]

            label_kw = kwargs.get("label", False)
            for err_key in ("xerr", "yerr"):
                err_ref = kwargs.get(err_key)
                if isinstance(err_ref, str) or is_integer(err_ref):
                    # A str/int error spec is tried as a column reference and
                    # must be resolved before `data` is narrowed to `y` below.
                    try:
                        kwargs[err_key] = data[err_ref]
                    except (IndexError, KeyError, TypeError):
                        # Deliberate best effort: if the lookup fails the
                        # value is left exactly as the caller passed it.
                        pass

            # don't overwrite
            data = data[y].copy()

            if isinstance(data, ABCSeries):
                data.name = label_kw or y
            else:
                match = is_list_like(label_kw) and len(label_kw) == len(y)
                if label_kw and not match:
                    raise ValueError(
                        "label should be list-like and same length as y"
                    )
                data.columns = label_kw or data.columns

    return plot_backend.plot(data, kind=kind, **kwargs)


# Reuse the docstring of the enclosing scope as the documentation of the call.
__call__.__doc__ = __doc__

1003 

# Decorators run bottom-up: the shared bar/line parameter text is attached
# first, its ``%(kind)s`` placeholders are filled with "line", and finally the
# See Also / Examples text below is attached.
@Appender(
    """
    See Also
    --------
    matplotlib.pyplot.plot : Plot y versus x as lines and/or markers.

    Examples
    --------

    .. plot::
        :context: close-figs

        >>> s = pd.Series([1, 3, 2])
        >>> ax = s.plot.line()

    .. plot::
        :context: close-figs

        The following example shows the populations for some animals
        over the years.

        >>> df = pd.DataFrame({
        ...    'pig': [20, 18, 489, 675, 1776],
        ...    'horse': [4, 25, 281, 600, 1900]
        ...    }, index=[1990, 1997, 2003, 2009, 2014])
        >>> lines = df.plot.line()

    .. plot::
       :context: close-figs

       An example with subplots, so an array of axes is returned.

       >>> axes = df.plot.line(subplots=True)
       >>> type(axes)
       <class 'numpy.ndarray'>

    .. plot::
       :context: close-figs

       Let's repeat the same example, but specifying colors for
       each column (in this case, for each animal).

       >>> axes = df.plot.line(
       ...     subplots=True, color={"pig": "pink", "horse": "#742802"}
       ... )

    .. plot::
        :context: close-figs

        The following example shows the relationship between both
        populations.

        >>> lines = df.plot.line(x='pig', y='horse')
    """
)
@Substitution(kind="line")
@Appender(_bar_or_line_doc)
def line(self, x=None, y=None, **kwargs) -> PlotAccessor:
    """
    Plot Series or DataFrame as lines.

    This function is useful to plot lines using DataFrame's values
    as coordinates.
    """
    # NOTE(review): the annotation says PlotAccessor, but the value returned
    # is whatever ``self(...)`` returns -- confirm the intended annotation.
    return self(kind="line", x=x, y=y, **kwargs)

1070 

# Decorators run bottom-up: shared bar/line parameter text first, then the
# ``%(kind)s`` substitution, then the See Also / Examples text below.
@Appender(
    """
    See Also
    --------
    DataFrame.plot.barh : Horizontal bar plot.
    DataFrame.plot : Make plots of a DataFrame.
    matplotlib.pyplot.bar : Make a bar plot with matplotlib.

    Examples
    --------
    Basic plot.

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({'lab':['A', 'B', 'C'], 'val':[10, 30, 20]})
        >>> ax = df.plot.bar(x='lab', y='val', rot=0)

    Plot a whole dataframe to a bar plot. Each column is assigned a
    distinct color, and each row is nested in a group along the
    horizontal axis.

    .. plot::
        :context: close-figs

        >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
        >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
        >>> index = ['snail', 'pig', 'elephant',
        ...          'rabbit', 'giraffe', 'coyote', 'horse']
        >>> df = pd.DataFrame({'speed': speed,
        ...                    'lifespan': lifespan}, index=index)
        >>> ax = df.plot.bar(rot=0)

    Plot stacked bar charts for the DataFrame

    .. plot::
        :context: close-figs

        >>> ax = df.plot.bar(stacked=True)

    Instead of nesting, the figure can be split by column with
    ``subplots=True``. In this case, a :class:`numpy.ndarray` of
    :class:`matplotlib.axes.Axes` are returned.

    .. plot::
        :context: close-figs

        >>> axes = df.plot.bar(rot=0, subplots=True)
        >>> axes[1].legend(loc=2)  # doctest: +SKIP

    If you don't like the default colours, you can specify how you'd
    like each column to be colored.

    .. plot::
        :context: close-figs

        >>> axes = df.plot.bar(
        ...     rot=0, subplots=True, color={"speed": "red", "lifespan": "green"}
        ... )
        >>> axes[1].legend(loc=2)  # doctest: +SKIP

    Plot a single column.

    .. plot::
        :context: close-figs

        >>> ax = df.plot.bar(y='speed', rot=0)

    Plot only selected categories for the DataFrame.

    .. plot::
        :context: close-figs

        >>> ax = df.plot.bar(x='lifespan', rot=0)
    """
)
@Substitution(kind="bar")
@Appender(_bar_or_line_doc)
def bar(self, x=None, y=None, **kwargs) -> PlotAccessor:
    """
    Vertical bar plot.

    A bar plot presents categorical data with rectangular bars whose
    lengths are proportional to the values they represent. It shows
    comparisons among discrete categories: one axis of the plot shows
    the specific categories being compared, and the other axis
    represents a measured value.
    """
    # Options fixed by this wrapper; a clash with **kwargs raises TypeError.
    fixed_options = {"kind": "bar", "x": x, "y": y}
    return self(**fixed_options, **kwargs)

1160 

# Decorators run bottom-up: shared bar/line parameter text first, then the
# ``%(kind)s`` substitution, then the See Also / Examples text below.
@Appender(
    """
    See Also
    --------
    DataFrame.plot.bar: Vertical bar plot.
    DataFrame.plot : Make plots of DataFrame using matplotlib.
    matplotlib.axes.Axes.bar : Plot a vertical bar plot using matplotlib.

    Examples
    --------
    Basic example

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({'lab': ['A', 'B', 'C'], 'val': [10, 30, 20]})
        >>> ax = df.plot.barh(x='lab', y='val')

    Plot a whole DataFrame to a horizontal bar plot

    .. plot::
        :context: close-figs

        >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
        >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
        >>> index = ['snail', 'pig', 'elephant',
        ...          'rabbit', 'giraffe', 'coyote', 'horse']
        >>> df = pd.DataFrame({'speed': speed,
        ...                    'lifespan': lifespan}, index=index)
        >>> ax = df.plot.barh()

    Plot stacked barh charts for the DataFrame

    .. plot::
        :context: close-figs

        >>> ax = df.plot.barh(stacked=True)

    We can specify colors for each column

    .. plot::
        :context: close-figs

        >>> ax = df.plot.barh(color={"speed": "red", "lifespan": "green"})

    Plot a column of the DataFrame to a horizontal bar plot

    .. plot::
        :context: close-figs

        >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
        >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
        >>> index = ['snail', 'pig', 'elephant',
        ...          'rabbit', 'giraffe', 'coyote', 'horse']
        >>> df = pd.DataFrame({'speed': speed,
        ...                    'lifespan': lifespan}, index=index)
        >>> ax = df.plot.barh(y='speed')

    Plot DataFrame versus the desired column

    .. plot::
        :context: close-figs

        >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
        >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
        >>> index = ['snail', 'pig', 'elephant',
        ...          'rabbit', 'giraffe', 'coyote', 'horse']
        >>> df = pd.DataFrame({'speed': speed,
        ...                    'lifespan': lifespan}, index=index)
        >>> ax = df.plot.barh(x='lifespan')
    """
)
# NOTE(review): the substitution uses kind="bar" (not "barh") while the call
# below uses kind="barh" -- presumably so the shared text reads "bar"; confirm.
@Substitution(kind="bar")
@Appender(_bar_or_line_doc)
def barh(self, x=None, y=None, **kwargs) -> PlotAccessor:
    """
    Make a horizontal bar plot.

    A horizontal bar plot presents quantitative data with rectangular
    bars whose lengths are proportional to the values they represent.
    It shows comparisons among discrete categories: one axis of the
    plot shows the specific categories being compared, and the other
    axis represents a measured value.
    """
    # Options fixed by this wrapper; a clash with **kwargs raises TypeError.
    fixed_options = {"kind": "barh", "x": x, "y": y}
    return self(**fixed_options, **kwargs)

1246 

def box(self, by=None, **kwargs) -> PlotAccessor:
    r"""
    Make a box plot of the DataFrame columns.

    A box plot is a method for graphically depicting groups of numerical
    data through their quartiles.
    The box extends from the Q1 to Q3 quartile values of the data,
    with a line at the median (Q2). The whiskers extend from the edges
    of box to show the range of the data. The position of the whiskers
    is set by default to 1.5*IQR (IQR = Q3 - Q1) from the edges of the
    box. Outlier points are those past the end of the whiskers.

    For further details see Wikipedia's
    entry for `boxplot <https://en.wikipedia.org/wiki/Box_plot>`__.

    A consideration when using this chart is that the box and the whiskers
    can overlap, which is very common when plotting small sets of data.

    Parameters
    ----------
    by : str or sequence
        Column in the DataFrame to group by.

        .. versionchanged:: 1.4.0

           Previously, `by` was silently ignored and made no groupings.

    **kwargs
        Additional keywords are documented in
        :meth:`DataFrame.plot`.

    Returns
    -------
    :class:`matplotlib.axes.Axes` or numpy.ndarray of them

    See Also
    --------
    DataFrame.boxplot: Another method to draw a box plot.
    Series.plot.box: Draw a box plot from a Series object.
    matplotlib.pyplot.boxplot: Draw a box plot in matplotlib.

    Examples
    --------
    Draw a box plot from a DataFrame with four columns of randomly
    generated data.

    .. plot::
        :context: close-figs

        >>> data = np.random.randn(25, 4)
        >>> df = pd.DataFrame(data, columns=list('ABCD'))
        >>> ax = df.plot.box()

    You can also generate groupings if you specify the `by` parameter (which
    can take a column name, or a list or tuple of column names):

    .. versionchanged:: 1.4.0

    .. plot::
        :context: close-figs

        >>> age_list = [8, 10, 12, 14, 72, 74, 76, 78, 20, 25, 30, 35, 60, 85]
        >>> df = pd.DataFrame({"gender": list("MMMMMMMMFFFFFF"), "age": age_list})
        >>> ax = df.plot.box(column="age", by="gender", figsize=(10, 8))
    """
    # Options fixed by this wrapper; a clash with **kwargs raises TypeError.
    fixed_options = {"kind": "box", "by": by}
    return self(**fixed_options, **kwargs)

1313 

def hist(self, by=None, bins: int = 10, **kwargs) -> PlotAccessor:
    """
    Draw one histogram of the DataFrame's columns.

    A histogram is a representation of the distribution of data.
    This function groups the values of all given Series in the DataFrame
    into bins and draws all bins in one :class:`matplotlib.axes.Axes`.
    This is useful when the DataFrame's Series are in a similar scale.

    Parameters
    ----------
    by : str or sequence, optional
        Column in the DataFrame to group by.

        .. versionchanged:: 1.4.0

           Previously, `by` was silently ignored and made no groupings.

    bins : int, default 10
        Number of histogram bins to be used.
    **kwargs
        Additional keyword arguments are documented in
        :meth:`DataFrame.plot`.

    Returns
    -------
    :class:`matplotlib.AxesSubplot`
        Return a histogram plot.

    See Also
    --------
    DataFrame.hist : Draw histograms per DataFrame's Series.
    Series.hist : Draw a histogram with Series' data.

    Examples
    --------
    When we roll a die 6000 times, we expect to get each value around 1000
    times. But when we roll two dice and sum the result, the distribution
    is going to be quite different. A histogram illustrates those
    distributions.

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame(
        ...     np.random.randint(1, 7, 6000),
        ...     columns = ['one'])
        >>> df['two'] = df['one'] + np.random.randint(1, 7, 6000)
        >>> ax = df.plot.hist(bins=12, alpha=0.5)

    A grouped histogram can be generated by providing the parameter `by` (which
    can be a column name, or a list of column names):

    .. plot::
        :context: close-figs

        >>> age_list = [8, 10, 12, 14, 72, 74, 76, 78, 20, 25, 30, 35, 60, 85]
        >>> df = pd.DataFrame({"gender": list("MMMMMMMMFFFFFF"), "age": age_list})
        >>> ax = df.plot.hist(column=["age"], by="gender", figsize=(10, 8))
    """
    # Options fixed by this wrapper; a clash with **kwargs raises TypeError.
    fixed_options = {"kind": "hist", "by": by, "bins": bins}
    return self(**fixed_options, **kwargs)

1375 

def kde(self, bw_method=None, ind=None, **kwargs) -> PlotAccessor:
    """
    Generate Kernel Density Estimate plot using Gaussian kernels.

    In statistics, `kernel density estimation`_ (KDE) is a non-parametric
    way to estimate the probability density function (PDF) of a random
    variable. This function uses Gaussian kernels and includes automatic
    bandwidth determination.

    .. _kernel density estimation:
        https://en.wikipedia.org/wiki/Kernel_density_estimation

    Parameters
    ----------
    bw_method : str, scalar or callable, optional
        The method used to calculate the estimator bandwidth. This can be
        'scott', 'silverman', a scalar constant or a callable.
        If None (default), 'scott' is used.
        See :class:`scipy.stats.gaussian_kde` for more information.
    ind : NumPy array or int, optional
        Evaluation points for the estimated PDF. If None (default),
        1000 equally spaced points are used. If `ind` is a NumPy array, the
        KDE is evaluated at the points passed. If `ind` is an integer,
        `ind` number of equally spaced points are used.
    **kwargs
        Additional keyword arguments are documented in
        :meth:`DataFrame.plot`.

    Returns
    -------
    matplotlib.axes.Axes or numpy.ndarray of them

    See Also
    --------
    scipy.stats.gaussian_kde : Representation of a kernel-density
        estimate using Gaussian kernels. This is the function used
        internally to estimate the PDF.

    Examples
    --------
    Given a Series of points randomly sampled from an unknown
    distribution, estimate its PDF using KDE with automatic
    bandwidth determination and plot the results, evaluating them at
    1000 equally spaced points (default):

    .. plot::
        :context: close-figs

        >>> s = pd.Series([1, 2, 2.5, 3, 3.5, 4, 5])
        >>> ax = s.plot.kde()

    A scalar bandwidth can be specified. Using a small bandwidth value can
    lead to over-fitting, while using a large bandwidth value may result
    in under-fitting:

    .. plot::
        :context: close-figs

        >>> ax = s.plot.kde(bw_method=0.3)

    .. plot::
        :context: close-figs

        >>> ax = s.plot.kde(bw_method=3)

    Finally, the `ind` parameter determines the evaluation points for the
    plot of the estimated PDF:

    .. plot::
        :context: close-figs

        >>> ax = s.plot.kde(ind=[1, 2, 3, 4, 5])

    For DataFrame, it works in the same way:

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({
        ...     'x': [1, 2, 2.5, 3, 3.5, 4, 5],
        ...     'y': [4, 4, 4.5, 5, 5.5, 6, 6],
        ... })
        >>> ax = df.plot.kde()

    A scalar bandwidth can be specified. Using a small bandwidth value can
    lead to over-fitting, while using a large bandwidth value may result
    in under-fitting:

    .. plot::
        :context: close-figs

        >>> ax = df.plot.kde(bw_method=0.3)

    .. plot::
        :context: close-figs

        >>> ax = df.plot.kde(bw_method=3)

    Finally, the `ind` parameter determines the evaluation points for the
    plot of the estimated PDF:

    .. plot::
        :context: close-figs

        >>> ax = df.plot.kde(ind=[1, 2, 3, 4, 5, 6])
    """
    # Options fixed by this wrapper; a clash with **kwargs raises TypeError.
    fixed_options = {"kind": "kde", "bw_method": bw_method, "ind": ind}
    return self(**fixed_options, **kwargs)


# ``density`` is a second name bound to the very same function object.
density = kde

1485 

def area(self, x=None, y=None, **kwargs) -> PlotAccessor:
    """
    Draw a stacked area plot.

    An area plot displays quantitative data visually.
    This function wraps the matplotlib area function.

    Parameters
    ----------
    x : label or position, optional
        Coordinates for the X axis. By default uses the index.
    y : label or position, optional
        Column to plot. By default uses all columns.
    stacked : bool, default True
        Area plots are stacked by default. Set to False to create an
        unstacked plot.
    **kwargs
        Additional keyword arguments are documented in
        :meth:`DataFrame.plot`.

    Returns
    -------
    matplotlib.axes.Axes or numpy.ndarray
        Area plot, or array of area plots if subplots is True.

    See Also
    --------
    DataFrame.plot : Make plots of DataFrame using matplotlib / pylab.

    Examples
    --------
    Draw an area plot based on basic business metrics:

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({
        ...     'sales': [3, 2, 3, 9, 10, 6],
        ...     'signups': [5, 5, 6, 12, 14, 13],
        ...     'visits': [20, 42, 28, 62, 81, 50],
        ... }, index=pd.date_range(start='2018/01/01', end='2018/07/01',
        ...                        freq='M'))
        >>> ax = df.plot.area()

    Area plots are stacked by default. To produce an unstacked plot,
    pass ``stacked=False``:

    .. plot::
        :context: close-figs

        >>> ax = df.plot.area(stacked=False)

    Draw an area plot for a single column:

    .. plot::
        :context: close-figs

        >>> ax = df.plot.area(y='sales')

    Draw with a different `x`:

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({
        ...     'sales': [3, 2, 3],
        ...     'visits': [20, 42, 28],
        ...     'day': [1, 2, 3],
        ... })
        >>> ax = df.plot.area(x='day')
    """
    # `stacked` is not a named parameter here; it travels inside **kwargs.
    fixed_options = {"kind": "area", "x": x, "y": y}
    return self(**fixed_options, **kwargs)

1558 

def pie(self, **kwargs) -> PlotAccessor:
    """
    Generate a pie plot.

    A pie plot is a proportional representation of the numerical data in a
    column. This function wraps :meth:`matplotlib.pyplot.pie` for the
    specified column. If no column reference is passed and
    ``subplots=True`` a pie plot is drawn for each numerical column
    independently.

    Parameters
    ----------
    y : int or label, optional
        Label or position of the column to plot.
        If not provided, ``subplots=True`` argument must be passed.
    **kwargs
        Keyword arguments to pass on to :meth:`DataFrame.plot`.

    Returns
    -------
    matplotlib.axes.Axes or np.ndarray of them
        A NumPy array is returned when `subplots` is True.

    Raises
    ------
    ValueError
        If the parent object is a DataFrame and neither a ``y`` column
        nor a truthy ``subplots`` is given.

    See Also
    --------
    Series.plot.pie : Generate a pie plot for a Series.
    DataFrame.plot : Make plots of a DataFrame.

    Examples
    --------
    In the example below we have a DataFrame with the information about
    planet's mass and radius. We pass the 'mass' column to the
    pie function to get a pie plot.

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({'mass': [0.330, 4.87 , 5.97],
        ...                    'radius': [2439.7, 6051.8, 6378.1]},
        ...                   index=['Mercury', 'Venus', 'Earth'])
        >>> plot = df.plot.pie(y='mass', figsize=(5, 5))

    .. plot::
        :context: close-figs

        >>> plot = df.plot.pie(subplots=True, figsize=(11, 6))
    """
    if isinstance(self._parent, ABCDataFrame):
        # A frame has several columns, so the caller must either pick one
        # (`y`) or ask for one pie per column (`subplots`).
        y_missing = kwargs.get("y", None) is None
        if y_missing and not kwargs.get("subplots", False):
            raise ValueError("pie requires either y column or 'subplots=True'")
    return self(kind="pie", **kwargs)

1613 

def scatter(self, x, y, s=None, c=None, **kwargs) -> PlotAccessor:
    """
    Create a scatter plot with varying marker point size and color.

    The coordinates of each point are defined by two dataframe columns and
    filled circles are used to represent each point. This kind of plot is
    useful to see complex correlations between two variables. Points could
    be for instance natural 2D coordinates like longitude and latitude in
    a map or, in general, any pair of metrics that can be plotted against
    each other.

    Parameters
    ----------
    x : int or str
        The column name or column position to be used as horizontal
        coordinates for each point.
    y : int or str
        The column name or column position to be used as vertical
        coordinates for each point.
    s : str, scalar or array-like, optional
        The size of each point. Possible values are:

        - A string with the name of the column to be used for marker's size.

        - A single scalar so all points have the same size.

        - A sequence of scalars, which will be used for each point's size
          recursively. For instance, when passing [2,14] all points size
          will be either 2 or 14, alternatively.

          .. versionchanged:: 1.1.0

    c : str, int or array-like, optional
        The color of each point. Possible values are:

        - A single color string referred to by name, RGB or RGBA code,
          for instance 'red' or '#a98d19'.

        - A sequence of color strings referred to by name, RGB or RGBA
          code, which will be used for each point's color recursively. For
          instance ['green','yellow'] all points will be filled in green or
          yellow, alternatively.

        - A column name or position whose values will be used to color the
          marker points according to a colormap.

    **kwargs
        Keyword arguments to pass on to :meth:`DataFrame.plot`.
        ``size`` and ``color`` are accepted as alternative spellings of
        `s` and `c`.

    Returns
    -------
    :class:`matplotlib.axes.Axes` or numpy.ndarray of them

    Raises
    ------
    TypeError
        If both `s` and ``size``, or both `c` and ``color``, are given.

    See Also
    --------
    matplotlib.pyplot.scatter : Scatter plot using multiple input data
        formats.

    Examples
    --------
    Let's see how to draw a scatter plot using coordinates from the values
    in a DataFrame's columns.

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame([[5.1, 3.5, 0], [4.9, 3.0, 0], [7.0, 3.2, 1],
        ...                    [6.4, 3.2, 1], [5.9, 3.0, 2]],
        ...                   columns=['length', 'width', 'species'])
        >>> ax1 = df.plot.scatter(x='length',
        ...                       y='width',
        ...                       c='DarkBlue')

    And now with the color determined by a column as well.

    .. plot::
        :context: close-figs

        >>> ax2 = df.plot.scatter(x='length',
        ...                       y='width',
        ...                       c='species',
        ...                       colormap='viridis')
    """

    def _fold_alias(short_key, short_value, long_key, clash_message):
        # Remove the long spelling from kwargs and store whichever of the two
        # values was supplied under the short key; both at once is an error.
        long_value = kwargs.pop(long_key, None)
        if short_value is not None and long_value is not None:
            raise TypeError(clash_message)
        chosen = long_value if short_value is None else short_value
        if chosen is not None:
            kwargs[short_key] = chosen

    _fold_alias("s", s, "size", "Specify exactly one of `s` and `size`")
    _fold_alias("c", c, "color", "Specify exactly one of `c` and `color`")

    return self(kind="scatter", x=x, y=y, **kwargs)

1710 

def hexbin(
    self, x, y, C=None, reduce_C_function=None, gridsize=None, **kwargs
) -> PlotAccessor:
    """
    Generate a hexagonal binning plot.

    Generate a hexagonal binning plot of `x` versus `y`. If `C` is `None`
    (the default), this is a histogram of the number of occurrences
    of the observations at ``(x[i], y[i])``.

    If `C` is specified, specifies values at given coordinates
    ``(x[i], y[i])``. These values are accumulated for each hexagonal
    bin and then reduced according to `reduce_C_function`,
    having as default the NumPy's mean function (:meth:`numpy.mean`).
    (If `C` is specified, it must also be a 1-D sequence
    of the same length as `x` and `y`, or a column label.)

    Parameters
    ----------
    x : int or str
        The column label or position for x points.
    y : int or str
        The column label or position for y points.
    C : int or str, optional
        The column label or position for the value of `(x, y)` point.
    reduce_C_function : callable, default `np.mean`
        Function of one argument that reduces all the values in a bin to
        a single number (e.g. `np.mean`, `np.max`, `np.sum`, `np.std`).
    gridsize : int or tuple of (int, int), default 100
        The number of hexagons in the x-direction.
        The corresponding number of hexagons in the y-direction is
        chosen in a way that the hexagons are approximately regular.
        Alternatively, gridsize can be a tuple with two elements
        specifying the number of hexagons in the x-direction and the
        y-direction.
    **kwargs
        Additional keyword arguments are documented in
        :meth:`DataFrame.plot`.

    Returns
    -------
    matplotlib.AxesSubplot
        The matplotlib ``Axes`` on which the hexbin is plotted.

    See Also
    --------
    DataFrame.plot : Make plots of a DataFrame.
    matplotlib.pyplot.hexbin : Hexagonal binning plot using matplotlib,
        the matplotlib function that is used under the hood.

    Examples
    --------
    The following examples are generated with random data from
    a normal distribution.

    .. plot::
        :context: close-figs

        >>> n = 10000
        >>> df = pd.DataFrame({'x': np.random.randn(n),
        ...                    'y': np.random.randn(n)})
        >>> ax = df.plot.hexbin(x='x', y='y', gridsize=20)

    The next example uses `C` and `np.sum` as `reduce_C_function`.
    Note that `'observations'` values ranges from 1 to 5 but the result
    plot shows values up to more than 25. This is because of the
    `reduce_C_function`.

    .. plot::
        :context: close-figs

        >>> n = 500
        >>> df = pd.DataFrame({
        ...     'coord_x': np.random.uniform(-3, 3, size=n),
        ...     'coord_y': np.random.uniform(30, 50, size=n),
        ...     'observations': np.random.randint(1,5, size=n)
        ...     })
        >>> ax = df.plot.hexbin(x='coord_x',
        ...                     y='coord_y',
        ...                     C='observations',
        ...                     reduce_C_function=np.sum,
        ...                     gridsize=10,
        ...                     cmap="viridis")
    """
    # Only forward the tuning options the caller actually set; `C` is always
    # forwarded, even when it is None.
    tuning = (("reduce_C_function", reduce_C_function), ("gridsize", gridsize))
    kwargs.update((name, value) for name, value in tuning if value is not None)

    return self(kind="hexbin", x=x, y=y, C=C, **kwargs)

1801 

1802 

# Module-level cache of already-loaded backend modules, keyed by the backend
# string; `_get_plot_backend` reads it first and stores what it loads here.
_backends: dict[str, types.ModuleType] = {}

1804 

1805 

def _load_backend(backend: str) -> types.ModuleType:
    """
    Load a pandas plotting backend.

    Lookup order: the literal name "matplotlib", then an entry point named
    `backend` in the "pandas_plotting_backends" group, then a plain import
    of `backend` as a module name. The resolved object is only accepted if
    it exposes a ``plot`` attribute.

    Parameters
    ----------
    backend : str
        The identifier for the backend. Either an entrypoint item registered
        with importlib.metadata, "matplotlib", or a module name.

    Returns
    -------
    types.ModuleType
        The imported backend.

    Raises
    ------
    ImportError
        If "matplotlib" is requested but ``pandas.plotting._matplotlib``
        cannot be imported.
    ValueError
        If no backend could be resolved, or the resolved object has no
        ``plot`` attribute.
    """
    from importlib.metadata import entry_points

    if backend == "matplotlib":
        # Because matplotlib is an optional dependency and first-party backend,
        # we need to attempt an import here to raise an ImportError if needed.
        try:
            return importlib.import_module("pandas.plotting._matplotlib")
        except ImportError:
            raise ImportError(
                "matplotlib is required for plotting when the "
                'default backend "matplotlib" is selected.'
            ) from None

    group = "pandas_plotting_backends"
    all_entry_points = entry_points()
    # entry_points lost dict API ~ PY 3.10
    # https://github.com/python/importlib_metadata/issues/298
    if hasattr(all_entry_points, "select"):
        # error: "Dict[str, Tuple[EntryPoint, ...]]" has no attribute "select"
        candidates = all_entry_points.select(  # type: ignore[attr-defined]
            group=group
        )
    else:
        candidates = all_entry_points.get(group, ())

    # First registered entry point whose name matches, if any.
    registered = next((ep for ep in candidates if ep.name == backend), None)

    if registered is not None:
        module = registered.load()
        resolved = True
    else:
        # Fall back to unregistered, module name approach.
        try:
            module = importlib.import_module(backend)
        except ImportError:
            # Reported through the ValueError at the end of the function.
            resolved = False
        else:
            resolved = True

    # Validate that the interface is implemented when the option is set,
    # rather than at plot time.
    if resolved and hasattr(module, "plot"):
        return module

    raise ValueError(
        f"Could not find plotting backend '{backend}'. Ensure that you've "
        f"installed the package providing the '{backend}' entrypoint, or that "
        "the package has a top-level `.plot` method."
    )

1872 

1873 

1874def _get_plot_backend(backend: str | None = None): 

1875 """ 

1876 Return the plotting backend to use (e.g. `pandas.plotting._matplotlib`). 

1877 

1878 The plotting system of pandas uses matplotlib by default, but the idea here 

1879 is that it can also work with other third-party backends. This function 

1880 returns the module which provides a top-level `.plot` method that will 

1881 actually do the plotting. The backend is specified from a string, which 

1882 either comes from the keyword argument `backend`, or, if not specified, from 

1883 the option `pandas.options.plotting.backend`. All the rest of the code in 

1884 this file uses the backend specified there for the plotting. 

1885 

1886 The backend is imported lazily, as matplotlib is a soft dependency, and 

1887 pandas can be used without it being installed. 

1888 

1889 Notes 

1890 ----- 

1891 Modifies `_backends` with imported backend as a side effect. 

1892 """ 

1893 backend_str: str = backend or get_option("plotting.backend") 

1894 

1895 if backend_str in _backends: 

1896 return _backends[backend_str] 

1897 

1898 module = _load_backend(backend_str) 

1899 _backends[backend_str] = module 

1900 return module