Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/plotting/

1from __future__ import annotations

3import importlib

4import itertools

5import types

6from typing import (

7 TYPE_CHECKING,

8 Sequence,

10import warnings

12from pandas._config import get_option

14from pandas._typing import IndexLabel

15from pandas.util._decorators import (

16 Appender,

17 Substitution,

18)

19from pandas.util._exceptions import find_stack_level

21from pandas.core.dtypes.common import (

22 is_integer,

23 is_list_like,

24)

25from pandas.core.dtypes.generic import (

26 ABCDataFrame,

27 ABCSeries,

28)

30from pandas.core.base import PandasObject

32if TYPE_CHECKING: 32 ↛ 33line 32 didn't jump to line 33, because the condition on line 32 was never true

33 from matplotlib.axes import Axes

35 from pandas import DataFrame

def hist_series(
    self,
    by=None,
    ax=None,
    grid: bool = True,
    xlabelsize: int | None = None,
    xrot: float | None = None,
    ylabelsize: int | None = None,
    yrot: float | None = None,
    figsize: tuple[int, int] | None = None,
    bins: int | Sequence[int] = 10,
    backend: str | None = None,
    legend: bool = False,
    **kwargs,
):
    """
    Draw a histogram of the input series via the selected plotting backend.

    The backend is resolved from ``backend`` and its ``hist_series``
    function receives every other argument unchanged.

    Parameters
    ----------
    by : object, optional
        If passed, then used to form histograms for separate groups.
    ax : matplotlib axis object
        If not passed, uses gca().
    grid : bool, default True
        Whether to show axis grid lines.
    xlabelsize : int, default None
        If specified changes the x-axis label size.
    xrot : float, default None
        Rotation of x axis labels.
    ylabelsize : int, default None
        If specified changes the y-axis label size.
    yrot : float, default None
        Rotation of y axis labels.
    figsize : tuple, default None
        Figure size in inches by default.
    bins : int or sequence, default 10
        Number of histogram bins to be used. If an integer is given, bins + 1
        bin edges are calculated and returned. If bins is a sequence, gives
        bin edges, including left edge of first bin and right edge of last
        bin. In this case, bins is returned unmodified.
    backend : str, default None
        Backend to use instead of the backend specified in the option
        ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
        specify the ``plotting.backend`` for the whole session, set
        ``pd.options.plotting.backend``.

        .. versionadded:: 1.0.0

    legend : bool, default False
        Whether to show the legend.

        .. versionadded:: 1.1.0

    **kwargs
        To be passed to the actual plotting function.

    Returns
    -------
    matplotlib.AxesSubplot
        A histogram plot.

    See Also
    --------
    matplotlib.axes.Axes.hist : Plot a histogram using matplotlib.
    """
    # ``backend`` only selects the implementation; it is not forwarded.
    hist_options = {
        "by": by,
        "ax": ax,
        "grid": grid,
        "xlabelsize": xlabelsize,
        "xrot": xrot,
        "ylabelsize": ylabelsize,
        "yrot": yrot,
        "figsize": figsize,
        "bins": bins,
        "legend": legend,
    }
    return _get_plot_backend(backend).hist_series(self, **hist_options, **kwargs)

119

120

def hist_frame(
    data: DataFrame,
    column: IndexLabel = None,
    by=None,
    grid: bool = True,
    xlabelsize: int | None = None,
    xrot: float | None = None,
    ylabelsize: int | None = None,
    yrot: float | None = None,
    ax=None,
    sharex: bool = False,
    sharey: bool = False,
    figsize: tuple[int, int] | None = None,
    layout: tuple[int, int] | None = None,
    bins: int | Sequence[int] = 10,
    backend: str | None = None,
    legend: bool = False,
    **kwargs,
):
    """
    Make a histogram of the DataFrame's columns.

    A `histogram`_ is a representation of the distribution of data.
    This function calls :meth:`matplotlib.pyplot.hist`, on each series in
    the DataFrame, resulting in one histogram per column.

    .. _histogram: https://en.wikipedia.org/wiki/Histogram

    Parameters
    ----------
    data : DataFrame
        The pandas object holding the data.
    column : str or sequence, optional
        If passed, will be used to limit data to a subset of columns.
    by : object, optional
        If passed, then used to form histograms for separate groups.
    grid : bool, default True
        Whether to show axis grid lines.
    xlabelsize : int, default None
        If specified changes the x-axis label size.
    xrot : float, default None
        Rotation of x axis labels. For example, a value of 90 displays the
        x labels rotated 90 degrees clockwise.
    ylabelsize : int, default None
        If specified changes the y-axis label size.
    yrot : float, default None
        Rotation of y axis labels. For example, a value of 90 displays the
        y labels rotated 90 degrees clockwise.
    ax : Matplotlib axes object, default None
        The axes to plot the histogram on.
    sharex : bool, default False
        In case subplots=True, share x axis and set some x axis labels to
        invisible.
        Note that passing in both an ax and sharex=True will alter all x axis
        labels for all subplots in a figure.
    sharey : bool, default False
        In case subplots=True, share y axis and set some y axis labels to
        invisible.
    figsize : tuple, optional
        The size in inches of the figure to create. Uses the value in
        `matplotlib.rcParams` by default.
    layout : tuple, optional
        Tuple of (rows, columns) for the layout of the histograms.
    bins : int or sequence, default 10
        Number of histogram bins to be used. If an integer is given, bins + 1
        bin edges are calculated and returned. If bins is a sequence, gives
        bin edges, including left edge of first bin and right edge of last
        bin. In this case, bins is returned unmodified.

    backend : str, default None
        Backend to use instead of the backend specified in the option
        ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
        specify the ``plotting.backend`` for the whole session, set
        ``pd.options.plotting.backend``.

        .. versionadded:: 1.0.0

    legend : bool, default False
        Whether to show the legend.

        .. versionadded:: 1.1.0

    **kwargs
        All other plotting keyword arguments to be passed to
        :meth:`matplotlib.pyplot.hist`.

    Returns
    -------
    matplotlib.AxesSubplot or numpy.ndarray of them

    See Also
    --------
    matplotlib.pyplot.hist : Plot a histogram using matplotlib.

    Examples
    --------
    This example draws a histogram based on the length and width of
    some animals, displayed in three bins

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({
        ...     'length': [1.5, 0.5, 1.2, 0.9, 3],
        ...     'width': [0.7, 0.2, 0.15, 0.2, 1.1]
        ...     }, index=['pig', 'rabbit', 'duck', 'chicken', 'horse'])
        >>> hist = df.hist(bins=3)
    """
    # ``backend`` only selects the implementation; it is not forwarded.
    hist_options = {
        "column": column,
        "by": by,
        "grid": grid,
        "xlabelsize": xlabelsize,
        "xrot": xrot,
        "ylabelsize": ylabelsize,
        "yrot": yrot,
        "ax": ax,
        "sharex": sharex,
        "sharey": sharey,
        "figsize": figsize,
        "layout": layout,
        "legend": legend,
        "bins": bins,
    }
    return _get_plot_backend(backend).hist_frame(data, **hist_options, **kwargs)

249

250

251_boxplot_doc = """

252Make a box plot from DataFrame columns.

253

254Make a box-and-whisker plot from DataFrame columns, optionally grouped

255by some other columns. A box plot is a method for graphically depicting

256groups of numerical data through their quartiles.

257The box extends from the Q1 to Q3 quartile values of the data,

258with a line at the median (Q2). The whiskers extend from the edges

259of box to show the range of the data. By default, they extend no more than

260`1.5 * IQR (IQR = Q3 - Q1)` from the edges of the box, ending at the farthest

261data point within that interval. Outliers are plotted as separate dots.

262

263For further details see

264Wikipedia's entry for `boxplot <https://en.wikipedia.org/wiki/Box_plot>`_.

265

266Parameters

267----------

268column : str or list of str, optional

269 Column name or list of names, or vector.

270 Can be any valid input to :meth:`pandas.DataFrame.groupby`.

271by : str or array-like, optional

272 Column in the DataFrame to :meth:`pandas.DataFrame.groupby`.

273 One box-plot will be done per value of columns in `by`.

274ax : object of class matplotlib.axes.Axes, optional

275 The matplotlib axes to be used by boxplot.

276fontsize : float or str

277 Tick label font size in points or as a string (e.g., `large`).

278rot : int or float, default 0

279 The rotation angle of labels (in degrees)

280 with respect to the screen coordinate system.

281grid : bool, default True

282 Setting this to True will show the grid.

283figsize : A tuple (width, height) in inches

284 The size of the figure to create in matplotlib.

285layout : tuple (rows, columns), optional

286 For example, (3, 5) will display the subplots

287 using 3 columns and 5 rows, starting from the top-left.

288return_type : {'axes', 'dict', 'both'} or None, default 'axes'

289 The kind of object to return. The default is ``axes``.

290

291 * 'axes' returns the matplotlib axes the boxplot is drawn on.

292 * 'dict' returns a dictionary whose values are the matplotlib

293 Lines of the boxplot.

294 * 'both' returns a namedtuple with the axes and dict.

295 * when grouping with ``by``, a Series mapping columns to

296 ``return_type`` is returned.

297

298 If ``return_type`` is `None`, a NumPy array

299 of axes with the same shape as ``layout`` is returned.

300%(backend)s\

301

302**kwargs

303 All other plotting keyword arguments to be passed to

304 :func:`matplotlib.pyplot.boxplot`.

305

306Returns

307-------

308result

309 See Notes.

310

311See Also

312--------

313Series.plot.hist: Make a histogram.

314matplotlib.pyplot.boxplot : Matplotlib equivalent plot.

315

316Notes

317-----

318The return type depends on the `return_type` parameter:

319

320* 'axes' : object of class matplotlib.axes.Axes

321* 'dict' : dict of matplotlib.lines.Line2D objects

322* 'both' : a namedtuple with structure (ax, lines)

323

324For data grouped with ``by``, return a Series of the above or a numpy

325array:

326

327* :class:`~pandas.Series`

328* :class:`~numpy.array` (for ``return_type = None``)

329

330Use ``return_type='dict'`` when you want to tweak the appearance

331of the lines after plotting. In this case a dict containing the Lines

332making up the boxes, caps, fliers, medians, and whiskers is returned.

333

334Examples

335--------

336

337Boxplots can be created for every column in the dataframe

338by ``df.boxplot()`` or indicating the columns to be used:

339

340.. plot::

341 :context: close-figs

342

343 >>> np.random.seed(1234)

344 >>> df = pd.DataFrame(np.random.randn(10, 4),

345 ... columns=['Col1', 'Col2', 'Col3', 'Col4'])

346 >>> boxplot = df.boxplot(column=['Col1', 'Col2', 'Col3']) # doctest: +SKIP

347

348Boxplots of variables distributions grouped by the values of a third

349variable can be created using the option ``by``. For instance:

350

351.. plot::

352 :context: close-figs

353

354 >>> df = pd.DataFrame(np.random.randn(10, 2),

355 ... columns=['Col1', 'Col2'])

356 >>> df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A',

357 ... 'B', 'B', 'B', 'B', 'B'])

358 >>> boxplot = df.boxplot(by='X')

359

360A list of strings (i.e. ``['X', 'Y']``) can be passed to boxplot

361in order to group the data by combination of the variables in the x-axis:

362

363.. plot::

364 :context: close-figs

365

366 >>> df = pd.DataFrame(np.random.randn(10, 3),

367 ... columns=['Col1', 'Col2', 'Col3'])

368 >>> df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A',

369 ... 'B', 'B', 'B', 'B', 'B'])

370 >>> df['Y'] = pd.Series(['A', 'B', 'A', 'B', 'A',

371 ... 'B', 'A', 'B', 'A', 'B'])

372 >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by=['X', 'Y'])

373

374The layout of boxplot can be adjusted giving a tuple to ``layout``:

375

376.. plot::

377 :context: close-figs

378

379 >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X',

380 ... layout=(2, 1))

381

382Additional formatting can be done to the boxplot, like suppressing the grid

383(``grid=False``), rotating the labels in the x-axis (i.e. ``rot=45``)

384or changing the fontsize (i.e. ``fontsize=15``):

385

386.. plot::

387 :context: close-figs

388

389 >>> boxplot = df.boxplot(grid=False, rot=45, fontsize=15) # doctest: +SKIP

390

391The parameter ``return_type`` can be used to select the type of element

392returned by `boxplot`. When ``return_type='axes'`` is selected,

393the matplotlib axes on which the boxplot is drawn are returned:

394

395 >>> boxplot = df.boxplot(column=['Col1', 'Col2'], return_type='axes')

396 >>> type(boxplot)

397 <class 'matplotlib.axes._subplots.AxesSubplot'>

398

399When grouping with ``by``, a Series mapping columns to ``return_type``

400is returned:

401

402 >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X',

403 ... return_type='axes')

404 >>> type(boxplot)

405 <class 'pandas.core.series.Series'>

406

407If ``return_type`` is `None`, a NumPy array of axes with the same shape

408as ``layout`` is returned:

409

410 >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X',

411 ... return_type=None)

412 >>> type(boxplot)

413 <class 'numpy.ndarray'>

414"""

415

416_backend_doc = """\

417backend : str, default None

418 Backend to use instead of the backend specified in the option

419 ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to

420 specify the ``plotting.backend`` for the whole session, set

421 ``pd.options.plotting.backend``.

422

423 .. versionadded:: 1.0.0

424"""

425

426

427_bar_or_line_doc = """

428 Parameters

429 ----------

430 x : label or position, optional

431 Allows plotting of one column versus another. If not specified,

432 the index of the DataFrame is used.

433 y : label or position, optional

434 Allows plotting of one column versus another. If not specified,

435 all numerical columns are used.

436 color : str, array-like, or dict, optional

437 The color for each of the DataFrame's columns. Possible values are:

438

439 - A single color string referred to by name, RGB or RGBA code,

440 for instance 'red' or '#a98d19'.

441

442 - A sequence of color strings referred to by name, RGB or RGBA

443 code, which will be used for each column recursively. For

444 instance ['green','yellow'] each column's %(kind)s will be filled in

445 green or yellow, alternatively. If there is only a single column to

446 be plotted, then only the first color from the color list will be

447 used.

448

449 - A dict of the form {column name : color}, so that each column will be

450 colored accordingly. For example, if your columns are called `a` and

451 `b`, then passing {'a': 'green', 'b': 'red'} will color %(kind)ss for

452 column `a` in green and %(kind)ss for column `b` in red.

453

454 .. versionadded:: 1.1.0

455

456 **kwargs

457 Additional keyword arguments are documented in

458 :meth:`DataFrame.plot`.

459

460 Returns

461 -------

462 matplotlib.axes.Axes or np.ndarray of them

463 An ndarray is returned with one :class:`matplotlib.axes.Axes`

464 per column when ``subplots=True``.

465"""

466

467

@Substitution(backend="")
@Appender(_boxplot_doc)
def boxplot(
    data: DataFrame,
    column: str | list[str] | None = None,
    by: str | list[str] | None = None,
    ax: Axes | None = None,
    fontsize: float | str | None = None,
    rot: int = 0,
    grid: bool = True,
    figsize: tuple[float, float] | None = None,
    layout: tuple[int, int] | None = None,
    return_type: str | None = None,
    **kwargs,
):
    # No literal docstring on purpose: the decorators build ``__doc__`` from
    # ``_boxplot_doc`` with an empty ``backend`` entry.
    # This entry point has no ``backend`` parameter and always asks for the
    # "matplotlib" backend.
    matplotlib_backend = _get_plot_backend("matplotlib")
    boxplot_options = {
        "column": column,
        "by": by,
        "ax": ax,
        "fontsize": fontsize,
        "rot": rot,
        "grid": grid,
        "figsize": figsize,
        "layout": layout,
        "return_type": return_type,
    }
    return matplotlib_backend.boxplot(data, **boxplot_options, **kwargs)

497

498

@Substitution(backend=_backend_doc)
@Appender(_boxplot_doc)
def boxplot_frame(
    self,
    column=None,
    by=None,
    ax=None,
    fontsize=None,
    rot: int = 0,
    grid: bool = True,
    figsize=None,
    layout=None,
    return_type=None,
    backend=None,
    **kwargs,
):
    # No literal docstring on purpose: the decorators build ``__doc__`` from
    # ``_boxplot_doc`` and splice ``_backend_doc`` into it.
    # ``backend`` is consumed here to pick the implementation and is not
    # forwarded to it.
    chosen_backend = _get_plot_backend(backend)
    boxplot_options = {
        "column": column,
        "by": by,
        "ax": ax,
        "fontsize": fontsize,
        "rot": rot,
        "grid": grid,
        "figsize": figsize,
        "layout": layout,
        "return_type": return_type,
    }
    return chosen_backend.boxplot_frame(self, **boxplot_options, **kwargs)

529

530

def boxplot_frame_groupby(
    grouped,
    subplots: bool = True,
    column=None,
    fontsize=None,
    rot: int = 0,
    grid: bool = True,
    ax=None,
    figsize=None,
    layout=None,
    sharex: bool = False,
    sharey: bool = True,
    backend=None,
    **kwargs,
):
    """
    Make box plots from DataFrameGroupBy data.

    Parameters
    ----------
    grouped : Grouped DataFrame
    subplots : bool
        * ``False`` - no subplots will be used
        * ``True`` - create a subplot for each group.

    column : column name or list of names, or vector
        Can be any valid input to groupby.
    fontsize : int or str
    rot : label rotation angle
    grid : Setting this to True will show the grid
    ax : Matplotlib axis object, default None
    figsize : A tuple (width, height) in inches
    layout : tuple (optional)
        The layout of the plot: (rows, columns).
    sharex : bool, default False
        Whether x-axes will be shared among subplots.
    sharey : bool, default True
        Whether y-axes will be shared among subplots.
    backend : str, default None
        Backend to use instead of the backend specified in the option
        ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
        specify the ``plotting.backend`` for the whole session, set
        ``pd.options.plotting.backend``.

        .. versionadded:: 1.0.0

    **kwargs
        All other plotting keyword arguments to be passed to
        matplotlib's boxplot function.

    Returns
    -------
    dict of key/value = group key/DataFrame.boxplot return value
    or DataFrame.boxplot return value in case ``subplots=False``

    Examples
    --------
    You can create boxplots for grouped data and show them as separate subplots:

    .. plot::
        :context: close-figs

        >>> import itertools
        >>> tuples = [t for t in itertools.product(range(1000), range(4))]
        >>> index = pd.MultiIndex.from_tuples(tuples, names=['lvl0', 'lvl1'])
        >>> data = np.random.randn(len(index),4)
        >>> df = pd.DataFrame(data, columns=list('ABCD'), index=index)
        >>> grouped = df.groupby(level='lvl1')
        >>> grouped.boxplot(rot=45, fontsize=12, figsize=(8,10))  # doctest: +SKIP

    The ``subplots=False`` option shows the boxplots in a single figure.

    .. plot::
        :context: close-figs

        >>> grouped.boxplot(subplots=False, rot=45, fontsize=12)  # doctest: +SKIP
    """
    # ``backend`` only selects the implementation; it is not forwarded.
    groupby_options = {
        "subplots": subplots,
        "column": column,
        "fontsize": fontsize,
        "rot": rot,
        "grid": grid,
        "ax": ax,
        "figsize": figsize,
        "layout": layout,
        "sharex": sharex,
        "sharey": sharey,
    }
    return _get_plot_backend(backend).boxplot_frame_groupby(
        grouped, **groupby_options, **kwargs
    )

623

624

625class PlotAccessor(PandasObject):

626 """

627 Make plots of Series or DataFrame.

628

629 Uses the backend specified by the

630 option ``plotting.backend``. By default, matplotlib is used.

631

632 Parameters

633 ----------

634 data : Series or DataFrame

635 The object for which the method is called.

636 x : label or position, default None

637 Only used if data is a DataFrame.

638 y : label, position or list of label, positions, default None

639 Allows plotting of one column versus another. Only used if data is a

640 DataFrame.

641 kind : str

642 The kind of plot to produce:

643

644 - 'line' : line plot (default)

645 - 'bar' : vertical bar plot

646 - 'barh' : horizontal bar plot

647 - 'hist' : histogram

648 - 'box' : boxplot

649 - 'kde' : Kernel Density Estimation plot

650 - 'density' : same as 'kde'

651 - 'area' : area plot

652 - 'pie' : pie plot

653 - 'scatter' : scatter plot (DataFrame only)

654 - 'hexbin' : hexbin plot (DataFrame only)

655 ax : matplotlib axes object, default None

656 An axes of the current figure.

657 subplots : bool or sequence of iterables, default False

658 Whether to group columns into subplots:

659

660 - ``False`` : No subplots will be used

661 - ``True`` : Make separate subplots for each column.

662 - sequence of iterables of column labels: Create a subplot for each

663 group of columns. For example `[('a', 'c'), ('b', 'd')]` will

664 create 2 subplots: one with columns 'a' and 'c', and one

665 with columns 'b' and 'd'. Remaining columns that aren't specified

666 will be plotted in additional subplots (one per column).

667 .. versionadded:: 1.5.0

668

669 sharex : bool, default True if ax is None else False

670 In case ``subplots=True``, share x axis and set some x axis labels

671 to invisible; defaults to True if ax is None otherwise False if

672 an ax is passed in; Be aware, that passing in both an ax and

673 ``sharex=True`` will alter all x axis labels for all axis in a figure.

674 sharey : bool, default False

675 In case ``subplots=True``, share y axis and set some y axis labels to invisible.

676 layout : tuple, optional

677 (rows, columns) for the layout of subplots.

678 figsize : a tuple (width, height) in inches

679 Size of a figure object.

680 use_index : bool, default True

681 Use index as ticks for x axis.

682 title : str or list

683 Title to use for the plot. If a string is passed, print the string

684 at the top of the figure. If a list is passed and `subplots` is

685 True, print each item in the list above the corresponding subplot.

686 grid : bool, default None (matlab style default)

687 Axis grid lines.

688 legend : bool or {'reverse'}

689 Place legend on axis subplots.

690 style : list or dict

691 The matplotlib line style per column.

692 logx : bool or 'sym', default False

693 Use log scaling or symlog scaling on x axis.

694 .. versionchanged:: 0.25.0

695

696 logy : bool or 'sym' default False

697 Use log scaling or symlog scaling on y axis.

698 .. versionchanged:: 0.25.0

699

700 loglog : bool or 'sym', default False

701 Use log scaling or symlog scaling on both x and y axes.

702 .. versionchanged:: 0.25.0

703

704 xticks : sequence

705 Values to use for the xticks.

706 yticks : sequence

707 Values to use for the yticks.

708 xlim : 2-tuple/list

709 Set the x limits of the current axes.

710 ylim : 2-tuple/list

711 Set the y limits of the current axes.

712 xlabel : label, optional

713 Name to use for the xlabel on x-axis. Default uses index name as xlabel, or the

714 x-column name for planar plots.

715

716 .. versionadded:: 1.1.0

717

718 .. versionchanged:: 1.2.0

719

720 Now applicable to planar plots (`scatter`, `hexbin`).

721

722 ylabel : label, optional

723 Name to use for the ylabel on y-axis. Default will show no ylabel, or the

724 y-column name for planar plots.

725

726 .. versionadded:: 1.1.0

727

728 .. versionchanged:: 1.2.0

729

730 Now applicable to planar plots (`scatter`, `hexbin`).

731

732 rot : int, default None

733 Rotation for ticks (xticks for vertical, yticks for horizontal

734 plots).

735 fontsize : int, default None

736 Font size for xticks and yticks.

737 colormap : str or matplotlib colormap object, default None

738 Colormap to select colors from. If string, load colormap with that

739 name from matplotlib.

740 colorbar : bool, optional

741 If True, plot colorbar (only relevant for 'scatter' and 'hexbin'

742 plots).

743 position : float

744 Specify relative alignments for bar plot layout.

745 From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5

746 (center).

747 table : bool, Series or DataFrame, default False

748 If True, draw a table using the data in the DataFrame and the data

749 will be transposed to meet matplotlib's default layout.

750 If a Series or DataFrame is passed, use passed data to draw a

751 table.

752 yerr : DataFrame, Series, array-like, dict and str

753 See :ref:`Plotting with Error Bars <visualization.errorbars>` for

754 detail.

755 xerr : DataFrame, Series, array-like, dict and str

756 Equivalent to yerr.

757 stacked : bool, default False in line and bar plots, and True in area plot

758 If True, create stacked plot.

759 sort_columns : bool, default False

760 Sort column names to determine plot ordering.

761

762 .. deprecated:: 1.5.0

763 The `sort_columns` arguments is deprecated and will be removed in a

764 future version.

765

766 secondary_y : bool or sequence, default False

767 Whether to plot on the secondary y-axis if a list/tuple, which

768 columns to plot on secondary y-axis.

769 mark_right : bool, default True

770 When using a secondary_y axis, automatically mark the column

771 labels with "(right)" in the legend.

772 include_bool : bool, default is False

773 If True, boolean values can be plotted.

774 backend : str, default None

775 Backend to use instead of the backend specified in the option

776 ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to

777 specify the ``plotting.backend`` for the whole session, set

778 ``pd.options.plotting.backend``.

779

780 .. versionadded:: 1.0.0

781

782 **kwargs

783 Options to pass to matplotlib plotting method.

784

785 Returns

786 -------

787 :class:`matplotlib.axes.Axes` or numpy.ndarray of them

788 If the backend is not the default matplotlib one, the return value

789 will be the object returned by the backend.

790

791 Notes

792 -----

793 - See matplotlib documentation online for more on this subject

794 - If `kind` = 'bar' or 'barh', you can specify relative alignments

795 for bar plot layout by `position` keyword.

796 From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5

797 (center)

798 """

799

800 _common_kinds = ("line", "bar", "barh", "kde", "density", "area", "hist", "box")

801 _series_kinds = ("pie",)

802 _dataframe_kinds = ("scatter", "hexbin")

803 _kind_aliases = {"density": "kde"}

804 _all_kinds = _common_kinds + _series_kinds + _dataframe_kinds

805

806 def __init__(self, data) -> None:

807 self._parent = data

808

809 @staticmethod

810 def _get_call_args(backend_name, data, args, kwargs):

811 """

812 This function makes calls to this accessor `__call__` method compatible

813 with the previous `SeriesPlotMethods.__call__` and

814 `DataFramePlotMethods.__call__`. Those had slightly different

815 signatures, since `DataFramePlotMethods` accepted `x` and `y`

816 parameters.

817 """

818 if isinstance(data, ABCSeries):

819 arg_def = [

820 ("kind", "line"),

821 ("ax", None),

822 ("figsize", None),

823 ("use_index", True),

824 ("title", None),

825 ("grid", None),

826 ("legend", False),

827 ("style", None),

828 ("logx", False),

829 ("logy", False),

830 ("loglog", False),

831 ("xticks", None),

832 ("yticks", None),

833 ("xlim", None),

834 ("ylim", None),

835 ("rot", None),

836 ("fontsize", None),

837 ("colormap", None),

838 ("table", False),

839 ("yerr", None),

840 ("xerr", None),

841 ("label", None),

842 ("secondary_y", False),

843 ("xlabel", None),

844 ("ylabel", None),

845 ]

846 elif isinstance(data, ABCDataFrame):

847 arg_def = [

848 ("x", None),

849 ("y", None),

850 ("kind", "line"),

851 ("ax", None),

852 ("subplots", False),

853 ("sharex", None),

854 ("sharey", False),

855 ("layout", None),

856 ("figsize", None),

857 ("use_index", True),

858 ("title", None),

859 ("grid", None),

860 ("legend", True),

861 ("style", None),

862 ("logx", False),

863 ("logy", False),

864 ("loglog", False),

865 ("xticks", None),

866 ("yticks", None),

867 ("xlim", None),

868 ("ylim", None),

869 ("rot", None),

870 ("fontsize", None),

871 ("colormap", None),

872 ("table", False),

873 ("yerr", None),

874 ("xerr", None),

875 ("secondary_y", False),

876 ("sort_columns", False),

877 ("xlabel", None),

878 ("ylabel", None),

879 ]

880 else:

881 raise TypeError(

882 f"Called plot accessor for type {type(data).__name__}, "

883 "expected Series or DataFrame"

884 )

885

886 if "sort_columns" in itertools.chain(args, kwargs.keys()):

887 warnings.warn(

888 "`sort_columns` is deprecated and will be removed in a future "

889 "version.",

890 FutureWarning,

891 stacklevel=find_stack_level(),

892 )

893

894 if args and isinstance(data, ABCSeries):

895 positional_args = str(args)[1:-1]

896 keyword_args = ", ".join(

897 [f"{name}={repr(value)}" for (name, _), value in zip(arg_def, args)]

898 )

899 msg = (

900 "`Series.plot()` should not be called with positional "

901 "arguments, only keyword arguments. The order of "

902 "positional arguments will change in the future. "

903 f"Use `Series.plot({keyword_args})` instead of "

904 f"`Series.plot({positional_args})`."

905 )

906 raise TypeError(msg)

907

908 pos_args = {name: value for (name, _), value in zip(arg_def, args)}

909 if backend_name == "pandas.plotting._matplotlib":

910 kwargs = dict(arg_def, **pos_args, **kwargs)

911 else:

912 kwargs = dict(pos_args, **kwargs)

913

914 x = kwargs.pop("x", None)

915 y = kwargs.pop("y", None)

916 kind = kwargs.pop("kind", "line")

917 return x, y, kind, kwargs

918

919 def __call__(self, *args, **kwargs):

920 plot_backend = _get_plot_backend(kwargs.pop("backend", None))

921

922 x, y, kind, kwargs = self._get_call_args(

923 plot_backend.__name__, self._parent, args, kwargs

924 )

925

926 kind = self._kind_aliases.get(kind, kind)

927

928 # when using another backend, get out of the way

929 if plot_backend.__name__ != "pandas.plotting._matplotlib":

930 return plot_backend.plot(self._parent, x=x, y=y, kind=kind, **kwargs)

931

932 if kind not in self._all_kinds:

933 raise ValueError(f"{kind} is not a valid plot kind")

934

935 # The original data structured can be transformed before passed to the

936 # backend. For example, for DataFrame is common to set the index as the

937 # `x` parameter, and return a Series with the parameter `y` as values.

938 data = self._parent.copy()

939

940 if isinstance(data, ABCSeries):

941 kwargs["reuse_plot"] = True

942

943 if kind in self._dataframe_kinds:

944 if isinstance(data, ABCDataFrame):

945 return plot_backend.plot(data, x=x, y=y, kind=kind, **kwargs)

946 else:

947 raise ValueError(f"plot kind {kind} can only be used for data frames")

948 elif kind in self._series_kinds:

949 if isinstance(data, ABCDataFrame):

950 if y is None and kwargs.get("subplots") is False:

951 raise ValueError(

952 f"{kind} requires either y column or 'subplots=True'"

953 )

954 elif y is not None:

955 if is_integer(y) and not data.columns.holds_integer():

956 y = data.columns[y]

957 # converted to series actually. copy to not modify

958 data = data[y].copy()

959 data.index.name = y

960 elif isinstance(data, ABCDataFrame):

961 data_cols = data.columns

962 if x is not None:

963 if is_integer(x) and not data.columns.holds_integer():

964 x = data_cols[x]

965 elif not isinstance(data[x], ABCSeries):

966 raise ValueError("x must be a label or position")

967 data = data.set_index(x)

968 if y is not None:

969 # check if we have y as int or list of ints

970 int_ylist = is_list_like(y) and all(is_integer(c) for c in y)

971 int_y_arg = is_integer(y) or int_ylist

972 if int_y_arg and not data.columns.holds_integer():

973 y = data_cols[y]

974

975 label_kw = kwargs["label"] if "label" in kwargs else False

976 for kw in ["xerr", "yerr"]:

977 if kw in kwargs and (

978 isinstance(kwargs[kw], str) or is_integer(kwargs[kw])

979 ):

980 try:

981 kwargs[kw] = data[kwargs[kw]]

982 except (IndexError, KeyError, TypeError):

983 pass

984

985 # don't overwrite

986 data = data[y].copy()

987

988 if isinstance(data, ABCSeries):

989 label_name = label_kw or y

990 data.name = label_name

991 else:

992 match = is_list_like(label_kw) and len(label_kw) == len(y)

993 if label_kw and not match:

994 raise ValueError(

995 "label should be list-like and same length as y"

996 )

997 label_name = label_kw or data.columns

998 data.columns = label_name

999

1000 return plot_backend.plot(data, kind=kind, **kwargs)

1001

# Give ``__call__`` the docstring bound to ``__doc__`` in the enclosing scope.
# NOTE(review): presumably this statement sits in the class body, so
# ``__doc__`` is the class docstring -- the class header is outside this
# view; confirm.
__call__.__doc__ = __doc__

1003

# Docstring assembly (decorators apply bottom-up): the shared
# ``_bar_or_line_doc`` text is attached first, ``kind`` is substituted into
# it, and the See Also / Examples section below is attached last.
# NOTE(review): relies on the semantics of pandas.util._decorators -- confirm.
#
# Every example assigns the returned object instead of echoing its repr,
# matching the other examples in this file.
@Appender(
    """
    See Also
    --------
    matplotlib.pyplot.plot : Plot y versus x as lines and/or markers.

    Examples
    --------

    .. plot::
        :context: close-figs

        >>> s = pd.Series([1, 3, 2])
        >>> ax = s.plot.line()

    .. plot::
        :context: close-figs

        The following example shows the populations for some animals
        over the years.

        >>> df = pd.DataFrame({
        ...     'pig': [20, 18, 489, 675, 1776],
        ...     'horse': [4, 25, 281, 600, 1900]
        ... }, index=[1990, 1997, 2003, 2009, 2014])
        >>> lines = df.plot.line()

    .. plot::
        :context: close-figs

        An example with subplots, so an array of axes is returned.

        >>> axes = df.plot.line(subplots=True)
        >>> type(axes)
        <class 'numpy.ndarray'>

    .. plot::
        :context: close-figs

        Let's repeat the same example, but specifying colors for
        each column (in this case, for each animal).

        >>> axes = df.plot.line(
        ...     subplots=True, color={"pig": "pink", "horse": "#742802"}
        ... )

    .. plot::
        :context: close-figs

        The following example shows the relationship between both
        populations.

        >>> lines = df.plot.line(x='pig', y='horse')
    """
)
@Substitution(kind="line")
@Appender(_bar_or_line_doc)
def line(self, x=None, y=None, **kwargs) -> PlotAccessor:
    """
    Plot Series or DataFrame as lines.

    This function is useful to plot lines using DataFrame's values
    as coordinates.
    """
    # Thin wrapper: delegate to ``__call__`` with the plot kind fixed.
    return self(kind="line", x=x, y=y, **kwargs)

1070

# Docstring assembly (decorators apply bottom-up): the shared
# ``_bar_or_line_doc`` text is attached first, ``kind`` is substituted into
# it, and the See Also / Examples section below is attached last.
# NOTE(review): relies on the semantics of pandas.util._decorators -- confirm.
@Appender(
    """
    See Also
    --------
    DataFrame.plot.barh : Horizontal bar plot.
    DataFrame.plot : Make plots of a DataFrame.
    matplotlib.pyplot.bar : Make a bar plot with matplotlib.

    Examples
    --------
    Basic plot.

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({'lab':['A', 'B', 'C'], 'val':[10, 30, 20]})
        >>> ax = df.plot.bar(x='lab', y='val', rot=0)

    Plot a whole dataframe to a bar plot. Each column is assigned a
    distinct color, and each row is nested in a group along the
    horizontal axis.

    .. plot::
        :context: close-figs

        >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
        >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
        >>> index = ['snail', 'pig', 'elephant',
        ...          'rabbit', 'giraffe', 'coyote', 'horse']
        >>> df = pd.DataFrame({'speed': speed,
        ...                    'lifespan': lifespan}, index=index)
        >>> ax = df.plot.bar(rot=0)

    Plot stacked bar charts for the DataFrame

    .. plot::
        :context: close-figs

        >>> ax = df.plot.bar(stacked=True)

    Instead of nesting, the figure can be split by column with
    ``subplots=True``. In this case, a :class:`numpy.ndarray` of
    :class:`matplotlib.axes.Axes` are returned.

    .. plot::
        :context: close-figs

        >>> axes = df.plot.bar(rot=0, subplots=True)
        >>> axes[1].legend(loc=2)  # doctest: +SKIP

    If you don't like the default colours, you can specify how you'd
    like each column to be colored.

    .. plot::
        :context: close-figs

        >>> axes = df.plot.bar(
        ...     rot=0, subplots=True, color={"speed": "red", "lifespan": "green"}
        ... )
        >>> axes[1].legend(loc=2)  # doctest: +SKIP

    Plot a single column.

    .. plot::
        :context: close-figs

        >>> ax = df.plot.bar(y='speed', rot=0)

    Plot only selected categories for the DataFrame.

    .. plot::
        :context: close-figs

        >>> ax = df.plot.bar(x='lifespan', rot=0)
    """
)
@Substitution(kind="bar")
@Appender(_bar_or_line_doc)
def bar(self, x=None, y=None, **kwargs) -> PlotAccessor:
    """
    Vertical bar plot.

    A bar plot is a plot that presents categorical data with
    rectangular bars with lengths proportional to the values that they
    represent. A bar plot shows comparisons among discrete categories. One
    axis of the plot shows the specific categories being compared, and the
    other axis represents a measured value.
    """
    # Thin wrapper: delegate to ``__call__`` with the plot kind fixed.
    return self(kind="bar", x=x, y=y, **kwargs)

1160

# Docstring assembly (decorators apply bottom-up): the shared
# ``_bar_or_line_doc`` text is attached first, ``kind`` is substituted into
# it, and the See Also / Examples section below is attached last.
# NOTE(review): relies on the semantics of pandas.util._decorators -- confirm.
# NOTE(review): ``kind`` is substituted as "bar" here even though the method
# plots "barh"; presumably deliberate for the shared text -- confirm.
@Appender(
    """
    See Also
    --------
    DataFrame.plot.bar: Vertical bar plot.
    DataFrame.plot : Make plots of DataFrame using matplotlib.
    matplotlib.axes.Axes.bar : Plot a vertical bar plot using matplotlib.

    Examples
    --------
    Basic example

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({'lab': ['A', 'B', 'C'], 'val': [10, 30, 20]})
        >>> ax = df.plot.barh(x='lab', y='val')

    Plot a whole DataFrame to a horizontal bar plot

    .. plot::
        :context: close-figs

        >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
        >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
        >>> index = ['snail', 'pig', 'elephant',
        ...          'rabbit', 'giraffe', 'coyote', 'horse']
        >>> df = pd.DataFrame({'speed': speed,
        ...                    'lifespan': lifespan}, index=index)
        >>> ax = df.plot.barh()

    Plot stacked barh charts for the DataFrame

    .. plot::
        :context: close-figs

        >>> ax = df.plot.barh(stacked=True)

    We can specify colors for each column

    .. plot::
        :context: close-figs

        >>> ax = df.plot.barh(color={"speed": "red", "lifespan": "green"})

    Plot a column of the DataFrame to a horizontal bar plot

    .. plot::
        :context: close-figs

        >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
        >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
        >>> index = ['snail', 'pig', 'elephant',
        ...          'rabbit', 'giraffe', 'coyote', 'horse']
        >>> df = pd.DataFrame({'speed': speed,
        ...                    'lifespan': lifespan}, index=index)
        >>> ax = df.plot.barh(y='speed')

    Plot DataFrame versus the desired column

    .. plot::
        :context: close-figs

        >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
        >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
        >>> index = ['snail', 'pig', 'elephant',
        ...          'rabbit', 'giraffe', 'coyote', 'horse']
        >>> df = pd.DataFrame({'speed': speed,
        ...                    'lifespan': lifespan}, index=index)
        >>> ax = df.plot.barh(x='lifespan')
    """
)
@Substitution(kind="bar")
@Appender(_bar_or_line_doc)
def barh(self, x=None, y=None, **kwargs) -> PlotAccessor:
    """
    Make a horizontal bar plot.

    A horizontal bar plot is a plot that presents quantitative data with
    rectangular bars with lengths proportional to the values that they
    represent. A bar plot shows comparisons among discrete categories. One
    axis of the plot shows the specific categories being compared, and the
    other axis represents a measured value.
    """
    # Thin wrapper: delegate to ``__call__`` with the plot kind fixed.
    return self(kind="barh", x=x, y=y, **kwargs)

1246

# NOTE(review): annotated as returning PlotAccessor, yet the body returns
# whatever ``self(...)`` yields and the docstring documents matplotlib Axes
# -- confirm the annotation.
def box(self, by=None, **kwargs) -> PlotAccessor:
    r"""
    Make a box plot of the DataFrame columns.

    A box plot is a method for graphically depicting groups of numerical
    data through their quartiles.
    The box extends from the Q1 to Q3 quartile values of the data,
    with a line at the median (Q2). The whiskers extend from the edges
    of box to show the range of the data. The position of the whiskers
    is set by default to 1.5*IQR (IQR = Q3 - Q1) from the edges of the
    box. Outlier points are those past the end of the whiskers.

    For further details see Wikipedia's
    entry for `boxplot <https://en.wikipedia.org/wiki/Box_plot>`__.

    A consideration when using this chart is that the box and the whiskers
    can overlap, which is very common when plotting small sets of data.

    Parameters
    ----------
    by : str or sequence
        Column in the DataFrame to group by.

        .. versionchanged:: 1.4.0

           Previously, `by` was silently ignored and no groupings were made.

    **kwargs
        Additional keywords are documented in
        :meth:`DataFrame.plot`.

    Returns
    -------
    :class:`matplotlib.axes.Axes` or numpy.ndarray of them

    See Also
    --------
    DataFrame.boxplot: Another method to draw a box plot.
    Series.plot.box: Draw a box plot from a Series object.
    matplotlib.pyplot.boxplot: Draw a box plot in matplotlib.

    Examples
    --------
    Draw a box plot from a DataFrame with four columns of randomly
    generated data.

    .. plot::
        :context: close-figs

        >>> data = np.random.randn(25, 4)
        >>> df = pd.DataFrame(data, columns=list('ABCD'))
        >>> ax = df.plot.box()

    You can also generate groupings if you specify the `by` parameter (which
    can take a column name, or a list or tuple of column names):

    .. versionchanged:: 1.4.0

    .. plot::
        :context: close-figs

        >>> age_list = [8, 10, 12, 14, 72, 74, 76, 78, 20, 25, 30, 35, 60, 85]
        >>> df = pd.DataFrame({"gender": list("MMMMMMMMFFFFFF"), "age": age_list})
        >>> ax = df.plot.box(column="age", by="gender", figsize=(10, 8))
    """
    # Thin wrapper: delegate to ``__call__`` with the plot kind fixed.
    return self(kind="box", by=by, **kwargs)

1313

# NOTE(review): annotated as returning PlotAccessor, yet the body returns
# whatever ``self(...)`` yields and the docstring documents a matplotlib
# axes object -- confirm the annotation.
def hist(self, by=None, bins: int = 10, **kwargs) -> PlotAccessor:
    """
    Draw one histogram of the DataFrame's columns.

    A histogram is a representation of the distribution of data.
    This function groups the values of all given Series in the DataFrame
    into bins and draws all bins in one :class:`matplotlib.axes.Axes`.
    This is useful when the DataFrame's Series are in a similar scale.

    Parameters
    ----------
    by : str or sequence, optional
        Column in the DataFrame to group by.

        .. versionchanged:: 1.4.0

           Previously, `by` was silently ignored and no groupings were made.

    bins : int, default 10
        Number of histogram bins to be used.
    **kwargs
        Additional keyword arguments are documented in
        :meth:`DataFrame.plot`.

    Returns
    -------
    :class:`matplotlib.AxesSubplot`
        Return a histogram plot.

    See Also
    --------
    DataFrame.hist : Draw histograms per DataFrame's Series.
    Series.hist : Draw a histogram with Series' data.

    Examples
    --------
    When we roll a die 6000 times, we expect to get each value around 1000
    times. But when we roll two dice and sum the result, the distribution
    is going to be quite different. A histogram illustrates those
    distributions.

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame(
        ...     np.random.randint(1, 7, 6000),
        ...     columns = ['one'])
        >>> df['two'] = df['one'] + np.random.randint(1, 7, 6000)
        >>> ax = df.plot.hist(bins=12, alpha=0.5)

    A grouped histogram can be generated by providing the parameter `by` (which
    can be a column name, or a list of column names):

    .. plot::
        :context: close-figs

        >>> age_list = [8, 10, 12, 14, 72, 74, 76, 78, 20, 25, 30, 35, 60, 85]
        >>> df = pd.DataFrame({"gender": list("MMMMMMMMFFFFFF"), "age": age_list})
        >>> ax = df.plot.hist(column=["age"], by="gender", figsize=(10, 8))
    """
    # Thin wrapper: delegate to ``__call__`` with the plot kind fixed.
    return self(kind="hist", by=by, bins=bins, **kwargs)

1375

# NOTE(review): annotated as returning PlotAccessor, yet the body returns
# whatever ``self(...)`` yields and the docstring documents matplotlib Axes
# -- confirm the annotation.
def kde(self, bw_method=None, ind=None, **kwargs) -> PlotAccessor:
    """
    Generate Kernel Density Estimate plot using Gaussian kernels.

    In statistics, `kernel density estimation`_ (KDE) is a non-parametric
    way to estimate the probability density function (PDF) of a random
    variable. This function uses Gaussian kernels and includes automatic
    bandwidth determination.

    .. _kernel density estimation:
        https://en.wikipedia.org/wiki/Kernel_density_estimation

    Parameters
    ----------
    bw_method : str, scalar or callable, optional
        The method used to calculate the estimator bandwidth. This can be
        'scott', 'silverman', a scalar constant or a callable.
        If None (default), 'scott' is used.
        See :class:`scipy.stats.gaussian_kde` for more information.
    ind : NumPy array or int, optional
        Evaluation points for the estimated PDF. If None (default),
        1000 equally spaced points are used. If `ind` is a NumPy array, the
        KDE is evaluated at the points passed. If `ind` is an integer,
        `ind` number of equally spaced points are used.
    **kwargs
        Additional keyword arguments are documented in
        :meth:`DataFrame.plot`.

    Returns
    -------
    matplotlib.axes.Axes or numpy.ndarray of them

    See Also
    --------
    scipy.stats.gaussian_kde : Representation of a kernel-density
        estimate using Gaussian kernels. This is the function used
        internally to estimate the PDF.

    Examples
    --------
    Given a Series of points randomly sampled from an unknown
    distribution, estimate its PDF using KDE with automatic
    bandwidth determination and plot the results, evaluating them at
    1000 equally spaced points (default):

    .. plot::
        :context: close-figs

        >>> s = pd.Series([1, 2, 2.5, 3, 3.5, 4, 5])
        >>> ax = s.plot.kde()

    A scalar bandwidth can be specified. Using a small bandwidth value can
    lead to over-fitting, while using a large bandwidth value may result
    in under-fitting:

    .. plot::
        :context: close-figs

        >>> ax = s.plot.kde(bw_method=0.3)

    .. plot::
        :context: close-figs

        >>> ax = s.plot.kde(bw_method=3)

    Finally, the `ind` parameter determines the evaluation points for the
    plot of the estimated PDF:

    .. plot::
        :context: close-figs

        >>> ax = s.plot.kde(ind=[1, 2, 3, 4, 5])

    For DataFrame, it works in the same way:

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({
        ...     'x': [1, 2, 2.5, 3, 3.5, 4, 5],
        ...     'y': [4, 4, 4.5, 5, 5.5, 6, 6],
        ... })
        >>> ax = df.plot.kde()

    A scalar bandwidth can be specified. Using a small bandwidth value can
    lead to over-fitting, while using a large bandwidth value may result
    in under-fitting:

    .. plot::
        :context: close-figs

        >>> ax = df.plot.kde(bw_method=0.3)

    .. plot::
        :context: close-figs

        >>> ax = df.plot.kde(bw_method=3)

    Finally, the `ind` parameter determines the evaluation points for the
    plot of the estimated PDF:

    .. plot::
        :context: close-figs

        >>> ax = df.plot.kde(ind=[1, 2, 3, 4, 5, 6])
    """
    # Thin wrapper: delegate to ``__call__`` with the plot kind fixed.
    return self(kind="kde", bw_method=bw_method, ind=ind, **kwargs)

# ``density`` is a second name for the very same function object as ``kde``.
density = kde

1485

# NOTE(review): annotated as returning PlotAccessor, yet the body returns
# whatever ``self(...)`` yields and the docstring documents matplotlib Axes
# -- confirm the annotation.
def area(self, x=None, y=None, **kwargs) -> PlotAccessor:
    """
    Draw a stacked area plot.

    An area plot displays quantitative data visually.
    This function wraps the matplotlib area function.

    Parameters
    ----------
    x : label or position, optional
        Coordinates for the X axis. By default uses the index.
    y : label or position, optional
        Column to plot. By default uses all columns.
    stacked : bool, default True
        Area plots are stacked by default. Set to False to create an
        unstacked plot. This is not a named parameter of this method;
        it is forwarded through ``**kwargs``.
    **kwargs
        Additional keyword arguments are documented in
        :meth:`DataFrame.plot`.

    Returns
    -------
    matplotlib.axes.Axes or numpy.ndarray
        Area plot, or array of area plots if subplots is True.

    See Also
    --------
    DataFrame.plot : Make plots of DataFrame using matplotlib / pylab.

    Examples
    --------
    Draw an area plot based on basic business metrics:

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({
        ...     'sales': [3, 2, 3, 9, 10, 6],
        ...     'signups': [5, 5, 6, 12, 14, 13],
        ...     'visits': [20, 42, 28, 62, 81, 50],
        ... }, index=pd.date_range(start='2018/01/01', end='2018/07/01',
        ...                        freq='M'))
        >>> ax = df.plot.area()

    Area plots are stacked by default. To produce an unstacked plot,
    pass ``stacked=False``:

    .. plot::
        :context: close-figs

        >>> ax = df.plot.area(stacked=False)

    Draw an area plot for a single column:

    .. plot::
        :context: close-figs

        >>> ax = df.plot.area(y='sales')

    Draw with a different `x`:

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({
        ...     'sales': [3, 2, 3],
        ...     'visits': [20, 42, 28],
        ...     'day': [1, 2, 3],
        ... })
        >>> ax = df.plot.area(x='day')
    """
    # Thin wrapper: delegate to ``__call__`` with the plot kind fixed.
    return self(kind="area", x=x, y=y, **kwargs)

1558

def pie(self, **kwargs) -> PlotAccessor:
    """
    Generate a pie plot.

    A pie plot is a proportional representation of the numerical data in a
    column. This function wraps :meth:`matplotlib.pyplot.pie` for the
    specified column. If no column reference is passed and
    ``subplots=True`` a pie plot is drawn for each numerical column
    independently.

    Parameters
    ----------
    y : int or label, optional
        Label or position of the column to plot.
        If not provided, ``subplots=True`` argument must be passed.
    **kwargs
        Keyword arguments to pass on to :meth:`DataFrame.plot`.

    Returns
    -------
    matplotlib.axes.Axes or np.ndarray of them
        A NumPy array is returned when `subplots` is True.

    Raises
    ------
    ValueError
        If the parent object is a DataFrame and neither a ``y`` column
        nor a truthy ``subplots`` is supplied.

    See Also
    --------
    Series.plot.pie : Generate a pie plot for a Series.
    DataFrame.plot : Make plots of a DataFrame.

    Examples
    --------
    In the example below we have a DataFrame with the information about
    planet's mass and radius. We pass the 'mass' column to the
    pie function to get a pie plot.

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({'mass': [0.330, 4.87 , 5.97],
        ...                    'radius': [2439.7, 6051.8, 6378.1]},
        ...                   index=['Mercury', 'Venus', 'Earth'])
        >>> plot = df.plot.pie(y='mass', figsize=(5, 5))

    .. plot::
        :context: close-figs

        >>> plot = df.plot.pie(subplots=True, figsize=(11, 6))
    """
    # Only a DataFrame parent needs the check: it must be told which
    # column to draw, or be asked for one pie per column.
    if isinstance(self._parent, ABCDataFrame):
        column_missing = kwargs.get("y") is None
        if column_missing and not kwargs.get("subplots", False):
            raise ValueError("pie requires either y column or 'subplots=True'")

    return self(kind="pie", **kwargs)

1613

def scatter(self, x, y, s=None, c=None, **kwargs) -> PlotAccessor:
    """
    Create a scatter plot with varying marker point size and color.

    The coordinates of each point are defined by two dataframe columns and
    filled circles are used to represent each point. This kind of plot is
    useful to see complex correlations between two variables. Points could
    be for instance natural 2D coordinates like longitude and latitude in
    a map or, in general, any pair of metrics that can be plotted against
    each other.

    Parameters
    ----------
    x : int or str
        The column name or column position to be used as horizontal
        coordinates for each point.
    y : int or str
        The column name or column position to be used as vertical
        coordinates for each point.
    s : str, scalar or array-like, optional
        The size of each point. Possible values are:

        - A string with the name of the column to be used for marker's size.

        - A single scalar so all points have the same size.

        - A sequence of scalars, which will be used for each point's size
          recursively. For instance, when passing [2,14] all points size
          will be either 2 or 14, alternatively.

          .. versionchanged:: 1.1.0

    c : str, int or array-like, optional
        The color of each point. Possible values are:

        - A single color string referred to by name, RGB or RGBA code,
          for instance 'red' or '#a98d19'.

        - A sequence of color strings referred to by name, RGB or RGBA
          code, which will be used for each point's color recursively. For
          instance ['green','yellow'] all points will be filled in green or
          yellow, alternatively.

        - A column name or position whose values will be used to color the
          marker points according to a colormap.

    **kwargs
        Keyword arguments to pass on to :meth:`DataFrame.plot`.
        ``size`` and ``color`` are accepted here as alternative spellings
        of `s` and `c`.

    Returns
    -------
    :class:`matplotlib.axes.Axes` or numpy.ndarray of them

    Raises
    ------
    TypeError
        If both `s` and ``size``, or both `c` and ``color``, are given.

    See Also
    --------
    matplotlib.pyplot.scatter : Scatter plot using multiple input data
        formats.

    Examples
    --------
    Let's see how to draw a scatter plot using coordinates from the values
    in a DataFrame's columns.

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame([[5.1, 3.5, 0], [4.9, 3.0, 0], [7.0, 3.2, 1],
        ...                    [6.4, 3.2, 1], [5.9, 3.0, 2]],
        ...                   columns=['length', 'width', 'species'])
        >>> ax1 = df.plot.scatter(x='length',
        ...                       y='width',
        ...                       c='DarkBlue')

    And now with the color determined by a column as well.

    .. plot::
        :context: close-figs

        >>> ax2 = df.plot.scatter(x='length',
        ...                       y='width',
        ...                       c='species',
        ...                       colormap='viridis')
    """
    # Marker size: fold the long spelling ``size`` into ``s``.
    long_size = kwargs.pop("size", None)
    if s is None:
        if long_size is not None:
            kwargs["s"] = long_size
    elif long_size is None:
        kwargs["s"] = s
    else:
        raise TypeError("Specify exactly one of `s` and `size`")

    # Marker color: fold the long spelling ``color`` into ``c``.
    long_color = kwargs.pop("color", None)
    if c is None:
        if long_color is not None:
            kwargs["c"] = long_color
    elif long_color is None:
        kwargs["c"] = c
    else:
        raise TypeError("Specify exactly one of `c` and `color`")

    return self(kind="scatter", x=x, y=y, **kwargs)

1710

def hexbin(
    self, x, y, C=None, reduce_C_function=None, gridsize=None, **kwargs
) -> PlotAccessor:
    """
    Generate a hexagonal binning plot.

    Generate a hexagonal binning plot of `x` versus `y`. If `C` is `None`
    (the default), this is a histogram of the number of occurrences
    of the observations at ``(x[i], y[i])``.

    If `C` is specified, specifies values at given coordinates
    ``(x[i], y[i])``. These values are accumulated for each hexagonal
    bin and then reduced according to `reduce_C_function`,
    having as default the NumPy's mean function (:meth:`numpy.mean`).
    (If `C` is specified, it must also be a 1-D sequence
    of the same length as `x` and `y`, or a column label.)

    Parameters
    ----------
    x : int or str
        The column label or position for x points.
    y : int or str
        The column label or position for y points.
    C : int or str, optional
        The column label or position for the value of `(x, y)` point.
    reduce_C_function : callable, default `np.mean`
        Function of one argument that reduces all the values in a bin to
        a single number (e.g. `np.mean`, `np.max`, `np.sum`, `np.std`).
    gridsize : int or tuple of (int, int), default 100
        The number of hexagons in the x-direction.
        The corresponding number of hexagons in the y-direction is
        chosen in a way that the hexagons are approximately regular.
        Alternatively, gridsize can be a tuple with two elements
        specifying the number of hexagons in the x-direction and the
        y-direction.
    **kwargs
        Additional keyword arguments are documented in
        :meth:`DataFrame.plot`.

    Returns
    -------
    matplotlib.AxesSubplot
        The matplotlib ``Axes`` on which the hexbin is plotted.

    See Also
    --------
    DataFrame.plot : Make plots of a DataFrame.
    matplotlib.pyplot.hexbin : Hexagonal binning plot using matplotlib,
        the matplotlib function that is used under the hood.

    Examples
    --------
    The following examples are generated with random data from
    a normal distribution.

    .. plot::
        :context: close-figs

        >>> n = 10000
        >>> df = pd.DataFrame({'x': np.random.randn(n),
        ...                    'y': np.random.randn(n)})
        >>> ax = df.plot.hexbin(x='x', y='y', gridsize=20)

    The next example uses `C` and `np.sum` as `reduce_C_function`.
    Note that `'observations'` values ranges from 1 to 5 but the result
    plot shows values up to more than 25. This is because of the
    `reduce_C_function`.

    .. plot::
        :context: close-figs

        >>> n = 500
        >>> df = pd.DataFrame({
        ...     'coord_x': np.random.uniform(-3, 3, size=n),
        ...     'coord_y': np.random.uniform(30, 50, size=n),
        ...     'observations': np.random.randint(1,5, size=n)
        ...     })
        >>> ax = df.plot.hexbin(x='coord_x',
        ...                     y='coord_y',
        ...                     C='observations',
        ...                     reduce_C_function=np.sum,
        ...                     gridsize=10,
        ...                     cmap="viridis")
    """
    # The two optional settings are forwarded only when the caller supplied
    # them; ``C`` is always forwarded, even when it is None.
    optional = {"reduce_C_function": reduce_C_function, "gridsize": gridsize}
    kwargs.update(
        {key: value for key, value in optional.items() if value is not None}
    )
    return self(kind="hexbin", x=x, y=y, C=C, **kwargs)

1801

1802

# Module-level cache of backend modules, keyed by backend name string.
_backends: dict[str, types.ModuleType] = {}

1804

1805

def _load_backend(backend: str) -> types.ModuleType:
    """
    Import and return the module implementing a pandas plotting backend.

    Resolution order: the literal name "matplotlib", then an entry point
    registered in the "pandas_plotting_backends" group, then a plain
    module import of `backend`.

    Parameters
    ----------
    backend : str
        The identifier for the backend. Either an entrypoint item registered
        with importlib.metadata, "matplotlib", or a module name.

    Returns
    -------
    types.ModuleType
        The imported backend.

    Raises
    ------
    ImportError
        If "matplotlib" is requested and ``pandas.plotting._matplotlib``
        cannot be imported.
    ValueError
        If no backend could be loaded under that name, or the loaded
        object has no ``plot`` attribute.
    """
    from importlib.metadata import entry_points

    if backend == "matplotlib":
        # Because matplotlib is an optional dependency and first-party backend,
        # we need to attempt an import here to raise an ImportError if needed.
        try:
            return importlib.import_module("pandas.plotting._matplotlib")
        except ImportError:
            raise ImportError(
                "matplotlib is required for plotting when the "
                'default backend "matplotlib" is selected.'
            ) from None

    group = "pandas_plotting_backends"
    registry = entry_points()
    # entry_points lost dict API ~ PY 3.10
    # https://github.com/python/importlib_metadata/issues/298
    if hasattr(registry, "select"):
        # error: "Dict[str, Tuple[EntryPoint, ...]]" has no attribute "select"
        candidates = registry.select(group=group)  # type: ignore[attr-defined]
    else:
        candidates = registry.get(group, ())

    # First registered entry point carrying the requested name, if any.
    registered = next((ep for ep in candidates if ep.name == backend), None)

    module = None
    if registered is not None:
        module = registered.load()
    else:
        # Fall back to unregistered, module name approach.
        try:
            module = importlib.import_module(backend)
        except ImportError:
            # Reported through the ValueError below.
            pass

    # Validate that the interface is implemented when the option is set,
    # rather than at plot time.
    if module is not None and hasattr(module, "plot"):
        return module

    raise ValueError(
        f"Could not find plotting backend '{backend}'. Ensure that you've "
        f"installed the package providing the '{backend}' entrypoint, or that "
        "the package has a top-level `.plot` method."
    )

1872

1873

def _get_plot_backend(backend: str | None = None):
    """
    Return the plotting backend to use (e.g. `pandas.plotting._matplotlib`).

    pandas plots with matplotlib by default, but other third-party backends
    can be plugged in. This function returns the module exposing the
    top-level `.plot` method that does the actual plotting. The backend is
    named by a string taken from the `backend` argument or, when that is
    not given, from the option `pandas.options.plotting.backend`.

    The backend is imported lazily, as matplotlib is a soft dependency, and
    pandas can be used without it being installed.

    Notes
    -----
    Modifies `_backends` with imported backend as a side effect.
    """
    name: str = backend or get_option("plotting.backend")

    # Load on first use only; later calls are served from the cache.
    if name not in _backends:
        _backends[name] = _load_backend(name)
    return _backends[name]

Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/plotting/_core.py: 21%

206 statements