Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/plotting/_core.py: 21%
206 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1from __future__ import annotations
3import importlib
4import itertools
5import types
6from typing import (
7 TYPE_CHECKING,
8 Sequence,
9)
10import warnings
12from pandas._config import get_option
14from pandas._typing import IndexLabel
15from pandas.util._decorators import (
16 Appender,
17 Substitution,
18)
19from pandas.util._exceptions import find_stack_level
21from pandas.core.dtypes.common import (
22 is_integer,
23 is_list_like,
24)
25from pandas.core.dtypes.generic import (
26 ABCDataFrame,
27 ABCSeries,
28)
30from pandas.core.base import PandasObject
32if TYPE_CHECKING: 32 ↛ 33line 32 didn't jump to line 33, because the condition on line 32 was never true
33 from matplotlib.axes import Axes
35 from pandas import DataFrame
def hist_series(
    self,
    by=None,
    ax=None,
    grid: bool = True,
    xlabelsize: int | None = None,
    xrot: float | None = None,
    ylabelsize: int | None = None,
    yrot: float | None = None,
    figsize: tuple[int, int] | None = None,
    bins: int | Sequence[int] = 10,
    backend: str | None = None,
    legend: bool = False,
    **kwargs,
):
    """
    Draw a histogram of the input Series using matplotlib.

    Parameters
    ----------
    by : object, optional
        If passed, then used to form histograms for separate groups.
    ax : matplotlib axis object
        If not passed, uses gca().
    grid : bool, default True
        Whether to show axis grid lines.
    xlabelsize : int, default None
        If specified changes the x-axis label size.
    xrot : float, default None
        Rotation of x axis labels.
    ylabelsize : int, default None
        If specified changes the y-axis label size.
    yrot : float, default None
        Rotation of y axis labels.
    figsize : tuple, default None
        Figure size in inches by default.
    bins : int or sequence, default 10
        Number of histogram bins to be used. If an integer is given, bins + 1
        bin edges are calculated and returned. If bins is a sequence, gives
        bin edges, including left edge of first bin and right edge of last
        bin. In this case, bins is returned unmodified.
    backend : str, default None
        Backend to use instead of the backend specified in the option
        ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
        specify the ``plotting.backend`` for the whole session, set
        ``pd.options.plotting.backend``.

        .. versionadded:: 1.0.0

    legend : bool, default False
        Whether to show the legend.

        .. versionadded:: 1.1.0

    **kwargs
        To be passed to the actual plotting function.

    Returns
    -------
    matplotlib.AxesSubplot
        A histogram plot.

    See Also
    --------
    matplotlib.axes.Axes.hist : Plot a histogram using matplotlib.
    """
    # ``backend`` is only used to pick the implementing module; it is not
    # part of the options handed to that module's ``hist_series``.
    hist_options = {
        "by": by,
        "ax": ax,
        "grid": grid,
        "xlabelsize": xlabelsize,
        "xrot": xrot,
        "ylabelsize": ylabelsize,
        "yrot": yrot,
        "figsize": figsize,
        "bins": bins,
        "legend": legend,
    }
    backend_module = _get_plot_backend(backend)
    return backend_module.hist_series(self, **hist_options, **kwargs)
def hist_frame(
    data: DataFrame,
    column: IndexLabel = None,
    by=None,
    grid: bool = True,
    xlabelsize: int | None = None,
    xrot: float | None = None,
    ylabelsize: int | None = None,
    yrot: float | None = None,
    ax=None,
    sharex: bool = False,
    sharey: bool = False,
    figsize: tuple[int, int] | None = None,
    layout: tuple[int, int] | None = None,
    bins: int | Sequence[int] = 10,
    backend: str | None = None,
    legend: bool = False,
    **kwargs,
):
    """
    Make a histogram of the DataFrame's columns.

    A `histogram`_ is a representation of the distribution of data.
    This function calls :meth:`matplotlib.pyplot.hist`, on each series in
    the DataFrame, resulting in one histogram per column.

    .. _histogram: https://en.wikipedia.org/wiki/Histogram

    Parameters
    ----------
    data : DataFrame
        The pandas object holding the data.
    column : str or sequence, optional
        If passed, will be used to limit data to a subset of columns.
    by : object, optional
        If passed, then used to form histograms for separate groups.
    grid : bool, default True
        Whether to show axis grid lines.
    xlabelsize : int, default None
        If specified changes the x-axis label size.
    xrot : float, default None
        Rotation of x axis labels. For example, a value of 90 displays the
        x labels rotated 90 degrees clockwise.
    ylabelsize : int, default None
        If specified changes the y-axis label size.
    yrot : float, default None
        Rotation of y axis labels. For example, a value of 90 displays the
        y labels rotated 90 degrees clockwise.
    ax : Matplotlib axes object, default None
        The axes to plot the histogram on.
    sharex : bool, default False
        In case subplots=True, share x axis and set some x axis labels to
        invisible.
        Note that passing in both an ax and sharex=True will alter all x axis
        labels for all subplots in a figure.
    sharey : bool, default False
        In case subplots=True, share y axis and set some y axis labels to
        invisible.
    figsize : tuple, optional
        The size in inches of the figure to create. Uses the value in
        `matplotlib.rcParams` by default.
    layout : tuple, optional
        Tuple of (rows, columns) for the layout of the histograms.
    bins : int or sequence, default 10
        Number of histogram bins to be used. If an integer is given, bins + 1
        bin edges are calculated and returned. If bins is a sequence, gives
        bin edges, including left edge of first bin and right edge of last
        bin. In this case, bins is returned unmodified.

    backend : str, default None
        Backend to use instead of the backend specified in the option
        ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
        specify the ``plotting.backend`` for the whole session, set
        ``pd.options.plotting.backend``.

        .. versionadded:: 1.0.0

    legend : bool, default False
        Whether to show the legend.

        .. versionadded:: 1.1.0

    **kwargs
        All other plotting keyword arguments to be passed to
        :meth:`matplotlib.pyplot.hist`.

    Returns
    -------
    matplotlib.AxesSubplot or numpy.ndarray of them

    See Also
    --------
    matplotlib.pyplot.hist : Plot a histogram using matplotlib.

    Examples
    --------
    This example draws a histogram based on the length and width of
    some animals, displayed in three bins

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({
        ...     'length': [1.5, 0.5, 1.2, 0.9, 3],
        ...     'width': [0.7, 0.2, 0.15, 0.2, 1.1]
        ...     }, index=['pig', 'rabbit', 'duck', 'chicken', 'horse'])
        >>> hist = df.hist(bins=3)
    """
    # ``backend`` is only used to pick the implementing module; every other
    # named option is passed through unchanged.
    hist_options = {
        "column": column,
        "by": by,
        "grid": grid,
        "xlabelsize": xlabelsize,
        "xrot": xrot,
        "ylabelsize": ylabelsize,
        "yrot": yrot,
        "ax": ax,
        "sharex": sharex,
        "sharey": sharey,
        "figsize": figsize,
        "layout": layout,
        "legend": legend,
        "bins": bins,
    }
    backend_module = _get_plot_backend(backend)
    return backend_module.hist_frame(data, **hist_options, **kwargs)
251_boxplot_doc = """
252Make a box plot from DataFrame columns.
254Make a box-and-whisker plot from DataFrame columns, optionally grouped
255by some other columns. A box plot is a method for graphically depicting
256groups of numerical data through their quartiles.
257The box extends from the Q1 to Q3 quartile values of the data,
258with a line at the median (Q2). The whiskers extend from the edges
259of box to show the range of the data. By default, they extend no more than
260`1.5 * IQR (IQR = Q3 - Q1)` from the edges of the box, ending at the farthest
261data point within that interval. Outliers are plotted as separate dots.
263For further details see
264Wikipedia's entry for `boxplot <https://en.wikipedia.org/wiki/Box_plot>`_.
266Parameters
267----------
268column : str or list of str, optional
269 Column name or list of names, or vector.
270 Can be any valid input to :meth:`pandas.DataFrame.groupby`.
271by : str or array-like, optional
272 Column in the DataFrame to :meth:`pandas.DataFrame.groupby`.
273 One box-plot will be done per value of columns in `by`.
274ax : object of class matplotlib.axes.Axes, optional
275 The matplotlib axes to be used by boxplot.
276fontsize : float or str
277 Tick label font size in points or as a string (e.g., `large`).
278rot : int or float, default 0
279 The rotation angle of labels (in degrees)
280 with respect to the screen coordinate system.
281grid : bool, default True
282 Setting this to True will show the grid.
283figsize : A tuple (width, height) in inches
284 The size of the figure to create in matplotlib.
285layout : tuple (rows, columns), optional
286 For example, (3, 5) will display the subplots
287 using 3 columns and 5 rows, starting from the top-left.
288return_type : {'axes', 'dict', 'both'} or None, default 'axes'
289 The kind of object to return. The default is ``axes``.
291 * 'axes' returns the matplotlib axes the boxplot is drawn on.
292 * 'dict' returns a dictionary whose values are the matplotlib
293 Lines of the boxplot.
294 * 'both' returns a namedtuple with the axes and dict.
295 * when grouping with ``by``, a Series mapping columns to
296 ``return_type`` is returned.
298 If ``return_type`` is `None`, a NumPy array
299 of axes with the same shape as ``layout`` is returned.
300%(backend)s\
302**kwargs
303 All other plotting keyword arguments to be passed to
304 :func:`matplotlib.pyplot.boxplot`.
306Returns
307-------
308result
309 See Notes.
311See Also
312--------
313Series.plot.hist: Make a histogram.
314matplotlib.pyplot.boxplot : Matplotlib equivalent plot.
316Notes
317-----
318The return type depends on the `return_type` parameter:
320* 'axes' : object of class matplotlib.axes.Axes
321* 'dict' : dict of matplotlib.lines.Line2D objects
322* 'both' : a namedtuple with structure (ax, lines)
324For data grouped with ``by``, return a Series of the above or a numpy
325array:
327* :class:`~pandas.Series`
328* :class:`~numpy.array` (for ``return_type = None``)
330Use ``return_type='dict'`` when you want to tweak the appearance
331of the lines after plotting. In this case a dict containing the Lines
332making up the boxes, caps, fliers, medians, and whiskers is returned.
334Examples
335--------
337Boxplots can be created for every column in the dataframe
338by ``df.boxplot()`` or indicating the columns to be used:
340.. plot::
341 :context: close-figs
343 >>> np.random.seed(1234)
344 >>> df = pd.DataFrame(np.random.randn(10, 4),
345 ... columns=['Col1', 'Col2', 'Col3', 'Col4'])
346 >>> boxplot = df.boxplot(column=['Col1', 'Col2', 'Col3']) # doctest: +SKIP
348Boxplots of variables distributions grouped by the values of a third
349variable can be created using the option ``by``. For instance:
351.. plot::
352 :context: close-figs
354 >>> df = pd.DataFrame(np.random.randn(10, 2),
355 ... columns=['Col1', 'Col2'])
356 >>> df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A',
357 ... 'B', 'B', 'B', 'B', 'B'])
358 >>> boxplot = df.boxplot(by='X')
360A list of strings (i.e. ``['X', 'Y']``) can be passed to boxplot
361in order to group the data by combination of the variables in the x-axis:
363.. plot::
364 :context: close-figs
366 >>> df = pd.DataFrame(np.random.randn(10, 3),
367 ... columns=['Col1', 'Col2', 'Col3'])
368 >>> df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A',
369 ... 'B', 'B', 'B', 'B', 'B'])
370 >>> df['Y'] = pd.Series(['A', 'B', 'A', 'B', 'A',
371 ... 'B', 'A', 'B', 'A', 'B'])
372 >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by=['X', 'Y'])
374The layout of boxplot can be adjusted giving a tuple to ``layout``:
376.. plot::
377 :context: close-figs
379 >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X',
380 ... layout=(2, 1))
382Additional formatting can be done to the boxplot, like suppressing the grid
383(``grid=False``), rotating the labels in the x-axis (i.e. ``rot=45``)
384or changing the fontsize (i.e. ``fontsize=15``):
386.. plot::
387 :context: close-figs
389 >>> boxplot = df.boxplot(grid=False, rot=45, fontsize=15) # doctest: +SKIP
391The parameter ``return_type`` can be used to select the type of element
392returned by `boxplot`. When ``return_type='axes'`` is selected,
393the matplotlib axes on which the boxplot is drawn are returned:
395 >>> boxplot = df.boxplot(column=['Col1', 'Col2'], return_type='axes')
396 >>> type(boxplot)
397 <class 'matplotlib.axes._subplots.AxesSubplot'>
399When grouping with ``by``, a Series mapping columns to ``return_type``
400is returned:
402 >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X',
403 ... return_type='axes')
404 >>> type(boxplot)
405 <class 'pandas.core.series.Series'>
407If ``return_type`` is `None`, a NumPy array of axes with the same shape
408as ``layout`` is returned:
410 >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X',
411 ... return_type=None)
412 >>> type(boxplot)
413 <class 'numpy.ndarray'>
414"""
416_backend_doc = """\
417backend : str, default None
418 Backend to use instead of the backend specified in the option
419 ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
420 specify the ``plotting.backend`` for the whole session, set
421 ``pd.options.plotting.backend``.
423 .. versionadded:: 1.0.0
424"""
427_bar_or_line_doc = """
428 Parameters
429 ----------
430 x : label or position, optional
431 Allows plotting of one column versus another. If not specified,
432 the index of the DataFrame is used.
433 y : label or position, optional
434 Allows plotting of one column versus another. If not specified,
435 all numerical columns are used.
436 color : str, array-like, or dict, optional
437 The color for each of the DataFrame's columns. Possible values are:
439 - A single color string referred to by name, RGB or RGBA code,
440 for instance 'red' or '#a98d19'.
442 - A sequence of color strings referred to by name, RGB or RGBA
443 code, which will be used for each column recursively. For
444 instance ['green','yellow'] each column's %(kind)s will be filled in
445 green or yellow, alternatively. If there is only a single column to
446 be plotted, then only the first color from the color list will be
447 used.
449 - A dict of the form {column name : color}, so that each column will be
450 colored accordingly. For example, if your columns are called `a` and
451 `b`, then passing {'a': 'green', 'b': 'red'} will color %(kind)ss for
452 column `a` in green and %(kind)ss for column `b` in red.
454 .. versionadded:: 1.1.0
456 **kwargs
457 Additional keyword arguments are documented in
458 :meth:`DataFrame.plot`.
460 Returns
461 -------
462 matplotlib.axes.Axes or np.ndarray of them
463 An ndarray is returned with one :class:`matplotlib.axes.Axes`
464 per column when ``subplots=True``.
465"""
@Substitution(backend="")
@Appender(_boxplot_doc)
def boxplot(
    data: DataFrame,
    column: str | list[str] | None = None,
    by: str | list[str] | None = None,
    ax: Axes | None = None,
    fontsize: float | str | None = None,
    rot: int = 0,
    grid: bool = True,
    figsize: tuple[float, float] | None = None,
    layout: tuple[int, int] | None = None,
    return_type: str | None = None,
    **kwargs,
):
    # No inline docstring on purpose: the decorators above are applied with
    # ``_boxplot_doc`` and an empty ``backend`` substitution, and this
    # function has no ``backend`` parameter -- it always asks for matplotlib.
    box_options = {
        "column": column,
        "by": by,
        "ax": ax,
        "fontsize": fontsize,
        "rot": rot,
        "grid": grid,
        "figsize": figsize,
        "layout": layout,
        "return_type": return_type,
    }
    matplotlib_backend = _get_plot_backend("matplotlib")
    return matplotlib_backend.boxplot(data, **box_options, **kwargs)
@Substitution(backend=_backend_doc)
@Appender(_boxplot_doc)
def boxplot_frame(
    self,
    column=None,
    by=None,
    ax=None,
    fontsize=None,
    rot: int = 0,
    grid: bool = True,
    figsize=None,
    layout=None,
    return_type=None,
    backend=None,
    **kwargs,
):
    # No inline docstring on purpose: the decorators above are applied with
    # ``_boxplot_doc`` and ``_backend_doc``.  ``backend`` only selects the
    # implementing module and is not forwarded to it.
    box_options = {
        "column": column,
        "by": by,
        "ax": ax,
        "fontsize": fontsize,
        "rot": rot,
        "grid": grid,
        "figsize": figsize,
        "layout": layout,
        "return_type": return_type,
    }
    backend_module = _get_plot_backend(backend)
    return backend_module.boxplot_frame(self, **box_options, **kwargs)
def boxplot_frame_groupby(
    grouped,
    subplots: bool = True,
    column=None,
    fontsize=None,
    rot: int = 0,
    grid: bool = True,
    ax=None,
    figsize=None,
    layout=None,
    sharex: bool = False,
    sharey: bool = True,
    backend=None,
    **kwargs,
):
    """
    Make box plots from DataFrameGroupBy data.

    Parameters
    ----------
    grouped : Grouped DataFrame
    subplots : bool
        * ``False`` - no subplots will be used
        * ``True`` - create a subplot for each group.

    column : column name or list of names, or vector
        Can be any valid input to groupby.
    fontsize : int or str
    rot : label rotation angle
    grid : Setting this to True will show the grid
    ax : Matplotlib axis object, default None
    figsize : A tuple (width, height) in inches
    layout : tuple (optional)
        The layout of the plot: (rows, columns).
    sharex : bool, default False
        Whether x-axes will be shared among subplots.
    sharey : bool, default True
        Whether y-axes will be shared among subplots.
    backend : str, default None
        Backend to use instead of the backend specified in the option
        ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
        specify the ``plotting.backend`` for the whole session, set
        ``pd.options.plotting.backend``.

        .. versionadded:: 1.0.0

    **kwargs
        All other plotting keyword arguments to be passed to
        matplotlib's boxplot function.

    Returns
    -------
    dict of key/value = group key/DataFrame.boxplot return value
    or DataFrame.boxplot return value in case subplots=figures=False

    Examples
    --------
    You can create boxplots for grouped data and show them as separate subplots:

    .. plot::
        :context: close-figs

        >>> import itertools
        >>> tuples = [t for t in itertools.product(range(1000), range(4))]
        >>> index = pd.MultiIndex.from_tuples(tuples, names=['lvl0', 'lvl1'])
        >>> data = np.random.randn(len(index),4)
        >>> df = pd.DataFrame(data, columns=list('ABCD'), index=index)
        >>> grouped = df.groupby(level='lvl1')
        >>> grouped.boxplot(rot=45, fontsize=12, figsize=(8,10))  # doctest: +SKIP

    The ``subplots=False`` option shows the boxplots in a single figure.

    .. plot::
        :context: close-figs

        >>> grouped.boxplot(subplots=False, rot=45, fontsize=12)  # doctest: +SKIP
    """
    # ``backend`` is only used to pick the implementing module; every other
    # named option is passed through unchanged.
    box_options = {
        "subplots": subplots,
        "column": column,
        "fontsize": fontsize,
        "rot": rot,
        "grid": grid,
        "ax": ax,
        "figsize": figsize,
        "layout": layout,
        "sharex": sharex,
        "sharey": sharey,
    }
    backend_module = _get_plot_backend(backend)
    return backend_module.boxplot_frame_groupby(grouped, **box_options, **kwargs)
625class PlotAccessor(PandasObject):
626 """
627 Make plots of Series or DataFrame.
629 Uses the backend specified by the
630 option ``plotting.backend``. By default, matplotlib is used.
632 Parameters
633 ----------
634 data : Series or DataFrame
635 The object for which the method is called.
636 x : label or position, default None
637 Only used if data is a DataFrame.
638 y : label, position or list of label, positions, default None
639 Allows plotting of one column versus another. Only used if data is a
640 DataFrame.
641 kind : str
642 The kind of plot to produce:
644 - 'line' : line plot (default)
645 - 'bar' : vertical bar plot
646 - 'barh' : horizontal bar plot
647 - 'hist' : histogram
648 - 'box' : boxplot
649 - 'kde' : Kernel Density Estimation plot
650 - 'density' : same as 'kde'
651 - 'area' : area plot
652 - 'pie' : pie plot
653 - 'scatter' : scatter plot (DataFrame only)
654 - 'hexbin' : hexbin plot (DataFrame only)
655 ax : matplotlib axes object, default None
656 An axes of the current figure.
657 subplots : bool or sequence of iterables, default False
658 Whether to group columns into subplots:
660 - ``False`` : No subplots will be used
661 - ``True`` : Make separate subplots for each column.
662 - sequence of iterables of column labels: Create a subplot for each
663 group of columns. For example `[('a', 'c'), ('b', 'd')]` will
664 create 2 subplots: one with columns 'a' and 'c', and one
665 with columns 'b' and 'd'. Remaining columns that aren't specified
666 will be plotted in additional subplots (one per column).
667 .. versionadded:: 1.5.0
669 sharex : bool, default True if ax is None else False
670 In case ``subplots=True``, share x axis and set some x axis labels
671 to invisible; defaults to True if ax is None otherwise False if
672 an ax is passed in; Be aware, that passing in both an ax and
673 ``sharex=True`` will alter all x axis labels for all axis in a figure.
674 sharey : bool, default False
675 In case ``subplots=True``, share y axis and set some y axis labels to invisible.
676 layout : tuple, optional
677 (rows, columns) for the layout of subplots.
678 figsize : a tuple (width, height) in inches
679 Size of a figure object.
680 use_index : bool, default True
681 Use index as ticks for x axis.
682 title : str or list
683 Title to use for the plot. If a string is passed, print the string
684 at the top of the figure. If a list is passed and `subplots` is
685 True, print each item in the list above the corresponding subplot.
686 grid : bool, default None (matlab style default)
687 Axis grid lines.
688 legend : bool or {'reverse'}
689 Place legend on axis subplots.
690 style : list or dict
691 The matplotlib line style per column.
692 logx : bool or 'sym', default False
693 Use log scaling or symlog scaling on x axis.
694 .. versionchanged:: 0.25.0
696 logy : bool or 'sym' default False
697 Use log scaling or symlog scaling on y axis.
698 .. versionchanged:: 0.25.0
700 loglog : bool or 'sym', default False
701 Use log scaling or symlog scaling on both x and y axes.
702 .. versionchanged:: 0.25.0
704 xticks : sequence
705 Values to use for the xticks.
706 yticks : sequence
707 Values to use for the yticks.
708 xlim : 2-tuple/list
709 Set the x limits of the current axes.
710 ylim : 2-tuple/list
711 Set the y limits of the current axes.
712 xlabel : label, optional
713 Name to use for the xlabel on x-axis. Default uses index name as xlabel, or the
714 x-column name for planar plots.
716 .. versionadded:: 1.1.0
718 .. versionchanged:: 1.2.0
720 Now applicable to planar plots (`scatter`, `hexbin`).
722 ylabel : label, optional
723 Name to use for the ylabel on y-axis. Default will show no ylabel, or the
724 y-column name for planar plots.
726 .. versionadded:: 1.1.0
728 .. versionchanged:: 1.2.0
730 Now applicable to planar plots (`scatter`, `hexbin`).
732 rot : int, default None
733 Rotation for ticks (xticks for vertical, yticks for horizontal
734 plots).
735 fontsize : int, default None
736 Font size for xticks and yticks.
737 colormap : str or matplotlib colormap object, default None
738 Colormap to select colors from. If string, load colormap with that
739 name from matplotlib.
740 colorbar : bool, optional
741 If True, plot colorbar (only relevant for 'scatter' and 'hexbin'
742 plots).
743 position : float
744 Specify relative alignments for bar plot layout.
745 From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5
746 (center).
747 table : bool, Series or DataFrame, default False
748 If True, draw a table using the data in the DataFrame and the data
749 will be transposed to meet matplotlib's default layout.
750 If a Series or DataFrame is passed, use passed data to draw a
751 table.
752 yerr : DataFrame, Series, array-like, dict and str
753 See :ref:`Plotting with Error Bars <visualization.errorbars>` for
754 detail.
755 xerr : DataFrame, Series, array-like, dict and str
756 Equivalent to yerr.
757 stacked : bool, default False in line and bar plots, and True in area plot
758 If True, create stacked plot.
759 sort_columns : bool, default False
760 Sort column names to determine plot ordering.
762 .. deprecated:: 1.5.0
763 The `sort_columns` arguments is deprecated and will be removed in a
764 future version.
766 secondary_y : bool or sequence, default False
767 Whether to plot on the secondary y-axis if a list/tuple, which
768 columns to plot on secondary y-axis.
769 mark_right : bool, default True
770 When using a secondary_y axis, automatically mark the column
771 labels with "(right)" in the legend.
772 include_bool : bool, default is False
773 If True, boolean values can be plotted.
774 backend : str, default None
775 Backend to use instead of the backend specified in the option
776 ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
777 specify the ``plotting.backend`` for the whole session, set
778 ``pd.options.plotting.backend``.
780 .. versionadded:: 1.0.0
782 **kwargs
783 Options to pass to matplotlib plotting method.
785 Returns
786 -------
787 :class:`matplotlib.axes.Axes` or numpy.ndarray of them
788 If the backend is not the default matplotlib one, the return value
789 will be the object returned by the backend.
791 Notes
792 -----
793 - See matplotlib documentation online for more on this subject
794 - If `kind` = 'bar' or 'barh', you can specify relative alignments
795 for bar plot layout by `position` keyword.
796 From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5
797 (center)
798 """
800 _common_kinds = ("line", "bar", "barh", "kde", "density", "area", "hist", "box")
801 _series_kinds = ("pie",)
802 _dataframe_kinds = ("scatter", "hexbin")
803 _kind_aliases = {"density": "kde"}
804 _all_kinds = _common_kinds + _series_kinds + _dataframe_kinds
806 def __init__(self, data) -> None:
807 self._parent = data
809 @staticmethod
810 def _get_call_args(backend_name, data, args, kwargs):
811 """
812 This function makes calls to this accessor `__call__` method compatible
813 with the previous `SeriesPlotMethods.__call__` and
814 `DataFramePlotMethods.__call__`. Those had slightly different
815 signatures, since `DataFramePlotMethods` accepted `x` and `y`
816 parameters.
817 """
818 if isinstance(data, ABCSeries):
819 arg_def = [
820 ("kind", "line"),
821 ("ax", None),
822 ("figsize", None),
823 ("use_index", True),
824 ("title", None),
825 ("grid", None),
826 ("legend", False),
827 ("style", None),
828 ("logx", False),
829 ("logy", False),
830 ("loglog", False),
831 ("xticks", None),
832 ("yticks", None),
833 ("xlim", None),
834 ("ylim", None),
835 ("rot", None),
836 ("fontsize", None),
837 ("colormap", None),
838 ("table", False),
839 ("yerr", None),
840 ("xerr", None),
841 ("label", None),
842 ("secondary_y", False),
843 ("xlabel", None),
844 ("ylabel", None),
845 ]
846 elif isinstance(data, ABCDataFrame):
847 arg_def = [
848 ("x", None),
849 ("y", None),
850 ("kind", "line"),
851 ("ax", None),
852 ("subplots", False),
853 ("sharex", None),
854 ("sharey", False),
855 ("layout", None),
856 ("figsize", None),
857 ("use_index", True),
858 ("title", None),
859 ("grid", None),
860 ("legend", True),
861 ("style", None),
862 ("logx", False),
863 ("logy", False),
864 ("loglog", False),
865 ("xticks", None),
866 ("yticks", None),
867 ("xlim", None),
868 ("ylim", None),
869 ("rot", None),
870 ("fontsize", None),
871 ("colormap", None),
872 ("table", False),
873 ("yerr", None),
874 ("xerr", None),
875 ("secondary_y", False),
876 ("sort_columns", False),
877 ("xlabel", None),
878 ("ylabel", None),
879 ]
880 else:
881 raise TypeError(
882 f"Called plot accessor for type {type(data).__name__}, "
883 "expected Series or DataFrame"
884 )
886 if "sort_columns" in itertools.chain(args, kwargs.keys()):
887 warnings.warn(
888 "`sort_columns` is deprecated and will be removed in a future "
889 "version.",
890 FutureWarning,
891 stacklevel=find_stack_level(),
892 )
894 if args and isinstance(data, ABCSeries):
895 positional_args = str(args)[1:-1]
896 keyword_args = ", ".join(
897 [f"{name}={repr(value)}" for (name, _), value in zip(arg_def, args)]
898 )
899 msg = (
900 "`Series.plot()` should not be called with positional "
901 "arguments, only keyword arguments. The order of "
902 "positional arguments will change in the future. "
903 f"Use `Series.plot({keyword_args})` instead of "
904 f"`Series.plot({positional_args})`."
905 )
906 raise TypeError(msg)
908 pos_args = {name: value for (name, _), value in zip(arg_def, args)}
909 if backend_name == "pandas.plotting._matplotlib":
910 kwargs = dict(arg_def, **pos_args, **kwargs)
911 else:
912 kwargs = dict(pos_args, **kwargs)
914 x = kwargs.pop("x", None)
915 y = kwargs.pop("y", None)
916 kind = kwargs.pop("kind", "line")
917 return x, y, kind, kwargs
def __call__(self, *args, **kwargs):
    # No inline docstring: ``__doc__`` is assigned from the class docstring
    # right after this definition.
    backend_module = _get_plot_backend(kwargs.pop("backend", None))
    backend_name = backend_module.__name__

    x, y, kind, kwargs = self._get_call_args(
        backend_name, self._parent, args, kwargs
    )
    # Resolve aliases such as "density" -> "kde".
    kind = self._kind_aliases.get(kind, kind)

    # when using another backend, get out of the way
    if backend_name != "pandas.plotting._matplotlib":
        return backend_module.plot(self._parent, x=x, y=y, kind=kind, **kwargs)

    if kind not in self._all_kinds:
        raise ValueError(f"{kind} is not a valid plot kind")

    # The original data structured can be transformed before passed to the
    # backend. For example, for DataFrame is common to set the index as the
    # `x` parameter, and return a Series with the parameter `y` as values.
    data = self._parent.copy()
    # Evaluated once, before ``data`` is possibly replaced by a selection.
    parent_is_frame = isinstance(data, ABCDataFrame)

    if isinstance(data, ABCSeries):
        kwargs["reuse_plot"] = True

    if kind in self._dataframe_kinds:
        if not parent_is_frame:
            raise ValueError(f"plot kind {kind} can only be used for data frames")
        return backend_module.plot(data, x=x, y=y, kind=kind, **kwargs)

    if kind in self._series_kinds:
        if parent_is_frame:
            if y is None:
                if kwargs.get("subplots") is False:
                    raise ValueError(
                        f"{kind} requires either y column or 'subplots=True'"
                    )
            else:
                if is_integer(y) and not data.columns.holds_integer():
                    y = data.columns[y]
                # converted to series actually. copy to not modify
                data = data[y].copy()
                data.index.name = y
    elif parent_is_frame:
        # Column labels as they were before any ``set_index`` below.
        original_columns = data.columns

        if x is not None:
            if is_integer(x) and not data.columns.holds_integer():
                x = original_columns[x]
            elif not isinstance(data[x], ABCSeries):
                raise ValueError("x must be a label or position")
            data = data.set_index(x)

        if y is not None:
            # check if we have y as int or list of ints
            y_is_int_list = is_list_like(y) and all(is_integer(c) for c in y)
            y_is_positional = is_integer(y) or y_is_int_list
            if y_is_positional and not data.columns.holds_integer():
                y = original_columns[y]

            label_kw = kwargs.get("label", False)

            # An error-bar option given as a single label/position is looked
            # up as a column of ``data``; if the lookup fails the value is
            # left as the caller supplied it.
            for err_key in ("xerr", "yerr"):
                if err_key not in kwargs:
                    continue
                err_value = kwargs[err_key]
                if isinstance(err_value, str) or is_integer(err_value):
                    try:
                        kwargs[err_key] = data[err_value]
                    except (IndexError, KeyError, TypeError):
                        pass

            # don't overwrite
            data = data[y].copy()

            if isinstance(data, ABCSeries):
                data.name = label_kw or y
            else:
                lengths_match = is_list_like(label_kw) and len(label_kw) == len(y)
                if label_kw and not lengths_match:
                    raise ValueError(
                        "label should be list-like and same length as y"
                    )
                data.columns = label_kw or data.columns

    return backend_module.plot(data, kind=kind, **kwargs)
1002 __call__.__doc__ = __doc__
@Appender(
    """
    See Also
    --------
    matplotlib.pyplot.plot : Plot y versus x as lines and/or markers.

    Examples
    --------

    .. plot::
        :context: close-figs

        >>> s = pd.Series([1, 3, 2])
        >>> s.plot.line()  # doctest: +SKIP

    .. plot::
        :context: close-figs

        The following example shows the populations for some animals
        over the years.

        >>> df = pd.DataFrame({
        ...    'pig': [20, 18, 489, 675, 1776],
        ...    'horse': [4, 25, 281, 600, 1900]
        ...    }, index=[1990, 1997, 2003, 2009, 2014])
        >>> lines = df.plot.line()

    .. plot::
       :context: close-figs

       An example with subplots, so an array of axes is returned.

       >>> axes = df.plot.line(subplots=True)
       >>> type(axes)
       <class 'numpy.ndarray'>

    .. plot::
       :context: close-figs

       Let's repeat the same example, but specifying colors for
       each column (in this case, for each animal).

       >>> axes = df.plot.line(
       ...     subplots=True, color={"pig": "pink", "horse": "#742802"}
       ... )

    .. plot::
        :context: close-figs

        The following example shows the relationship between both
        populations.

        >>> lines = df.plot.line(x='pig', y='horse')
    """
)
@Substitution(kind="line")
@Appender(_bar_or_line_doc)
def line(self, x=None, y=None, **kwargs) -> PlotAccessor:
    """
    Plot Series or DataFrame as lines.

    This function is useful to plot lines using DataFrame's values
    as coordinates.
    """
    # The first example above used to show ``<AxesSubplot: ylabel='Density'>``
    # as its output; a line plot sets no such y label, so the example is now
    # skipped by doctest instead of asserting that repr.
    # NOTE(review): the value returned is whatever ``__call__`` returns, not
    # a ``PlotAccessor``; the annotation is kept as found.
    return self(kind="line", x=x, y=y, **kwargs)
@Appender(
    """
    See Also
    --------
    DataFrame.plot.barh : Horizontal bar plot.
    DataFrame.plot : Make plots of a DataFrame.
    matplotlib.pyplot.bar : Make a bar plot with matplotlib.

    Examples
    --------
    Basic plot.

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({'lab':['A', 'B', 'C'], 'val':[10, 30, 20]})
        >>> ax = df.plot.bar(x='lab', y='val', rot=0)

    Plot a whole dataframe to a bar plot. Each column is assigned a
    distinct color, and each row is nested in a group along the
    horizontal axis.

    .. plot::
        :context: close-figs

        >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
        >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
        >>> index = ['snail', 'pig', 'elephant',
        ...          'rabbit', 'giraffe', 'coyote', 'horse']
        >>> df = pd.DataFrame({'speed': speed,
        ...                    'lifespan': lifespan}, index=index)
        >>> ax = df.plot.bar(rot=0)

    Plot stacked bar charts for the DataFrame

    .. plot::
        :context: close-figs

        >>> ax = df.plot.bar(stacked=True)

    Instead of nesting, the figure can be split by column with
    ``subplots=True``. In this case, a :class:`numpy.ndarray` of
    :class:`matplotlib.axes.Axes` are returned.

    .. plot::
        :context: close-figs

        >>> axes = df.plot.bar(rot=0, subplots=True)
        >>> axes[1].legend(loc=2)  # doctest: +SKIP

    If you don't like the default colours, you can specify how you'd
    like each column to be colored.

    .. plot::
        :context: close-figs

        >>> axes = df.plot.bar(
        ...     rot=0, subplots=True, color={"speed": "red", "lifespan": "green"}
        ... )
        >>> axes[1].legend(loc=2)  # doctest: +SKIP

    Plot a single column.

    .. plot::
        :context: close-figs

        >>> ax = df.plot.bar(y='speed', rot=0)

    Plot only selected categories for the DataFrame.

    .. plot::
        :context: close-figs

        >>> ax = df.plot.bar(x='lifespan', rot=0)
    """
)
@Substitution(kind="bar")
@Appender(_bar_or_line_doc)
def bar(self, x=None, y=None, **kwargs) -> PlotAccessor:
    """
    Vertical bar plot.

    A bar plot presents categorical data as rectangular bars whose
    lengths are proportional to the values they represent. It is used to
    compare discrete categories: one axis of the plot lists the
    categories being compared, and the other axis carries the measured
    value.
    """
    # Collect everything that is forwarded, keeping x and y ahead of the
    # caller-supplied extras, then dispatch with the kind fixed to "bar".
    forwarded = {"x": x, "y": y, **kwargs}
    return self(kind="bar", **forwarded)
@Appender(
    """
    See Also
    --------
    DataFrame.plot.bar: Vertical bar plot.
    DataFrame.plot : Make plots of DataFrame using matplotlib.
    matplotlib.axes.Axes.bar : Plot a vertical bar plot using matplotlib.

    Examples
    --------
    Basic example

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({'lab': ['A', 'B', 'C'], 'val': [10, 30, 20]})
        >>> ax = df.plot.barh(x='lab', y='val')

    Plot a whole DataFrame to a horizontal bar plot

    .. plot::
        :context: close-figs

        >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
        >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
        >>> index = ['snail', 'pig', 'elephant',
        ...          'rabbit', 'giraffe', 'coyote', 'horse']
        >>> df = pd.DataFrame({'speed': speed,
        ...                    'lifespan': lifespan}, index=index)
        >>> ax = df.plot.barh()

    Plot stacked barh charts for the DataFrame

    .. plot::
        :context: close-figs

        >>> ax = df.plot.barh(stacked=True)

    We can specify colors for each column

    .. plot::
        :context: close-figs

        >>> ax = df.plot.barh(color={"speed": "red", "lifespan": "green"})

    Plot a column of the DataFrame to a horizontal bar plot

    .. plot::
        :context: close-figs

        >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
        >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
        >>> index = ['snail', 'pig', 'elephant',
        ...          'rabbit', 'giraffe', 'coyote', 'horse']
        >>> df = pd.DataFrame({'speed': speed,
        ...                    'lifespan': lifespan}, index=index)
        >>> ax = df.plot.barh(y='speed')

    Plot DataFrame versus the desired column

    .. plot::
        :context: close-figs

        >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
        >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
        >>> index = ['snail', 'pig', 'elephant',
        ...          'rabbit', 'giraffe', 'coyote', 'horse']
        >>> df = pd.DataFrame({'speed': speed,
        ...                    'lifespan': lifespan}, index=index)
        >>> ax = df.plot.barh(x='lifespan')
    """
)
@Substitution(kind="bar")
@Appender(_bar_or_line_doc)
def barh(self, x=None, y=None, **kwargs) -> PlotAccessor:
    """
    Make a horizontal bar plot.

    A horizontal bar plot presents quantitative data as rectangular bars
    whose lengths are proportional to the values they represent. It is
    used to compare discrete categories: one axis of the plot lists the
    categories being compared, and the other axis carries the measured
    value.
    """
    # Collect everything that is forwarded, keeping x and y ahead of the
    # caller-supplied extras, then dispatch with the kind fixed to "barh".
    forwarded = {"x": x, "y": y, **kwargs}
    return self(kind="barh", **forwarded)
def box(self, by=None, **kwargs) -> PlotAccessor:
    r"""
    Make a box plot of the DataFrame columns.

    A box plot graphically depicts groups of numerical data through
    their quartiles.
    The box spans the Q1 to Q3 quartile values of the data, with a line
    at the median (Q2). The whiskers extend from the edges of the box to
    show the range of the data. By default the whiskers are placed at
    1.5*IQR (IQR = Q3 - Q1) from the edges of the box. Points lying past
    the end of the whiskers are outliers.

    For further details see Wikipedia's
    entry for `boxplot <https://en.wikipedia.org/wiki/Box_plot>`__.

    Keep in mind when using this chart that the box and the whiskers
    can overlap, which is very common when plotting small sets of data.

    Parameters
    ----------
    by : str or sequence
        Column in the DataFrame to group by.

        .. versionchanged:: 1.4.0

           Previously, `by` was silently ignored and no groupings were made.

    **kwargs
        Additional keywords are documented in
        :meth:`DataFrame.plot`.

    Returns
    -------
    :class:`matplotlib.axes.Axes` or numpy.ndarray of them

    See Also
    --------
    DataFrame.boxplot: Another method to draw a box plot.
    Series.plot.box: Draw a box plot from a Series object.
    matplotlib.pyplot.boxplot: Draw a box plot in matplotlib.

    Examples
    --------
    Draw a box plot from a DataFrame with four columns of randomly
    generated data.

    .. plot::
        :context: close-figs

        >>> data = np.random.randn(25, 4)
        >>> df = pd.DataFrame(data, columns=list('ABCD'))
        >>> ax = df.plot.box()

    You can also generate groupings if you specify the `by` parameter (which
    can take a column name, or a list or tuple of column names):

    .. versionchanged:: 1.4.0

    .. plot::
        :context: close-figs

        >>> age_list = [8, 10, 12, 14, 72, 74, 76, 78, 20, 25, 30, 35, 60, 85]
        >>> df = pd.DataFrame({"gender": list("MMMMMMMMFFFFFF"), "age": age_list})
        >>> ax = df.plot.box(column="age", by="gender", figsize=(10, 8))
    """
    # ``by`` goes first, followed by the caller's extra keywords; the plot
    # kind is fixed to "box".
    forwarded = {"by": by, **kwargs}
    return self(kind="box", **forwarded)
def hist(self, by=None, bins: int = 10, **kwargs) -> PlotAccessor:
    """
    Draw one histogram of the DataFrame's columns.

    A histogram represents the distribution of data.
    The values of all given Series in the DataFrame are grouped into
    bins, and all bins are drawn in one :class:`matplotlib.axes.Axes`.
    This is useful when the DataFrame's Series are in a similar scale.

    Parameters
    ----------
    by : str or sequence, optional
        Column in the DataFrame to group by.

        .. versionchanged:: 1.4.0

           Previously, `by` was silently ignored and no groupings were made.

    bins : int, default 10
        Number of histogram bins to be used.
    **kwargs
        Additional keyword arguments are documented in
        :meth:`DataFrame.plot`.

    Returns
    -------
    :class:`matplotlib.AxesSubplot`
        Return a histogram plot.

    See Also
    --------
    DataFrame.hist : Draw histograms per DataFrame's Series.
    Series.hist : Draw a histogram with Series' data.

    Examples
    --------
    When we roll a die 6000 times, we expect to get each value around 1000
    times. But when we roll two dice and sum the result, the distribution
    is going to be quite different. A histogram illustrates those
    distributions.

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame(
        ...     np.random.randint(1, 7, 6000),
        ...     columns = ['one'])
        >>> df['two'] = df['one'] + np.random.randint(1, 7, 6000)
        >>> ax = df.plot.hist(bins=12, alpha=0.5)

    A grouped histogram can be generated by providing the parameter `by` (which
    can be a column name, or a list of column names):

    .. plot::
        :context: close-figs

        >>> age_list = [8, 10, 12, 14, 72, 74, 76, 78, 20, 25, 30, 35, 60, 85]
        >>> df = pd.DataFrame({"gender": list("MMMMMMMMFFFFFF"), "age": age_list})
        >>> ax = df.plot.hist(column=["age"], by="gender", figsize=(10, 8))
    """
    # ``by`` and ``bins`` lead, followed by the caller's extra keywords; the
    # plot kind is fixed to "hist".
    forwarded = {"by": by, "bins": bins, **kwargs}
    return self(kind="hist", **forwarded)
def kde(self, bw_method=None, ind=None, **kwargs) -> PlotAccessor:
    """
    Generate Kernel Density Estimate plot using Gaussian kernels.

    In statistics, `kernel density estimation`_ (KDE) is a non-parametric
    way to estimate the probability density function (PDF) of a random
    variable. Gaussian kernels are used here, and the bandwidth is
    determined automatically.

    .. _kernel density estimation:
        https://en.wikipedia.org/wiki/Kernel_density_estimation

    Parameters
    ----------
    bw_method : str, scalar or callable, optional
        The method used to calculate the estimator bandwidth. This can be
        'scott', 'silverman', a scalar constant or a callable.
        If None (default), 'scott' is used.
        See :class:`scipy.stats.gaussian_kde` for more information.
    ind : NumPy array or int, optional
        Evaluation points for the estimated PDF. If None (default),
        1000 equally spaced points are used. If `ind` is a NumPy array, the
        KDE is evaluated at the points passed. If `ind` is an integer,
        `ind` number of equally spaced points are used.
    **kwargs
        Additional keyword arguments are documented in
        :meth:`DataFrame.plot`.

    Returns
    -------
    matplotlib.axes.Axes or numpy.ndarray of them

    See Also
    --------
    scipy.stats.gaussian_kde : Representation of a kernel-density
        estimate using Gaussian kernels. This is the function used
        internally to estimate the PDF.

    Examples
    --------
    Given a Series of points randomly sampled from an unknown
    distribution, estimate its PDF using KDE with automatic
    bandwidth determination and plot the results, evaluating them at
    1000 equally spaced points (default):

    .. plot::
        :context: close-figs

        >>> s = pd.Series([1, 2, 2.5, 3, 3.5, 4, 5])
        >>> ax = s.plot.kde()

    A scalar bandwidth can be specified. Using a small bandwidth value can
    lead to over-fitting, while using a large bandwidth value may result
    in under-fitting:

    .. plot::
        :context: close-figs

        >>> ax = s.plot.kde(bw_method=0.3)

    .. plot::
        :context: close-figs

        >>> ax = s.plot.kde(bw_method=3)

    Finally, the `ind` parameter determines the evaluation points for the
    plot of the estimated PDF:

    .. plot::
        :context: close-figs

        >>> ax = s.plot.kde(ind=[1, 2, 3, 4, 5])

    For DataFrame, it works in the same way:

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({
        ...     'x': [1, 2, 2.5, 3, 3.5, 4, 5],
        ...     'y': [4, 4, 4.5, 5, 5.5, 6, 6],
        ... })
        >>> ax = df.plot.kde()

    A scalar bandwidth can be specified. Using a small bandwidth value can
    lead to over-fitting, while using a large bandwidth value may result
    in under-fitting:

    .. plot::
        :context: close-figs

        >>> ax = df.plot.kde(bw_method=0.3)

    .. plot::
        :context: close-figs

        >>> ax = df.plot.kde(bw_method=3)

    Finally, the `ind` parameter determines the evaluation points for the
    plot of the estimated PDF:

    .. plot::
        :context: close-figs

        >>> ax = df.plot.kde(ind=[1, 2, 3, 4, 5, 6])
    """
    # ``bw_method`` and ``ind`` lead, followed by the caller's extra
    # keywords; the plot kind is fixed to "kde".
    forwarded = {"bw_method": bw_method, "ind": ind, **kwargs}
    return self(kind="kde", **forwarded)


# ``density`` is a second name for the very same function object as ``kde``.
density = kde
def area(self, x=None, y=None, **kwargs) -> PlotAccessor:
    """
    Draw a stacked area plot.

    An area plot displays quantitative data visually.
    This function wraps the matplotlib area function.

    Parameters
    ----------
    x : label or position, optional
        Coordinates for the X axis. By default uses the index.
    y : label or position, optional
        Column to plot. By default uses all columns.
    stacked : bool, default True
        Area plots are stacked by default. Set to False to create an
        unstacked plot.
    **kwargs
        Additional keyword arguments are documented in
        :meth:`DataFrame.plot`.

    Returns
    -------
    matplotlib.axes.Axes or numpy.ndarray
        Area plot, or array of area plots if subplots is True.

    See Also
    --------
    DataFrame.plot : Make plots of DataFrame using matplotlib / pylab.

    Examples
    --------
    Draw an area plot based on basic business metrics:

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({
        ...     'sales': [3, 2, 3, 9, 10, 6],
        ...     'signups': [5, 5, 6, 12, 14, 13],
        ...     'visits': [20, 42, 28, 62, 81, 50],
        ... }, index=pd.date_range(start='2018/01/01', end='2018/07/01',
        ...                        freq='M'))
        >>> ax = df.plot.area()

    Area plots are stacked by default. To produce an unstacked plot,
    pass ``stacked=False``:

    .. plot::
        :context: close-figs

        >>> ax = df.plot.area(stacked=False)

    Draw an area plot for a single column:

    .. plot::
        :context: close-figs

        >>> ax = df.plot.area(y='sales')

    Draw with a different `x`:

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({
        ...     'sales': [3, 2, 3],
        ...     'visits': [20, 42, 28],
        ...     'day': [1, 2, 3],
        ... })
        >>> ax = df.plot.area(x='day')
    """
    # ``x`` and ``y`` lead, followed by the caller's extra keywords; the plot
    # kind is fixed to "area".
    forwarded = {"x": x, "y": y, **kwargs}
    return self(kind="area", **forwarded)
def pie(self, **kwargs) -> PlotAccessor:
    """
    Generate a pie plot.

    A pie plot is a proportional representation of the numerical data in a
    column. This function wraps :meth:`matplotlib.pyplot.pie` for the
    specified column. If no column reference is passed and
    ``subplots=True`` a pie plot is drawn for each numerical column
    independently.

    Parameters
    ----------
    y : int or label, optional
        Label or position of the column to plot.
        If not provided, ``subplots=True`` argument must be passed.
    **kwargs
        Keyword arguments to pass on to :meth:`DataFrame.plot`.

    Returns
    -------
    matplotlib.axes.Axes or np.ndarray of them
        A NumPy array is returned when `subplots` is True.

    Raises
    ------
    ValueError
        If the accessor's parent is a DataFrame and neither a ``y`` column
        nor a truthy ``subplots`` keyword is given.

    See Also
    --------
    Series.plot.pie : Generate a pie plot for a Series.
    DataFrame.plot : Make plots of a DataFrame.

    Examples
    --------
    In the example below we have a DataFrame with the information about
    planet's mass and radius. We pass the 'mass' column to the
    pie function to get a pie plot.

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({'mass': [0.330, 4.87 , 5.97],
        ...                    'radius': [2439.7, 6051.8, 6378.1]},
        ...                   index=['Mercury', 'Venus', 'Earth'])
        >>> plot = df.plot.pie(y='mass', figsize=(5, 5))

    .. plot::
        :context: close-figs

        >>> plot = df.plot.pie(subplots=True, figsize=(11, 6))
    """
    # Only a DataFrame parent needs to say what to draw: either one column
    # via ``y`` or one pie per column via ``subplots``.
    if isinstance(self._parent, ABCDataFrame):
        column_missing = kwargs.get("y", None) is None
        if column_missing and not kwargs.get("subplots", False):
            raise ValueError("pie requires either y column or 'subplots=True'")
    return self(kind="pie", **kwargs)
def scatter(self, x, y, s=None, c=None, **kwargs) -> PlotAccessor:
    """
    Create a scatter plot with varying marker point size and color.

    Each point is drawn as a filled circle whose coordinates come from
    two dataframe columns. This kind of plot is useful to see complex
    correlations between two variables. Points could be for instance
    natural 2D coordinates like longitude and latitude in a map or, in
    general, any pair of metrics that can be plotted against each other.

    Parameters
    ----------
    x : int or str
        The column name or column position to be used as horizontal
        coordinates for each point.
    y : int or str
        The column name or column position to be used as vertical
        coordinates for each point.
    s : str, scalar or array-like, optional
        The size of each point. Possible values are:

        - A string with the name of the column to be used for marker's size.

        - A single scalar so all points have the same size.

        - A sequence of scalars, which will be used for each point's size
          recursively. For instance, when passing [2,14] all points size
          will be either 2 or 14, alternatively.

          .. versionchanged:: 1.1.0

    c : str, int or array-like, optional
        The color of each point. Possible values are:

        - A single color string referred to by name, RGB or RGBA code,
          for instance 'red' or '#a98d19'.

        - A sequence of color strings referred to by name, RGB or RGBA
          code, which will be used for each point's color recursively. For
          instance ['green','yellow'] all points will be filled in green or
          yellow, alternatively.

        - A column name or position whose values will be used to color the
          marker points according to a colormap.

    **kwargs
        Keyword arguments to pass on to :meth:`DataFrame.plot`. The
        keywords ``size`` and ``color`` are accepted as alternative
        spellings of `s` and `c` respectively.

    Returns
    -------
    :class:`matplotlib.axes.Axes` or numpy.ndarray of them

    Raises
    ------
    TypeError
        If both `s` and ``size``, or both `c` and ``color``, are given.

    See Also
    --------
    matplotlib.pyplot.scatter : Scatter plot using multiple input data
        formats.

    Examples
    --------
    Let's see how to draw a scatter plot using coordinates from the values
    in a DataFrame's columns.

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame([[5.1, 3.5, 0], [4.9, 3.0, 0], [7.0, 3.2, 1],
        ...                    [6.4, 3.2, 1], [5.9, 3.0, 2]],
        ...                   columns=['length', 'width', 'species'])
        >>> ax1 = df.plot.scatter(x='length',
        ...                       y='width',
        ...                       c='DarkBlue')

    And now with the color determined by a column as well.

    .. plot::
        :context: close-figs

        >>> ax2 = df.plot.scatter(x='length',
        ...                       y='width',
        ...                       c='species',
        ...                       colormap='viridis')
    """
    # Marker size: fall back to the long spelling when ``s`` is absent, and
    # reject the call when both spellings carry a value.
    size_alias = kwargs.pop("size", None)
    if s is None:
        s = size_alias
    elif size_alias is not None:
        raise TypeError("Specify exactly one of `s` and `size`")
    if s is not None:
        kwargs["s"] = s

    # Marker color: same resolution rule for ``c`` / ``color``.
    color_alias = kwargs.pop("color", None)
    if c is None:
        c = color_alias
    elif color_alias is not None:
        raise TypeError("Specify exactly one of `c` and `color`")
    if c is not None:
        kwargs["c"] = c

    return self(kind="scatter", x=x, y=y, **kwargs)
def hexbin(
    self, x, y, C=None, reduce_C_function=None, gridsize=None, **kwargs
) -> PlotAccessor:
    """
    Generate a hexagonal binning plot.

    Generate a hexagonal binning plot of `x` versus `y`. If `C` is `None`
    (the default), this is a histogram of the number of occurrences
    of the observations at ``(x[i], y[i])``.

    If `C` is specified, specifies values at given coordinates
    ``(x[i], y[i])``. These values are accumulated for each hexagonal
    bin and then reduced according to `reduce_C_function`,
    having as default the NumPy's mean function (:meth:`numpy.mean`).
    (If `C` is specified, it must also be a 1-D sequence
    of the same length as `x` and `y`, or a column label.)

    Parameters
    ----------
    x : int or str
        The column label or position for x points.
    y : int or str
        The column label or position for y points.
    C : int or str, optional
        The column label or position for the value of `(x, y)` point.
    reduce_C_function : callable, default `np.mean`
        Function of one argument that reduces all the values in a bin to
        a single number (e.g. `np.mean`, `np.max`, `np.sum`, `np.std`).
    gridsize : int or tuple of (int, int), default 100
        The number of hexagons in the x-direction.
        The corresponding number of hexagons in the y-direction is
        chosen in a way that the hexagons are approximately regular.
        Alternatively, gridsize can be a tuple with two elements
        specifying the number of hexagons in the x-direction and the
        y-direction.
    **kwargs
        Additional keyword arguments are documented in
        :meth:`DataFrame.plot`.

    Returns
    -------
    matplotlib.AxesSubplot
        The matplotlib ``Axes`` on which the hexbin is plotted.

    See Also
    --------
    DataFrame.plot : Make plots of a DataFrame.
    matplotlib.pyplot.hexbin : Hexagonal binning plot using matplotlib,
        the matplotlib function that is used under the hood.

    Examples
    --------
    The following examples are generated with random data from
    a normal distribution.

    .. plot::
        :context: close-figs

        >>> n = 10000
        >>> df = pd.DataFrame({'x': np.random.randn(n),
        ...                    'y': np.random.randn(n)})
        >>> ax = df.plot.hexbin(x='x', y='y', gridsize=20)

    The next example uses `C` and `np.sum` as `reduce_C_function`.
    Note that `'observations'` values ranges from 1 to 5 but the result
    plot shows values up to more than 25. This is because of the
    `reduce_C_function`.

    .. plot::
        :context: close-figs

        >>> n = 500
        >>> df = pd.DataFrame({
        ...     'coord_x': np.random.uniform(-3, 3, size=n),
        ...     'coord_y': np.random.uniform(30, 50, size=n),
        ...     'observations': np.random.randint(1,5, size=n)
        ...     })
        >>> ax = df.plot.hexbin(x='coord_x',
        ...                     y='coord_y',
        ...                     C='observations',
        ...                     reduce_C_function=np.sum,
        ...                     gridsize=10,
        ...                     cmap="viridis")
    """
    # Forward the two optional settings only when the caller supplied them;
    # when left as None they are simply not passed on.
    optional = (("reduce_C_function", reduce_C_function), ("gridsize", gridsize))
    for keyword, value in optional:
        if value is not None:
            kwargs[keyword] = value

    return self(kind="hexbin", x=x, y=y, C=C, **kwargs)
# Module-level registry mapping a backend identifier string to the backend
# module that was loaded for it. It starts out empty.
_backends: dict[str, types.ModuleType] = {}
def _load_backend(backend: str) -> types.ModuleType:
    """
    Import and return the plotting backend named by ``backend``.

    Parameters
    ----------
    backend : str
        The identifier for the backend. Either an entrypoint item registered
        with importlib.metadata, "matplotlib", or a module name.

    Returns
    -------
    types.ModuleType
        The imported backend.

    Raises
    ------
    ImportError
        If ``backend`` is "matplotlib" and the bundled matplotlib backend
        module cannot be imported.
    ValueError
        If no backend can be located under that name, or the object that
        was located has no ``plot`` attribute.
    """
    from importlib.metadata import entry_points

    if backend == "matplotlib":
        # Because matplotlib is an optional dependency and first-party backend,
        # we need to attempt an import here to raise an ImportError if needed.
        try:
            return importlib.import_module("pandas.plotting._matplotlib")
        except ImportError:
            raise ImportError(
                "matplotlib is required for plotting when the "
                'default backend "matplotlib" is selected.'
            ) from None

    group = "pandas_plotting_backends"
    registered = entry_points()
    # entry_points lost dict API ~ PY 3.10
    # https://github.com/python/importlib_metadata/issues/298
    if hasattr(registered, "select"):
        # error: "Dict[str, Tuple[EntryPoint, ...]]" has no attribute "select"
        candidates = registered.select(group=group)  # type: ignore[attr-defined]
    else:
        candidates = registered.get(group, ())

    for candidate in candidates:
        if candidate.name == backend:
            module = candidate.load()
            break
    else:
        # No registered entry point matched: fall back to treating the
        # identifier as a plain module name.
        try:
            module = importlib.import_module(backend)
        except ImportError:
            # Nothing importable either; the ValueError below reports it.
            module = None

    # Validate that the interface is implemented when the option is set,
    # rather than at plot time.
    if hasattr(module, "plot"):
        return module

    raise ValueError(
        f"Could not find plotting backend '{backend}'. Ensure that you've "
        f"installed the package providing the '{backend}' entrypoint, or that "
        "the package has a top-level `.plot` method."
    )
def _get_plot_backend(backend: str | None = None):
    """
    Return the plotting backend to use (e.g. `pandas.plotting._matplotlib`).

    pandas plots with matplotlib by default, but other third-party backends
    can be plugged in. This function returns the module that provides the
    top-level `.plot` method which actually does the plotting. The backend
    is identified by a string taken from the keyword argument `backend` or,
    when that is not given, from the option `pandas.options.plotting.backend`.
    All the rest of the code in this file uses the backend specified there
    for the plotting.

    The backend is imported lazily, as matplotlib is a soft dependency, and
    pandas can be used without it being installed.

    Notes
    -----
    Modifies `_backends` with imported backend as a side effect.
    """
    backend_str: str = backend or get_option("plotting.backend")

    # Load on first use only; a failed load raises before anything is stored.
    if backend_str not in _backends:
        _backends[backend_str] = _load_backend(backend_str)
    return _backends[backend_str]