Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/tablib/core.py: 16%
413 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1"""
2 tablib.core
3 ~~~~~~~~~~~
5 This module implements the central Tablib objects.
7 :copyright: (c) 2016 by Kenneth Reitz. 2019 Jazzband.
8 :license: MIT, see LICENSE for more details.
9"""
11from collections import OrderedDict
12from copy import copy
13from operator import itemgetter
15from tablib.exceptions import (
16 HeadersNeeded,
17 InvalidDatasetIndex,
18 InvalidDatasetType,
19 InvalidDimensions,
20 UnsupportedFormat,
21)
22from tablib.formats import registry
23from tablib.utils import normalize_input
# Package metadata, kept at module level for introspection tools.
__title__ = 'tablib'
__author__ = 'Kenneth Reitz'
__license__ = 'MIT'
__copyright__ = 'Copyright 2017 Kenneth Reitz. 2019 Jazzband.'
__docformat__ = 'restructuredtext'
class Row:
    """Internal Row object. Mainly used for filtering."""

    __slots__ = ['_row', 'tags']

    def __init__(self, row=(), tags=()):
        # Copy both inputs so the Row owns its own mutable storage.
        self._row = list(row)
        self.tags = list(tags)

    def __iter__(self):
        return iter(self._row)

    def __len__(self):
        return len(self._row)

    def __repr__(self):
        return repr(self._row)

    def __getitem__(self, i):
        return self._row[i]

    def __setitem__(self, i, value):
        self._row[i] = value

    def __delitem__(self, i):
        del self._row[i]

    # Pickle support: __slots__ classes have no __dict__, so state is
    # expressed explicitly as a (cells, tags) pair.
    def __getstate__(self):
        return self._row, self.tags

    def __setstate__(self, state):
        self._row, self.tags = state

    def rpush(self, value):
        """Add *value* at the right-hand (end) side of the row."""
        self.insert(len(self._row), value)

    def lpush(self, value):
        """Add *value* at the left-hand (start) side of the row."""
        self.insert(0, value)

    def append(self, value):
        """Alias for :meth:`rpush`."""
        self.rpush(value)

    def insert(self, index, value):
        """Insert *value* at position *index*."""
        self._row.insert(index, value)

    def __contains__(self, item):
        return item in self._row

    @property
    def tuple(self):
        """Tuple representation of :class:`Row`."""
        return tuple(self._row)

    @property
    def list(self):
        """List representation of :class:`Row`."""
        return list(self._row)

    def has_tag(self, tag):
        """Returns true if current row contains tag.

        *tag* may be a single string or an iterable of strings; for an
        iterable, any overlap with this row's tags counts as a match.
        """
        if tag is None:
            return False
        if isinstance(tag, str):
            return tag in self.tags
        return not set(tag).isdisjoint(self.tags)
class Dataset:
    """The :class:`Dataset` object is the heart of Tablib. It provides all core
    functionality.

    Usually you create a :class:`Dataset` instance in your main module, and append
    rows as you collect data. ::

        data = tablib.Dataset()
        data.headers = ('name', 'age')

        for (name, age) in some_collector():
            data.append((name, age))

    Setting columns is similar. The column data length must equal the
    current height of the data and headers must be set. ::

        data = tablib.Dataset()
        data.headers = ('first_name', 'last_name')

        data.append(('John', 'Adams'))
        data.append(('George', 'Washington'))

        data.append_col((90, 67), header='age')

    You can also set rows and headers upon instantiation. This is useful if
    dealing with dozens or hundreds of :class:`Dataset` objects. ::

        headers = ('first_name', 'last_name')
        data = [('John', 'Adams'), ('George', 'Washington')]

        data = tablib.Dataset(*data, headers=headers)

    :param \\*args: (optional) list of rows to populate Dataset
    :param headers: (optional) list strings for Dataset header row
    :param title: (optional) string to use as title of the Dataset


    .. admonition:: Format Attributes Definition

        If you look at the code, the various output/import formats are not
        defined within the :class:`Dataset` object. To add support for a new format, see
        :ref:`Adding New Formats <newformats>`.
    """

    def __init__(self, *args, **kwargs):
        self._data = list(Row(arg) for arg in args)
        self.__headers = None

        # ('title', index) tuples
        self._separators = []

        # (column, callback) tuples
        self._formatters = []

        self.headers = kwargs.get('headers')

        self.title = kwargs.get('title')

    def __len__(self):
        return self.height

    def __getitem__(self, key):
        """Row access by int/slice, or column access by header string."""
        if isinstance(key, str):
            if key in self.headers:
                pos = self.headers.index(key)  # get 'key' index from each data
                return [row[pos] for row in self._data]
            else:
                raise KeyError
        else:
            _results = self._data[key]
            if isinstance(_results, Row):
                return _results.tuple
            else:
                # Slice access: return a list of row tuples.
                return [result.tuple for result in _results]

    def __setitem__(self, key, value):
        self._validate(value)
        self._data[key] = Row(value)

    def __delitem__(self, key):
        """Delete a row (int/slice key) or a whole column (header string key)."""
        if isinstance(key, str):
            if key in self.headers:
                pos = self.headers.index(key)
                del self.headers[pos]
                for i, row in enumerate(self._data):
                    del row[pos]
                    self._data[i] = row
            else:
                raise KeyError
        else:
            del self._data[key]

    def __repr__(self):
        try:
            return '<%s dataset>' % (self.title.lower())
        except AttributeError:
            # No title was set (or it isn't a string).
            return '<dataset object>'

    def __str__(self):
        result = []

        # Add str representation of headers.
        if self.__headers:
            result.append([str(h) for h in self.__headers])

        # Add str representation of rows.
        result.extend(list(map(str, row)) for row in self._data)

        # Column widths: the longest cell in each column.
        lens = [list(map(len, row)) for row in result]
        field_lens = list(map(max, zip(*lens)))

        # delimiter between header and data
        if self.__headers:
            result.insert(1, ['-' * length for length in field_lens])

        format_string = '|'.join('{%s:%s}' % item for item in enumerate(field_lens))

        return '\n'.join(format_string.format(*row) for row in result)

    # ---------
    # Internals
    # ---------

    def _get_in_format(self, fmt_key, **kwargs):
        """Export this dataset via the registered format named *fmt_key*."""
        return registry.get_format(fmt_key).export_set(self, **kwargs)

    def _set_in_format(self, fmt_key, in_stream, **kwargs):
        """Import *in_stream* into this dataset via the format named *fmt_key*."""
        in_stream = normalize_input(in_stream)
        return registry.get_format(fmt_key).import_set(self, in_stream, **kwargs)

    def _validate(self, row=None, col=None, safety=False):
        """Assures size of every row in dataset is of proper proportions.

        :param row: candidate row; must match the current width (if any).
        :param col: candidate column; must match the current height (if any).
        :param safety: when True, return False instead of raising.
        :raises InvalidDimensions: on mismatch, unless *safety* is set.
        """
        if row:
            is_valid = (len(row) == self.width) if self.width else True
        elif col:
            if len(col) < 1:
                is_valid = True
            else:
                is_valid = (len(col) == self.height) if self.height else True
        else:
            # No candidate given: validate the whole dataset.
            is_valid = all(len(x) == self.width for x in self._data)

        if is_valid:
            return True
        else:
            if not safety:
                raise InvalidDimensions
            return False

    def _package(self, dicts=True, ordered=True):
        """Packages Dataset into lists of dictionaries for transmission."""
        # TODO: Dicts default to false?

        _data = list(self._data)

        if ordered:
            dict_pack = OrderedDict
        else:
            dict_pack = dict

        # Execute formatters.  NOTE: the copy above is shallow, so
        # formatters mutate the underlying Row objects in place.
        if self._formatters:
            for row_i, row in enumerate(_data):
                for col, callback in self._formatters:
                    try:
                        if col is None:
                            # A formatter registered for all columns.
                            for j, c in enumerate(row):
                                _data[row_i][j] = callback(c)
                        else:
                            _data[row_i][col] = callback(row[col])
                    except IndexError:
                        raise InvalidDatasetIndex

        if self.headers:
            if dicts:
                data = [dict_pack(list(zip(self.headers, data_row))) for data_row in _data]
            else:
                data = [list(self.headers)] + list(_data)
        else:
            data = [list(row) for row in _data]

        return data

    def _get_headers(self):
        """An *optional* list of strings to be used for header rows and attribute names.

        This must be set manually. The given list length must equal :attr:`Dataset.width`.

        """
        return self.__headers

    def _set_headers(self, collection):
        """Validating headers setter."""
        self._validate(collection)
        if collection:
            try:
                self.__headers = list(collection)
            except TypeError:
                raise TypeError
        else:
            self.__headers = None

    headers = property(_get_headers, _set_headers)

    def _get_dict(self):
        """A native Python representation of the :class:`Dataset` object. If headers have
        been set, a list of Python dictionaries will be returned. If no headers have been set,
        a list of tuples (rows) will be returned instead.

        A dataset object can also be imported by setting the `Dataset.dict` attribute: ::

            data = tablib.Dataset()
            data.dict = [{'age': 90, 'first_name': 'Kenneth', 'last_name': 'Reitz'}]

        """
        return self._package()

    def _set_dict(self, pickle):
        """A native Python representation of the Dataset object. If headers have been
        set, a list of Python dictionaries will be returned. If no headers have been
        set, a list of tuples (rows) will be returned instead.

        A dataset object can also be imported by setting the :attr:`Dataset.dict` attribute. ::

            data = tablib.Dataset()
            data.dict = [{'age': 90, 'first_name': 'Kenneth', 'last_name': 'Reitz'}]

        """
        if not len(pickle):
            return

        # if list of rows
        if isinstance(pickle[0], list):
            self.wipe()
            for row in pickle:
                self.append(Row(row))

        # if list of objects
        elif isinstance(pickle[0], dict):
            self.wipe()
            # Headers come from the first dict; all dicts are assumed to
            # share the same key order.
            self.headers = list(pickle[0].keys())
            for row in pickle:
                self.append(Row(list(row.values())))
        else:
            raise UnsupportedFormat

    dict = property(_get_dict, _set_dict)

    def _clean_col(self, col):
        """Prepares the given column for insert/append."""
        col = list(col)

        # When headers are set, the first item of the column is its header.
        if self.headers:
            header = [col.pop(0)]
        else:
            header = []

        # A single callable produces the column by mapping over the rows.
        if len(col) == 1 and callable(col[0]):
            col = list(map(col[0], self._data))
        col = tuple(header + col)

        return col

    @property
    def height(self):
        """The number of rows currently in the :class:`Dataset`.
        Cannot be directly modified.
        """
        return len(self._data)

    @property
    def width(self):
        """The number of columns currently in the :class:`Dataset`.
        Cannot be directly modified.
        """
        try:
            return len(self._data[0])
        except IndexError:
            # No data yet; fall back to the header count, if headers exist.
            try:
                return len(self.headers)
            except TypeError:
                return 0

    def load(self, in_stream, format=None, **kwargs):
        """
        Import `in_stream` to the :class:`Dataset` object using the `format`.
        `in_stream` can be a file-like object, a string, or a bytestring.

        :param \\*\\*kwargs: (optional) custom configuration to the format `import_set`.
        """
        stream = normalize_input(in_stream)
        if not format:
            format = detect_format(stream)

        fmt = registry.get_format(format)
        if not hasattr(fmt, 'import_set'):
            raise UnsupportedFormat(f'Format {format} cannot be imported.')

        fmt.import_set(self, stream, **kwargs)
        return self

    def export(self, format, **kwargs):
        """
        Export :class:`Dataset` object to `format`.

        :param \\*\\*kwargs: (optional) custom configuration to the format `export_set`.
        """
        fmt = registry.get_format(format)
        if not hasattr(fmt, 'export_set'):
            raise UnsupportedFormat(f'Format {format} cannot be exported.')

        return fmt.export_set(self, **kwargs)

    # ----
    # Rows
    # ----

    def insert(self, index, row, tags=()):
        """Inserts a row to the :class:`Dataset` at the given index.

        Rows inserted must be the correct size (height or width).

        The default behaviour is to insert the given row to the :class:`Dataset`
        object at the given index.
        """
        self._validate(row)
        self._data.insert(index, Row(row, tags=tags))

    def rpush(self, row, tags=()):
        """Adds a row to the end of the :class:`Dataset`.
        See :method:`Dataset.insert` for additional documentation.
        """
        self.insert(self.height, row=row, tags=tags)

    def lpush(self, row, tags=()):
        """Adds a row to the top of the :class:`Dataset`.
        See :method:`Dataset.insert` for additional documentation.
        """
        self.insert(0, row=row, tags=tags)

    def append(self, row, tags=()):
        """Adds a row to the :class:`Dataset`.
        See :method:`Dataset.insert` for additional documentation.
        """
        self.rpush(row, tags)

    def extend(self, rows, tags=()):
        """Adds a list of rows to the :class:`Dataset` using
        :method:`Dataset.append`
        """
        for row in rows:
            self.append(row, tags)

    def lpop(self):
        """Removes and returns the first row of the :class:`Dataset`."""
        cache = self[0]
        del self[0]

        return cache

    def rpop(self):
        """Removes and returns the last row of the :class:`Dataset`."""
        cache = self[-1]
        del self[-1]

        return cache

    def pop(self):
        """Removes and returns the last row of the :class:`Dataset`."""
        return self.rpop()

    # -------
    # Columns
    # -------

    def insert_col(self, index, col=None, header=None):
        """Inserts a column to the :class:`Dataset` at the given index.

        Columns inserted must be the correct height.

        You can also insert a column of a single callable object, which will
        add a new column with the return values of the callable each as an
        item in the column. ::

            data.append_col(col=random.randint)

        If inserting a column, and :attr:`Dataset.headers` is set, the
        header attribute must be set, and will be considered the header for
        that row.

        See :ref:`dyncols` for an in-depth example.

        .. versionchanged:: 0.9.0
           If inserting a column, and :attr:`Dataset.headers` is set, the
           header attribute must be set, and will be considered the header for
           that row.

        .. versionadded:: 0.9.0
           If inserting a row, you can add :ref:`tags <tags>` to the row you are inserting.
           This gives you the ability to :method:`filter <Dataset.filter>` your
           :class:`Dataset` later.

        """
        if col is None:
            col = []

        # Callable Columns...
        if callable(col):
            col = list(map(col, self._data))

        col = self._clean_col(col)
        self._validate(col=col)

        if self.headers:
            # pop the first item off, add to headers
            if not header:
                raise HeadersNeeded()

            # corner case - if header is set without data
            elif header and self.height == 0 and len(col):
                raise InvalidDimensions

            self.headers.insert(index, header)

        if self.height and self.width:
            for i, row in enumerate(self._data):
                row.insert(index, col[i])
                self._data[i] = row
        else:
            # No existing rows: each column value becomes a one-cell row.
            self._data = [Row([row]) for row in col]

    def rpush_col(self, col, header=None):
        """Adds a column to the end of the :class:`Dataset`.
        See :method:`Dataset.insert` for additional documentation.
        """
        self.insert_col(self.width, col, header=header)

    def lpush_col(self, col, header=None):
        """Adds a column to the top of the :class:`Dataset`.
        See :method:`Dataset.insert` for additional documentation.
        """
        self.insert_col(0, col, header=header)

    def insert_separator(self, index, text='-'):
        """Adds a separator to :class:`Dataset` at given index."""
        sep = (index, text)
        self._separators.append(sep)

    def append_separator(self, text='-'):
        """Adds a :ref:`separator <separators>` to the :class:`Dataset`."""
        # change offsets if headers are or aren't defined
        if not self.headers:
            index = self.height if self.height else 0
        else:
            index = (self.height + 1) if self.height else 1

        self.insert_separator(index, text)

    def append_col(self, col, header=None):
        """Adds a column to the :class:`Dataset`.
        See :method:`Dataset.insert_col` for additional documentation.
        """
        self.rpush_col(col, header)

    def get_col(self, index):
        """Returns the column from the :class:`Dataset` at the given index."""
        return [row[index] for row in self._data]

    # ----
    # Misc
    # ----

    def add_formatter(self, col, handler):
        """Adds a formatter to the :class:`Dataset`.

        .. versionadded:: 0.9.5

        :param col: column to format. Accepts index int, header str, or None
            to apply the formatter to every column.
        :param handler: reference to callback function to execute against
            each cell value.
        """
        if isinstance(col, str):
            if col in self.headers:
                col = self.headers.index(col)  # get 'key' index from each data
            else:
                raise KeyError

        # BUGFIX: the previous check (`not col > self.width`) raised
        # TypeError for col=None in Python 3, although _package() explicitly
        # supports all-column (None) formatters.
        if col is None or col <= self.width:
            self._formatters.append((col, handler))
        else:
            raise InvalidDatasetIndex

        return True

    def filter(self, tag):
        """Returns a new instance of the :class:`Dataset`, excluding any rows
        that do not contain the given :ref:`tags <tags>`.
        """
        _dset = copy(self)
        _dset._data = [row for row in _dset._data if row.has_tag(tag)]

        return _dset

    def sort(self, col, reverse=False):
        """Sort a :class:`Dataset` by a specific column, given string (for
        header) or integer (for column index). The order can be reversed by
        setting ``reverse`` to ``True``.

        Returns a new :class:`Dataset` instance where columns have been
        sorted.
        """
        if isinstance(col, str):
            if not self.headers:
                raise HeadersNeeded

            _sorted = sorted(self.dict, key=itemgetter(col), reverse=reverse)
            _dset = Dataset(headers=self.headers, title=self.title)

            for item in _sorted:
                row = [item[key] for key in self.headers]
                _dset.append(row=row)

        else:
            # Integer index: translate to a header key when headers exist,
            # since self.dict then yields dicts rather than lists.
            if self.headers:
                col = self.headers[col]

            _sorted = sorted(self.dict, key=itemgetter(col), reverse=reverse)
            _dset = Dataset(headers=self.headers, title=self.title)

            for item in _sorted:
                if self.headers:
                    row = [item[key] for key in self.headers]
                else:
                    row = item
                _dset.append(row=row)

        return _dset

    def transpose(self):
        """Transpose a :class:`Dataset`, turning rows into columns and vice
        versa, returning a new ``Dataset`` instance. The first row of the
        original instance becomes the new header row."""

        # Don't transpose if there is no data
        if not self:
            return

        _dset = Dataset()
        # The first element of the headers stays in the headers,
        # it is our "hinge" on which we rotate the data
        new_headers = [self.headers[0]] + self[self.headers[0]]

        _dset.headers = new_headers
        for index, column in enumerate(self.headers):
            if column == self.headers[0]:
                # It's in the headers, so skip it
                continue

            # Adding the column name as now they're a regular column
            # Use `get_col(index)` in case there are repeated values
            row_data = [column] + self.get_col(index)
            row_data = Row(row_data)
            _dset.append(row=row_data)
        return _dset

    def stack(self, other):
        """Stack two :class:`Dataset` instances together by
        joining at the row level, and return new combined
        ``Dataset`` instance."""

        if not isinstance(other, Dataset):
            return

        if self.width != other.width:
            raise InvalidDimensions

        # Copy the source data
        _dset = copy(self)

        rows_to_stack = [row for row in _dset._data]
        other_rows = [row for row in other._data]

        rows_to_stack.extend(other_rows)
        _dset._data = rows_to_stack

        return _dset

    def stack_cols(self, other):
        """Stack two :class:`Dataset` instances together by
        joining at the column level, and return a new
        combined ``Dataset`` instance. If either ``Dataset``
        has headers set, than the other must as well."""

        if not isinstance(other, Dataset):
            return

        if self.headers or other.headers:
            if not self.headers or not other.headers:
                raise HeadersNeeded

        if self.height != other.height:
            raise InvalidDimensions

        try:
            new_headers = self.headers + other.headers
        except TypeError:
            # Both header lists are None.
            new_headers = None

        _dset = Dataset()

        for column in self.headers:
            _dset.append_col(col=self[column])

        for column in other.headers:
            _dset.append_col(col=other[column])

        _dset.headers = new_headers

        return _dset

    def remove_duplicates(self):
        """Removes all duplicate rows from the :class:`Dataset` object
        while maintaining the original order."""
        seen = set()
        self._data[:] = [
            row for row in self._data
            if not (tuple(row) in seen or seen.add(tuple(row)))
        ]

    def wipe(self):
        """Removes all content and headers from the :class:`Dataset` object."""
        self._data = list()
        self.__headers = None

    def subset(self, rows=None, cols=None):
        """Returns a new instance of the :class:`Dataset`,
        including only specified rows and columns.
        """
        # Don't return if no data
        if not self:
            return

        if rows is None:
            rows = list(range(self.height))

        if cols is None:
            cols = list(self.headers)

        # filter out impossible rows and columns
        rows = [row for row in rows if row in range(self.height)]
        cols = [header for header in cols if header in self.headers]

        _dset = Dataset()

        # filtering rows and columns
        _dset.headers = list(cols)

        _dset._data = []
        for row_no, row in enumerate(self._data):
            data_row = []
            for key in _dset.headers:
                if key in self.headers:
                    pos = self.headers.index(key)
                    data_row.append(row[pos])
                else:
                    raise KeyError

            if row_no in rows:
                _dset.append(row=Row(data_row))

        return _dset
class Databook:
    """A book of :class:`Dataset` objects.
    """

    def __init__(self, sets=None):
        # An explicit list of sheets; defaults to empty when falsy.
        self._datasets = sets or []

    def __repr__(self):
        try:
            return '<%s databook>' % (self.title.lower())
        except AttributeError:
            # No title attribute was ever assigned.
            return '<databook object>'

    def wipe(self):
        """Removes all :class:`Dataset` objects from the :class:`Databook`."""
        self._datasets = []

    def sheets(self):
        """Return the list of contained :class:`Dataset` objects."""
        return self._datasets

    def add_sheet(self, dataset):
        """Adds given :class:`Dataset` to the :class:`Databook`."""
        if not isinstance(dataset, Dataset):
            raise InvalidDatasetType
        self._datasets.append(dataset)

    def _package(self, ordered=True):
        """Packages :class:`Databook` for delivery."""
        dict_pack = OrderedDict if ordered else dict
        return [
            dict_pack(title=sheet.title, data=sheet._package(ordered=ordered))
            for sheet in self._datasets
        ]

    @property
    def size(self):
        """The number of the :class:`Dataset` objects within :class:`Databook`."""
        return len(self._datasets)

    def load(self, in_stream, format, **kwargs):
        """
        Import `in_stream` to the :class:`Databook` object using the `format`.
        `in_stream` can be a file-like object, a string, or a bytestring.

        :param \\*\\*kwargs: (optional) custom configuration to the format `import_book`.
        """
        stream = normalize_input(in_stream)
        if not format:
            format = detect_format(stream)

        fmt = registry.get_format(format)
        if not hasattr(fmt, 'import_book'):
            raise UnsupportedFormat(f'Format {format} cannot be loaded.')

        fmt.import_book(self, stream, **kwargs)
        return self

    def export(self, format, **kwargs):
        """
        Export :class:`Databook` object to `format`.

        :param \\*\\*kwargs: (optional) custom configuration to the format `export_book`.
        """
        fmt = registry.get_format(format)
        if not hasattr(fmt, 'export_book'):
            raise UnsupportedFormat(f'Format {format} cannot be exported.')

        return fmt.export_book(self, **kwargs)
def detect_format(stream):
    """Return format name of given stream (file-like object, string, or bytestring)."""
    stream = normalize_input(stream)
    for fmt in registry.formats():
        try:
            if fmt.detect(stream):
                # finally still runs before this returns, rewinding the stream.
                return fmt.title
        except AttributeError:
            # Format has no detect() hook; skip it.
            pass
        finally:
            # Rewind after every probe so the next format sees the full input.
            if hasattr(stream, 'seek'):
                stream.seek(0)
    return None
def import_set(stream, format=None, **kwargs):
    """Return dataset of given stream (file-like object, string, or bytestring)."""
    normalized = normalize_input(stream)
    return Dataset().load(normalized, format, **kwargs)
def import_book(stream, format=None, **kwargs):
    """Return dataset of given stream (file-like object, string, or bytestring)."""
    normalized = normalize_input(stream)
    return Databook().load(normalized, format, **kwargs)
# Register the built-in import/export formats with the module-level registry
# at import time, so Dataset/Databook load()/export() work out of the box.
registry.register_builtins()