Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/tablib/core.py: 16%
413 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1"""
2 tablib.core
3 ~~~~~~~~~~~
5 This module implements the central Tablib objects.
7 :copyright: (c) 2016 by Kenneth Reitz. 2019 Jazzband.
8 :license: MIT, see LICENSE for more details.
9"""
11from collections import OrderedDict
12from copy import copy
13from operator import itemgetter
15from tablib.exceptions import (
16 HeadersNeeded,
17 InvalidDatasetIndex,
18 InvalidDatasetType,
19 InvalidDimensions,
20 UnsupportedFormat,
21)
22from tablib.formats import registry
23from tablib.utils import normalize_input
# Package metadata, kept at module level for introspection tools.
__title__ = 'tablib'
__author__ = 'Kenneth Reitz'
__license__ = 'MIT'
__copyright__ = 'Copyright 2017 Kenneth Reitz. 2019 Jazzband.'
__docformat__ = 'restructuredtext'
class Row:
    """Internal Row object. Mainly used for filtering."""

    __slots__ = ['_row', 'tags']

    def __init__(self, row=(), tags=()):
        # Copy both inputs so the Row owns its own mutable storage.
        self._row = list(row)
        self.tags = list(tags)

    def __iter__(self):
        return iter(self._row)

    def __len__(self):
        return len(self._row)

    def __repr__(self):
        return repr(self._row)

    def __getitem__(self, i):
        return self._row[i]

    def __setitem__(self, i, value):
        self._row[i] = value

    def __delitem__(self, i):
        del self._row[i]

    # Pickle support: __slots__ classes have no __dict__, so state is
    # expressed explicitly as a (cells, tags) pair.
    def __getstate__(self):
        return self._row, self.tags

    def __setstate__(self, state):
        self._row, self.tags = state

    def rpush(self, value):
        """Add *value* at the right-hand (end) side of the row."""
        self.insert(len(self._row), value)

    def lpush(self, value):
        """Add *value* at the left-hand (start) side of the row."""
        self.insert(0, value)

    def append(self, value):
        """Alias for :meth:`rpush`."""
        self.rpush(value)

    def insert(self, index, value):
        """Insert *value* at position *index*."""
        self._row.insert(index, value)

    def __contains__(self, item):
        return item in self._row

    @property
    def tuple(self):
        """Tuple representation of :class:`Row`."""
        return tuple(self._row)

    @property
    def list(self):
        """List representation of :class:`Row`."""
        return list(self._row)

    def has_tag(self, tag):
        """Returns true if current row contains tag.

        *tag* may be a single string or an iterable of strings; for an
        iterable, any overlap with this row's tags counts as a match.
        """
        if tag is None:
            return False
        if isinstance(tag, str):
            return tag in self.tags
        return not set(tag).isdisjoint(self.tags)
class Dataset:
    """The :class:`Dataset` object is the heart of Tablib. It provides all core
    functionality.

    Usually you create a :class:`Dataset` instance in your main module, and append
    rows as you collect data. ::

        data = tablib.Dataset()
        data.headers = ('name', 'age')

        for (name, age) in some_collector():
            data.append((name, age))

    Setting columns is similar. The column data length must equal the
    current height of the data and headers must be set. ::

        data = tablib.Dataset()
        data.headers = ('first_name', 'last_name')

        data.append(('John', 'Adams'))
        data.append(('George', 'Washington'))

        data.append_col((90, 67), header='age')

    You can also set rows and headers upon instantiation. This is useful if
    dealing with dozens or hundreds of :class:`Dataset` objects. ::

        headers = ('first_name', 'last_name')
        data = [('John', 'Adams'), ('George', 'Washington')]

        data = tablib.Dataset(*data, headers=headers)

    :param \\*args: (optional) list of rows to populate Dataset
    :param headers: (optional) list strings for Dataset header row
    :param title: (optional) string to use as title of the Dataset


    .. admonition:: Format Attributes Definition

        If you look at the code, the various output/import formats are not
        defined within the :class:`Dataset` object. To add support for a new format, see
        :ref:`Adding New Formats <newformats>`.
    """

    def __init__(self, *args, **kwargs):
        self._data = list(Row(arg) for arg in args)
        self.__headers = None

        # ('title', index) tuples
        self._separators = []

        # (column, callback) tuples
        self._formatters = []

        self.headers = kwargs.get('headers')

        self.title = kwargs.get('title')

    def __len__(self):
        return self.height

    def __getitem__(self, key):
        """Row access by int/slice, or column access by header string."""
        if isinstance(key, str):
            if key in self.headers:
                pos = self.headers.index(key)  # get 'key' index from each data
                return [row[pos] for row in self._data]
            else:
                raise KeyError
        else:
            _results = self._data[key]
            if isinstance(_results, Row):
                return _results.tuple
            else:
                # Slice access: return a list of row tuples.
                return [result.tuple for result in _results]

    def __setitem__(self, key, value):
        self._validate(value)
        self._data[key] = Row(value)

    def __delitem__(self, key):
        """Delete a row (int/slice key) or a whole column (header string key)."""
        if isinstance(key, str):
            if key in self.headers:
                pos = self.headers.index(key)
                del self.headers[pos]
                for i, row in enumerate(self._data):
                    del row[pos]
                    self._data[i] = row
            else:
                raise KeyError
        else:
            del self._data[key]

    def __repr__(self):
        try:
            return '<%s dataset>' % (self.title.lower())
        except AttributeError:
            # No title was set (or it isn't a string).
            return '<dataset object>'

    def __str__(self):
        result = []

        # Add str representation of headers.
        if self.__headers:
            result.append([str(h) for h in self.__headers])

        # Add str representation of rows.
        result.extend(list(map(str, row)) for row in self._data)

        # Column widths: the longest cell in each column.
        lens = [list(map(len, row)) for row in result]
        field_lens = list(map(max, zip(*lens)))

        # delimiter between header and data
        if self.__headers:
            result.insert(1, ['-' * length for length in field_lens])

        format_string = '|'.join('{%s:%s}' % item for item in enumerate(field_lens))

        return '\n'.join(format_string.format(*row) for row in result)

    # ---------
    # Internals
    # ---------

    def _get_in_format(self, fmt_key, **kwargs):
        """Export this dataset via the registered format named *fmt_key*."""
        return registry.get_format(fmt_key).export_set(self, **kwargs)

    def _set_in_format(self, fmt_key, in_stream, **kwargs):
        """Import *in_stream* into this dataset via the format named *fmt_key*."""
        in_stream = normalize_input(in_stream)
        return registry.get_format(fmt_key).import_set(self, in_stream, **kwargs)

    def _validate(self, row=None, col=None, safety=False):
        """Assures size of every row in dataset is of proper proportions.

        :param row: candidate row; must match the current width (if any).
        :param col: candidate column; must match the current height (if any).
        :param safety: when True, return False instead of raising.
        :raises InvalidDimensions: on mismatch, unless *safety* is set.
        """
        if row:
            is_valid = (len(row) == self.width) if self.width else True
        elif col:
            if len(col) < 1:
                is_valid = True
            else:
                is_valid = (len(col) == self.height) if self.height else True
        else:
            # No candidate given: validate the whole dataset.
            is_valid = all(len(x) == self.width for x in self._data)

        if is_valid:
            return True
        else:
            if not safety:
                raise InvalidDimensions
            return False

    def _package(self, dicts=True, ordered=True):
        """Packages Dataset into lists of dictionaries for transmission."""
        # TODO: Dicts default to false?

        _data = list(self._data)

        if ordered:
            dict_pack = OrderedDict
        else:
            dict_pack = dict

        # Execute formatters.  NOTE: the copy above is shallow, so
        # formatters mutate the underlying Row objects in place.
        if self._formatters:
            for row_i, row in enumerate(_data):
                for col, callback in self._formatters:
                    try:
                        if col is None:
                            # A formatter registered for all columns.
                            for j, c in enumerate(row):
                                _data[row_i][j] = callback(c)
                        else:
                            _data[row_i][col] = callback(row[col])
                    except IndexError:
                        raise InvalidDatasetIndex

        if self.headers:
            if dicts:
                data = [dict_pack(list(zip(self.headers, data_row))) for data_row in _data]
            else:
                data = [list(self.headers)] + list(_data)
        else:
            data = [list(row) for row in _data]

        return data

    def _get_headers(self):
        """An *optional* list of strings to be used for header rows and attribute names.

        This must be set manually. The given list length must equal :attr:`Dataset.width`.

        """
        return self.__headers

    def _set_headers(self, collection):
        """Validating headers setter."""
        self._validate(collection)
        if collection:
            try:
                self.__headers = list(collection)
            except TypeError:
                raise TypeError
        else:
            self.__headers = None

    headers = property(_get_headers, _set_headers)

    def _get_dict(self):
        """A native Python representation of the :class:`Dataset` object. If headers have
        been set, a list of Python dictionaries will be returned. If no headers have been set,
        a list of tuples (rows) will be returned instead.

        A dataset object can also be imported by setting the `Dataset.dict` attribute: ::

            data = tablib.Dataset()
            data.dict = [{'age': 90, 'first_name': 'Kenneth', 'last_name': 'Reitz'}]

        """
        return self._package()

    def _set_dict(self, pickle):
        """A native Python representation of the Dataset object. If headers have been
        set, a list of Python dictionaries will be returned. If no headers have been
        set, a list of tuples (rows) will be returned instead.

        A dataset object can also be imported by setting the :attr:`Dataset.dict` attribute. ::

            data = tablib.Dataset()
            data.dict = [{'age': 90, 'first_name': 'Kenneth', 'last_name': 'Reitz'}]

        """
        if not len(pickle):
            return

        # if list of rows
        if isinstance(pickle[0], list):
            self.wipe()
            for row in pickle:
                self.append(Row(row))

        # if list of objects
        elif isinstance(pickle[0], dict):
            self.wipe()
            # Headers come from the first dict; all dicts are assumed to
            # share the same key order.
            self.headers = list(pickle[0].keys())
            for row in pickle:
                self.append(Row(list(row.values())))
        else:
            raise UnsupportedFormat

    dict = property(_get_dict, _set_dict)

    def _clean_col(self, col):
        """Prepares the given column for insert/append."""
        col = list(col)

        # When headers are set, the first item of the column is its header.
        if self.headers:
            header = [col.pop(0)]
        else:
            header = []

        # A single callable produces the column by mapping over the rows.
        if len(col) == 1 and callable(col[0]):
            col = list(map(col[0], self._data))
        col = tuple(header + col)

        return col

    @property
    def height(self):
        """The number of rows currently in the :class:`Dataset`.
        Cannot be directly modified.
        """
        return len(self._data)

    @property
    def width(self):
        """The number of columns currently in the :class:`Dataset`.
        Cannot be directly modified.
        """
        try:
            return len(self._data[0])
        except IndexError:
            # No data yet; fall back to the header count, if headers exist.
            try:
                return len(self.headers)
            except TypeError:
                return 0

    def load(self, in_stream, format=None, **kwargs):
        """
        Import `in_stream` to the :class:`Dataset` object using the `format`.
        `in_stream` can be a file-like object, a string, or a bytestring.

        :param \\*\\*kwargs: (optional) custom configuration to the format `import_set`.
        """
        stream = normalize_input(in_stream)
        if not format:
            format = detect_format(stream)

        fmt = registry.get_format(format)
        if not hasattr(fmt, 'import_set'):
            raise UnsupportedFormat(f'Format {format} cannot be imported.')

        fmt.import_set(self, stream, **kwargs)
        return self

    def export(self, format, **kwargs):
        """
        Export :class:`Dataset` object to `format`.

        :param \\*\\*kwargs: (optional) custom configuration to the format `export_set`.
        """
        fmt = registry.get_format(format)
        if not hasattr(fmt, 'export_set'):
            raise UnsupportedFormat(f'Format {format} cannot be exported.')

        return fmt.export_set(self, **kwargs)

    # ----
    # Rows
    # ----

    def insert(self, index, row, tags=()):
        """Inserts a row to the :class:`Dataset` at the given index.

        Rows inserted must be the correct size (height or width).

        The default behaviour is to insert the given row to the :class:`Dataset`
        object at the given index.
        """
        self._validate(row)
        self._data.insert(index, Row(row, tags=tags))

    def rpush(self, row, tags=()):
        """Adds a row to the end of the :class:`Dataset`.
        See :method:`Dataset.insert` for additional documentation.
        """
        self.insert(self.height, row=row, tags=tags)

    def lpush(self, row, tags=()):
        """Adds a row to the top of the :class:`Dataset`.
        See :method:`Dataset.insert` for additional documentation.
        """
        self.insert(0, row=row, tags=tags)

    def append(self, row, tags=()):
        """Adds a row to the :class:`Dataset`.
        See :method:`Dataset.insert` for additional documentation.
        """
        self.rpush(row, tags)

    def extend(self, rows, tags=()):
        """Adds a list of rows to the :class:`Dataset` using
        :method:`Dataset.append`
        """
        for row in rows:
            self.append(row, tags)

    def lpop(self):
        """Removes and returns the first row of the :class:`Dataset`."""
        cache = self[0]
        del self[0]

        return cache

    def rpop(self):
        """Removes and returns the last row of the :class:`Dataset`."""
        cache = self[-1]
        del self[-1]

        return cache

    def pop(self):
        """Removes and returns the last row of the :class:`Dataset`."""
        return self.rpop()

    # -------
    # Columns
    # -------

    def insert_col(self, index, col=None, header=None):
        """Inserts a column to the :class:`Dataset` at the given index.

        Columns inserted must be the correct height.

        You can also insert a column of a single callable object, which will
        add a new column with the return values of the callable each as an
        item in the column. ::

            data.append_col(col=random.randint)

        If inserting a column, and :attr:`Dataset.headers` is set, the
        header attribute must be set, and will be considered the header for
        that row.

        See :ref:`dyncols` for an in-depth example.

        .. versionchanged:: 0.9.0
           If inserting a column, and :attr:`Dataset.headers` is set, the
           header attribute must be set, and will be considered the header for
           that row.

        .. versionadded:: 0.9.0
           If inserting a row, you can add :ref:`tags <tags>` to the row you are inserting.
           This gives you the ability to :method:`filter <Dataset.filter>` your
           :class:`Dataset` later.

        """
        if col is None:
            col = []

        # Callable Columns...
        if callable(col):
            col = list(map(col, self._data))

        col = self._clean_col(col)
        self._validate(col=col)

        if self.headers:
            # pop the first item off, add to headers
            if not header:
                raise HeadersNeeded()

            # corner case - if header is set without data
            elif header and self.height == 0 and len(col):
                raise InvalidDimensions

            self.headers.insert(index, header)

        if self.height and self.width:
            for i, row in enumerate(self._data):
                row.insert(index, col[i])
                self._data[i] = row
        else:
            # No existing rows: each column value becomes a one-cell row.
            self._data = [Row([row]) for row in col]

    def rpush_col(self, col, header=None):
        """Adds a column to the end of the :class:`Dataset`.
        See :method:`Dataset.insert` for additional documentation.
        """
        self.insert_col(self.width, col, header=header)

    def lpush_col(self, col, header=None):
        """Adds a column to the top of the :class:`Dataset`.
        See :method:`Dataset.insert` for additional documentation.
        """
        self.insert_col(0, col, header=header)

    def insert_separator(self, index, text='-'):
        """Adds a separator to :class:`Dataset` at given index."""
        sep = (index, text)
        self._separators.append(sep)

    def append_separator(self, text='-'):
        """Adds a :ref:`separator <separators>` to the :class:`Dataset`."""
        # change offsets if headers are or aren't defined
        if not self.headers:
            index = self.height if self.height else 0
        else:
            index = (self.height + 1) if self.height else 1

        self.insert_separator(index, text)

    def append_col(self, col, header=None):
        """Adds a column to the :class:`Dataset`.
        See :method:`Dataset.insert_col` for additional documentation.
        """
        self.rpush_col(col, header)

    def get_col(self, index):
        """Returns the column from the :class:`Dataset` at the given index."""
        return [row[index] for row in self._data]

    # ----
    # Misc
    # ----

    def add_formatter(self, col, handler):
        """Adds a formatter to the :class:`Dataset`.

        .. versionadded:: 0.9.5

        :param col: column to format. Accepts index int, header str, or None
            to apply the formatter to every column.
        :param handler: reference to callback function to execute against
            each cell value.
        """
        if isinstance(col, str):
            if col in self.headers:
                col = self.headers.index(col)  # get 'key' index from each data
            else:
                raise KeyError

        # BUGFIX: the previous check (`not col > self.width`) raised
        # TypeError for col=None in Python 3, although _package() explicitly
        # supports all-column (None) formatters.
        if col is None or col <= self.width:
            self._formatters.append((col, handler))
        else:
            raise InvalidDatasetIndex

        return True

    def filter(self, tag):
        """Returns a new instance of the :class:`Dataset`, excluding any rows
        that do not contain the given :ref:`tags <tags>`.
        """
        _dset = copy(self)
        _dset._data = [row for row in _dset._data if row.has_tag(tag)]

        return _dset

    def sort(self, col, reverse=False):
        """Sort a :class:`Dataset` by a specific column, given string (for
        header) or integer (for column index). The order can be reversed by
        setting ``reverse`` to ``True``.

        Returns a new :class:`Dataset` instance where columns have been
        sorted.
        """
        if isinstance(col, str):
            if not self.headers:
                raise HeadersNeeded

            _sorted = sorted(self.dict, key=itemgetter(col), reverse=reverse)
            _dset = Dataset(headers=self.headers, title=self.title)

            for item in _sorted:
                row = [item[key] for key in self.headers]
                _dset.append(row=row)

        else:
            # Integer index: translate to a header key when headers exist,
            # since self.dict then yields dicts rather than lists.
            if self.headers:
                col = self.headers[col]

            _sorted = sorted(self.dict, key=itemgetter(col), reverse=reverse)
            _dset = Dataset(headers=self.headers, title=self.title)

            for item in _sorted:
                if self.headers:
                    row = [item[key] for key in self.headers]
                else:
                    row = item
                _dset.append(row=row)

        return _dset

    def transpose(self):
        """Transpose a :class:`Dataset`, turning rows into columns and vice
        versa, returning a new ``Dataset`` instance. The first row of the
        original instance becomes the new header row."""

        # Don't transpose if there is no data
        if not self:
            return

        _dset = Dataset()
        # The first element of the headers stays in the headers,
        # it is our "hinge" on which we rotate the data
        new_headers = [self.headers[0]] + self[self.headers[0]]

        _dset.headers = new_headers
        for index, column in enumerate(self.headers):
            if column == self.headers[0]:
                # It's in the headers, so skip it
                continue

            # Adding the column name as now they're a regular column
            # Use `get_col(index)` in case there are repeated values
            row_data = [column] + self.get_col(index)
            row_data = Row(row_data)
            _dset.append(row=row_data)
        return _dset

    def stack(self, other):
        """Stack two :class:`Dataset` instances together by
        joining at the row level, and return new combined
        ``Dataset`` instance."""

        if not isinstance(other, Dataset):
            return

        if self.width != other.width:
            raise InvalidDimensions

        # Copy the source data
        _dset = copy(self)

        rows_to_stack = [row for row in _dset._data]
        other_rows = [row for row in other._data]

        rows_to_stack.extend(other_rows)
        _dset._data = rows_to_stack

        return _dset

    def stack_cols(self, other):
        """Stack two :class:`Dataset` instances together by
        joining at the column level, and return a new
        combined ``Dataset`` instance. If either ``Dataset``
        has headers set, than the other must as well."""

        if not isinstance(other, Dataset):
            return

        if self.headers or other.headers:
            if not self.headers or not other.headers:
                raise HeadersNeeded

        if self.height != other.height:
            raise InvalidDimensions

        try:
            new_headers = self.headers + other.headers
        except TypeError:
            # Both header lists are None.
            new_headers = None

        _dset = Dataset()

        for column in self.headers:
            _dset.append_col(col=self[column])

        for column in other.headers:
            _dset.append_col(col=other[column])

        _dset.headers = new_headers

        return _dset

    def remove_duplicates(self):
        """Removes all duplicate rows from the :class:`Dataset` object
        while maintaining the original order."""
        seen = set()
        self._data[:] = [
            row for row in self._data
            if not (tuple(row) in seen or seen.add(tuple(row)))
        ]

    def wipe(self):
        """Removes all content and headers from the :class:`Dataset` object."""
        self._data = list()
        self.__headers = None

    def subset(self, rows=None, cols=None):
        """Returns a new instance of the :class:`Dataset`,
        including only specified rows and columns.
        """
        # Don't return if no data
        if not self:
            return

        if rows is None:
            rows = list(range(self.height))

        if cols is None:
            cols = list(self.headers)

        # filter out impossible rows and columns
        rows = [row for row in rows if row in range(self.height)]
        cols = [header for header in cols if header in self.headers]

        _dset = Dataset()

        # filtering rows and columns
        _dset.headers = list(cols)

        _dset._data = []
        for row_no, row in enumerate(self._data):
            data_row = []
            for key in _dset.headers:
                if key in self.headers:
                    pos = self.headers.index(key)
                    data_row.append(row[pos])
                else:
                    raise KeyError

            if row_no in rows:
                _dset.append(row=Row(data_row))

        return _dset
class Databook:
    """A book of :class:`Dataset` objects.
    """

    def __init__(self, sets=None):
        # An explicit list of sheets; defaults to empty when falsy.
        self._datasets = sets or []

    def __repr__(self):
        try:
            return '<%s databook>' % (self.title.lower())
        except AttributeError:
            # No title attribute was ever assigned.
            return '<databook object>'

    def wipe(self):
        """Removes all :class:`Dataset` objects from the :class:`Databook`."""
        self._datasets = []

    def sheets(self):
        """Return the list of contained :class:`Dataset` objects."""
        return self._datasets

    def add_sheet(self, dataset):
        """Adds given :class:`Dataset` to the :class:`Databook`."""
        if not isinstance(dataset, Dataset):
            raise InvalidDatasetType
        self._datasets.append(dataset)

    def _package(self, ordered=True):
        """Packages :class:`Databook` for delivery."""
        dict_pack = OrderedDict if ordered else dict
        return [
            dict_pack(title=sheet.title, data=sheet._package(ordered=ordered))
            for sheet in self._datasets
        ]

    @property
    def size(self):
        """The number of the :class:`Dataset` objects within :class:`Databook`."""
        return len(self._datasets)

    def load(self, in_stream, format, **kwargs):
        """
        Import `in_stream` to the :class:`Databook` object using the `format`.
        `in_stream` can be a file-like object, a string, or a bytestring.

        :param \\*\\*kwargs: (optional) custom configuration to the format `import_book`.
        """
        stream = normalize_input(in_stream)
        if not format:
            format = detect_format(stream)

        fmt = registry.get_format(format)
        if not hasattr(fmt, 'import_book'):
            raise UnsupportedFormat(f'Format {format} cannot be loaded.')

        fmt.import_book(self, stream, **kwargs)
        return self

    def export(self, format, **kwargs):
        """
        Export :class:`Databook` object to `format`.

        :param \\*\\*kwargs: (optional) custom configuration to the format `export_book`.
        """
        fmt = registry.get_format(format)
        if not hasattr(fmt, 'export_book'):
            raise UnsupportedFormat(f'Format {format} cannot be exported.')

        return fmt.export_book(self, **kwargs)
def detect_format(stream):
    """Return format name of given stream (file-like object, string, or bytestring)."""
    stream = normalize_input(stream)
    for fmt in registry.formats():
        try:
            if fmt.detect(stream):
                # finally still runs before this returns, rewinding the stream.
                return fmt.title
        except AttributeError:
            # Format has no detect() hook; skip it.
            pass
        finally:
            # Rewind after every probe so the next format sees the full input.
            if hasattr(stream, 'seek'):
                stream.seek(0)
    return None
def import_set(stream, format=None, **kwargs):
    """Return dataset of given stream (file-like object, string, or bytestring)."""
    normalized = normalize_input(stream)
    return Dataset().load(normalized, format, **kwargs)
def import_book(stream, format=None, **kwargs):
    """Return dataset of given stream (file-like object, string, or bytestring)."""
    normalized = normalize_input(stream)
    return Databook().load(normalized, format, **kwargs)
# Register the built-in import/export formats with the module-level registry
# at import time, so Dataset/Databook load()/export() work out of the box.
registry.register_builtins()