Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/tablib/core.py: 16%

413 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1""" 

2 tablib.core 

3 ~~~~~~~~~~~ 

4 

5 This module implements the central Tablib objects. 

6 

7 :copyright: (c) 2016 by Kenneth Reitz. 2019 Jazzband. 

8 :license: MIT, see LICENSE for more details. 

9""" 

10 

11from collections import OrderedDict 

12from copy import copy 

13from operator import itemgetter 

14 

15from tablib.exceptions import ( 

16 HeadersNeeded, 

17 InvalidDatasetIndex, 

18 InvalidDatasetType, 

19 InvalidDimensions, 

20 UnsupportedFormat, 

21) 

22from tablib.formats import registry 

23from tablib.utils import normalize_input 

24 

# Package metadata (dunder constants consumed by packaging/introspection tools).
__title__ = 'tablib'
__author__ = 'Kenneth Reitz'
__license__ = 'MIT'
__copyright__ = 'Copyright 2017 Kenneth Reitz. 2019 Jazzband.'
__docformat__ = 'restructuredtext'

30 

31 

class Row:
    """Internal Row object. Mainly used for filtering."""

    __slots__ = ['_row', 'tags']

    def __init__(self, row=(), tags=()):
        # Copy both inputs so later mutation of the caller's objects
        # cannot change this row's state.
        self._row = list(row)
        self.tags = list(tags)

    def __iter__(self):
        return iter(self._row)

    def __len__(self):
        return len(self._row)

    def __repr__(self):
        return repr(self._row)

    def __getitem__(self, i):
        return self._row[i]

    def __setitem__(self, i, value):
        self._row[i] = value

    def __delitem__(self, i):
        del self._row[i]

    def __getstate__(self):
        # __slots__ classes have no __dict__, so pickling needs explicit state.
        return self._row, self.tags

    def __setstate__(self, state):
        self._row, self.tags = state

    def rpush(self, value):
        """Append *value* at the right (end) of the row."""
        self.insert(len(self._row), value)

    def lpush(self, value):
        """Insert *value* at the left (start) of the row."""
        self.insert(0, value)

    def append(self, value):
        """Alias for :meth:`rpush`."""
        self.rpush(value)

    def insert(self, index, value):
        """Insert *value* at position *index*."""
        self._row.insert(index, value)

    def __contains__(self, item):
        return item in self._row

    @property
    def tuple(self):
        """Tuple representation of :class:`Row`."""
        return tuple(self._row)

    @property
    def list(self):
        """List representation of :class:`Row`."""
        return list(self._row)

    def has_tag(self, tag):
        """Returns true if current row contains tag.

        *tag* may be ``None`` (always False), a single string, or an
        iterable of strings (True if any of them is present).
        """
        if tag is None:
            return False
        if isinstance(tag, str):
            return tag in self.tags
        return not set(tag).isdisjoint(self.tags)

99 

100 

class Dataset:
    """The :class:`Dataset` object is the heart of Tablib. It provides all core
    functionality.

    Usually you create a :class:`Dataset` instance in your main module, and append
    rows as you collect data. ::

        data = tablib.Dataset()
        data.headers = ('name', 'age')

        for (name, age) in some_collector():
            data.append((name, age))


    Setting columns is similar. The column data length must equal the
    current height of the data and headers must be set. ::

        data = tablib.Dataset()
        data.headers = ('first_name', 'last_name')

        data.append(('John', 'Adams'))
        data.append(('George', 'Washington'))

        data.append_col((90, 67), header='age')


    You can also set rows and headers upon instantiation. This is useful if
    dealing with dozens or hundreds of :class:`Dataset` objects. ::

        headers = ('first_name', 'last_name')
        data = [('John', 'Adams'), ('George', 'Washington')]

        data = tablib.Dataset(*data, headers=headers)

    :param \\*args: (optional) list of rows to populate Dataset
    :param headers: (optional) list strings for Dataset header row
    :param title: (optional) string to use as title of the Dataset


    .. admonition:: Format Attributes Definition

        If you look at the code, the various output/import formats are not
        defined within the :class:`Dataset` object. To add support for a new format, see
        :ref:`Adding New Formats <newformats>`.

    """

    def __init__(self, *args, **kwargs):
        # Each positional argument is one row of data.
        self._data = list(Row(arg) for arg in args)
        self.__headers = None

        # ('title', index) tuples
        self._separators = []

        # (column, callback) tuples
        self._formatters = []

        self.headers = kwargs.get('headers')

        self.title = kwargs.get('title')

    def __len__(self):
        return self.height

    def __getitem__(self, key):
        """Row access by index/slice, or column access by header string."""
        if isinstance(key, str):
            if key in self.headers:
                pos = self.headers.index(key)  # get 'key' index from each data
                return [row[pos] for row in self._data]
            else:
                raise KeyError
        else:
            _results = self._data[key]
            if isinstance(_results, Row):
                return _results.tuple
            else:
                return [result.tuple for result in _results]

    def __setitem__(self, key, value):
        self._validate(value)
        self._data[key] = Row(value)

    def __delitem__(self, key):
        """Delete a row (by index) or a whole column (by header string)."""
        if isinstance(key, str):

            if key in self.headers:

                pos = self.headers.index(key)
                del self.headers[pos]

                # Drop the corresponding cell from every row.
                for i, row in enumerate(self._data):

                    del row[pos]
                    self._data[i] = row
            else:
                raise KeyError
        else:
            del self._data[key]

    def __repr__(self):
        try:
            return '<%s dataset>' % (self.title.lower())
        except AttributeError:
            # No/None title — fall back to a generic representation.
            return '<dataset object>'

    def __str__(self):
        """Render the dataset as an aligned plain-text table."""
        result = []

        # Add str representation of headers.
        if self.__headers:
            result.append([str(h) for h in self.__headers])

        # Add str representation of rows.
        result.extend(list(map(str, row)) for row in self._data)

        # Per-column maximum widths drive the alignment.
        lens = [list(map(len, row)) for row in result]
        field_lens = list(map(max, zip(*lens)))

        # delimiter between header and data
        if self.__headers:
            result.insert(1, ['-' * length for length in field_lens])

        format_string = '|'.join('{%s:%s}' % item for item in enumerate(field_lens))

        return '\n'.join(format_string.format(*row) for row in result)

    # ---------
    # Internals
    # ---------

    def _get_in_format(self, fmt_key, **kwargs):
        """Export this dataset via the registered format named *fmt_key*."""
        return registry.get_format(fmt_key).export_set(self, **kwargs)

    def _set_in_format(self, fmt_key, in_stream, **kwargs):
        """Import *in_stream* into this dataset via the format named *fmt_key*."""
        in_stream = normalize_input(in_stream)
        return registry.get_format(fmt_key).import_set(self, in_stream, **kwargs)

    def _validate(self, row=None, col=None, safety=False):
        """Assures size of every row in dataset is of proper proportions.

        :param safety: if True, return False instead of raising
            :class:`InvalidDimensions` on failure.
        """
        if row:
            is_valid = (len(row) == self.width) if self.width else True
        elif col:
            if len(col) < 1:
                is_valid = True
            else:
                is_valid = (len(col) == self.height) if self.height else True
        else:
            # No candidate given: check the whole dataset for consistency.
            is_valid = all(len(x) == self.width for x in self._data)

        if is_valid:
            return True
        else:
            if not safety:
                raise InvalidDimensions
            return False

    def _package(self, dicts=True, ordered=True):
        """Packages Dataset into lists of dictionaries for transmission."""
        # TODO: Dicts default to false?

        _data = list(self._data)

        if ordered:
            dict_pack = OrderedDict
        else:
            dict_pack = dict

        # Execute formatters
        if self._formatters:
            for row_i, row in enumerate(_data):
                for col, callback in self._formatters:
                    try:
                        if col is None:
                            # A None column applies the callback to every cell.
                            for j, c in enumerate(row):
                                _data[row_i][j] = callback(c)
                        else:
                            _data[row_i][col] = callback(row[col])
                    except IndexError:
                        raise InvalidDatasetIndex

        if self.headers:
            if dicts:
                data = [dict_pack(list(zip(self.headers, data_row))) for data_row in _data]
            else:
                data = [list(self.headers)] + list(_data)
        else:
            data = [list(row) for row in _data]

        return data

    def _get_headers(self):
        """An *optional* list of strings to be used for header rows and attribute names.

        This must be set manually. The given list length must equal :attr:`Dataset.width`.

        """
        return self.__headers

    def _set_headers(self, collection):
        """Validating headers setter."""
        self._validate(collection)
        if collection:
            # list() itself raises TypeError for non-iterable input.
            self.__headers = list(collection)
        else:
            self.__headers = None

    headers = property(_get_headers, _set_headers)

    def _get_dict(self):
        """A native Python representation of the :class:`Dataset` object. If headers have
        been set, a list of Python dictionaries will be returned. If no headers have been set,
        a list of tuples (rows) will be returned instead.

        A dataset object can also be imported by setting the `Dataset.dict` attribute: ::

            data = tablib.Dataset()
            data.dict = [{'age': 90, 'first_name': 'Kenneth', 'last_name': 'Reitz'}]

        """
        return self._package()

    def _set_dict(self, pickle):
        """A native Python representation of the Dataset object. If headers have been
        set, a list of Python dictionaries will be returned. If no headers have been
        set, a list of tuples (rows) will be returned instead.

        A dataset object can also be imported by setting the :attr:`Dataset.dict` attribute. ::

            data = tablib.Dataset()
            data.dict = [{'age': 90, 'first_name': 'Kenneth', 'last_name': 'Reitz'}]

        """

        if not len(pickle):
            return

        # if list of rows
        if isinstance(pickle[0], list):
            self.wipe()
            for row in pickle:
                self.append(Row(row))

        # if list of objects
        elif isinstance(pickle[0], dict):
            self.wipe()
            # The first object's keys define the headers for the whole set.
            self.headers = list(pickle[0].keys())
            for row in pickle:
                self.append(Row(list(row.values())))
        else:
            raise UnsupportedFormat

    dict = property(_get_dict, _set_dict)

    def _clean_col(self, col):
        """Prepares the given column for insert/append."""

        col = list(col)

        if self.headers:
            # First element is the column's header.
            header = [col.pop(0)]
        else:
            header = []

        if len(col) == 1 and hasattr(col[0], '__call__'):
            # A single callable: build the column from each existing row.
            col = list(map(col[0], self._data))
        col = tuple(header + col)

        return col

    @property
    def height(self):
        """The number of rows currently in the :class:`Dataset`.
        Cannot be directly modified.
        """
        return len(self._data)

    @property
    def width(self):
        """The number of columns currently in the :class:`Dataset`.
        Cannot be directly modified.
        """

        try:
            return len(self._data[0])
        except IndexError:
            # No data yet — fall back to the header count, or 0.
            try:
                return len(self.headers)
            except TypeError:
                return 0

    def load(self, in_stream, format=None, **kwargs):
        """
        Import `in_stream` to the :class:`Dataset` object using the `format`.
        `in_stream` can be a file-like object, a string, or a bytestring.

        :param \\*\\*kwargs: (optional) custom configuration to the format `import_set`.
        """

        stream = normalize_input(in_stream)
        if not format:
            format = detect_format(stream)

        fmt = registry.get_format(format)
        if not hasattr(fmt, 'import_set'):
            raise UnsupportedFormat(f'Format {format} cannot be imported.')

        # NOTE: a previous revision also tested the truthiness of the
        # module-level `import_set` function here — always true, dead code.
        fmt.import_set(self, stream, **kwargs)
        return self

    def export(self, format, **kwargs):
        """
        Export :class:`Dataset` object to `format`.

        :param \\*\\*kwargs: (optional) custom configuration to the format `export_set`.
        """
        fmt = registry.get_format(format)
        if not hasattr(fmt, 'export_set'):
            raise UnsupportedFormat(f'Format {format} cannot be exported.')

        return fmt.export_set(self, **kwargs)

    # ----
    # Rows
    # ----

    def insert(self, index, row, tags=()):
        """Inserts a row to the :class:`Dataset` at the given index.

        Rows inserted must be the correct size (height or width).

        The default behaviour is to insert the given row to the :class:`Dataset`
        object at the given index.
        """

        self._validate(row)
        self._data.insert(index, Row(row, tags=tags))

    def rpush(self, row, tags=()):
        """Adds a row to the end of the :class:`Dataset`.
        See :method:`Dataset.insert` for additional documentation.
        """

        self.insert(self.height, row=row, tags=tags)

    def lpush(self, row, tags=()):
        """Adds a row to the top of the :class:`Dataset`.
        See :method:`Dataset.insert` for additional documentation.
        """

        self.insert(0, row=row, tags=tags)

    def append(self, row, tags=()):
        """Adds a row to the :class:`Dataset`.
        See :method:`Dataset.insert` for additional documentation.
        """

        self.rpush(row, tags)

    def extend(self, rows, tags=()):
        """Adds a list of rows to the :class:`Dataset` using
        :method:`Dataset.append`
        """

        for row in rows:
            self.append(row, tags)

    def lpop(self):
        """Removes and returns the first row of the :class:`Dataset`."""

        cache = self[0]
        del self[0]

        return cache

    def rpop(self):
        """Removes and returns the last row of the :class:`Dataset`."""

        cache = self[-1]
        del self[-1]

        return cache

    def pop(self):
        """Removes and returns the last row of the :class:`Dataset`."""

        return self.rpop()

    # -------
    # Columns
    # -------

    def insert_col(self, index, col=None, header=None):
        """Inserts a column to the :class:`Dataset` at the given index.

        Columns inserted must be the correct height.

        You can also insert a column of a single callable object, which will
        add a new column with the return values of the callable each as an
        item in the column. ::

            data.append_col(col=random.randint)

        If inserting a column, and :attr:`Dataset.headers` is set, the
        header attribute must be set, and will be considered the header for
        that row.

        See :ref:`dyncols` for an in-depth example.

        .. versionchanged:: 0.9.0
            If inserting a column, and :attr:`Dataset.headers` is set, the
            header attribute must be set, and will be considered the header for
            that row.

        .. versionadded:: 0.9.0
            If inserting a row, you can add :ref:`tags <tags>` to the row you are inserting.
            This gives you the ability to :method:`filter <Dataset.filter>` your
            :class:`Dataset` later.

        """

        if col is None:
            col = []

        # Callable Columns...
        if hasattr(col, '__call__'):
            col = list(map(col, self._data))

        col = self._clean_col(col)
        self._validate(col=col)

        if self.headers:
            # pop the first item off, add to headers
            if not header:
                raise HeadersNeeded()

            # corner case - if header is set without data
            elif header and self.height == 0 and len(col):
                raise InvalidDimensions

            self.headers.insert(index, header)

        if self.height and self.width:

            for i, row in enumerate(self._data):

                row.insert(index, col[i])
                self._data[i] = row
        else:
            # No existing rows: the column becomes the data, one cell per row.
            self._data = [Row([row]) for row in col]

    def rpush_col(self, col, header=None):
        """Adds a column to the end of the :class:`Dataset`.
        See :method:`Dataset.insert` for additional documentation.
        """

        self.insert_col(self.width, col, header=header)

    def lpush_col(self, col, header=None):
        """Adds a column to the top of the :class:`Dataset`.
        See :method:`Dataset.insert` for additional documentation.
        """

        self.insert_col(0, col, header=header)

    def insert_separator(self, index, text='-'):
        """Adds a separator to :class:`Dataset` at given index."""

        sep = (index, text)
        self._separators.append(sep)

    def append_separator(self, text='-'):
        """Adds a :ref:`separator <separators>` to the :class:`Dataset`."""

        # change offsets if headers are or aren't defined
        if not self.headers:
            index = self.height if self.height else 0
        else:
            index = (self.height + 1) if self.height else 1

        self.insert_separator(index, text)

    def append_col(self, col, header=None):
        """Adds a column to the :class:`Dataset`.
        See :method:`Dataset.insert_col` for additional documentation.
        """

        self.rpush_col(col, header)

    def get_col(self, index):
        """Returns the column from the :class:`Dataset` at the given index."""

        return [row[index] for row in self._data]

    # ----
    # Misc
    # ----

    def add_formatter(self, col, handler):
        """Adds a formatter to the :class:`Dataset`.

        .. versionadded:: 0.9.5

        :param col: column to apply the formatter to. Accepts an index int,
            a header str, or ``None`` to apply the formatter to every column.
        :param handler: reference to callback function to execute against
            each cell value.
        """

        if isinstance(col, str):
            if col in self.headers:
                col = self.headers.index(col)  # get 'key' index from each data
            else:
                raise KeyError

        # Fix: `None` means "all columns" (see _package) and previously
        # crashed with TypeError on the `col > self.width` comparison.
        if col is None or col <= self.width:
            self._formatters.append((col, handler))
        else:
            raise InvalidDatasetIndex

        return True

    def filter(self, tag):
        """Returns a new instance of the :class:`Dataset`, excluding any rows
        that do not contain the given :ref:`tags <tags>`.
        """
        _dset = copy(self)
        _dset._data = [row for row in _dset._data if row.has_tag(tag)]

        return _dset

    def sort(self, col, reverse=False):
        """Sort a :class:`Dataset` by a specific column, given string (for
        header) or integer (for column index). The order can be reversed by
        setting ``reverse`` to ``True``.

        Returns a new :class:`Dataset` instance where columns have been
        sorted.
        """

        if isinstance(col, str):

            if not self.headers:
                raise HeadersNeeded

            _sorted = sorted(self.dict, key=itemgetter(col), reverse=reverse)
            _dset = Dataset(headers=self.headers, title=self.title)

            for item in _sorted:
                row = [item[key] for key in self.headers]
                _dset.append(row=row)

        else:
            if self.headers:
                # Translate the integer index to its header name so we can
                # sort the dict representation.
                col = self.headers[col]

            _sorted = sorted(self.dict, key=itemgetter(col), reverse=reverse)
            _dset = Dataset(headers=self.headers, title=self.title)

            for item in _sorted:
                if self.headers:
                    row = [item[key] for key in self.headers]
                else:
                    row = item
                _dset.append(row=row)

        return _dset

    def transpose(self):
        """Transpose a :class:`Dataset`, turning rows into columns and vice
        versa, returning a new ``Dataset`` instance. The first row of the
        original instance becomes the new header row."""

        # Don't transpose if there is no data
        if not self:
            return

        _dset = Dataset()
        # The first element of the headers stays in the headers,
        # it is our "hinge" on which we rotate the data
        new_headers = [self.headers[0]] + self[self.headers[0]]

        _dset.headers = new_headers
        for index, column in enumerate(self.headers):

            if column == self.headers[0]:
                # It's in the headers, so skip it
                continue

            # Adding the column name as now they're a regular column
            # Use `get_col(index)` in case there are repeated values
            row_data = [column] + self.get_col(index)
            row_data = Row(row_data)
            _dset.append(row=row_data)
        return _dset

    def stack(self, other):
        """Stack two :class:`Dataset` instances together by
        joining at the row level, and return new combined
        ``Dataset`` instance."""

        if not isinstance(other, Dataset):
            return

        if self.width != other.width:
            raise InvalidDimensions

        # Copy the source data
        _dset = copy(self)

        rows_to_stack = [row for row in _dset._data]
        other_rows = [row for row in other._data]

        rows_to_stack.extend(other_rows)
        _dset._data = rows_to_stack

        return _dset

    def stack_cols(self, other):
        """Stack two :class:`Dataset` instances together by
        joining at the column level, and return a new
        combined ``Dataset`` instance. If either ``Dataset``
        has headers set, than the other must as well."""

        if not isinstance(other, Dataset):
            return

        if self.headers or other.headers:
            if not self.headers or not other.headers:
                raise HeadersNeeded

        if self.height != other.height:
            raise InvalidDimensions

        try:
            new_headers = self.headers + other.headers
        except TypeError:
            # Both header lists are None — the combined set is headerless.
            new_headers = None

        _dset = Dataset()

        for column in self.headers:
            _dset.append_col(col=self[column])

        for column in other.headers:
            _dset.append_col(col=other[column])

        _dset.headers = new_headers

        return _dset

    def remove_duplicates(self):
        """Removes all duplicate rows from the :class:`Dataset` object
        while maintaining the original order."""
        seen = set()
        # `seen.add` returns None, so the `or` both records and keeps
        # first occurrences in a single pass.
        self._data[:] = [
            row for row in self._data
            if not (tuple(row) in seen or seen.add(tuple(row)))
        ]

    def wipe(self):
        """Removes all content and headers from the :class:`Dataset` object."""
        self._data = list()
        self.__headers = None

    def subset(self, rows=None, cols=None):
        """Returns a new instance of the :class:`Dataset`,
        including only specified rows and columns.
        """

        # Don't return if no data
        if not self:
            return

        if rows is None:
            rows = list(range(self.height))

        if cols is None:
            cols = list(self.headers)

        # filter out impossible rows and columns
        rows = [row for row in rows if row in range(self.height)]
        cols = [header for header in cols if header in self.headers]

        _dset = Dataset()

        # filtering rows and columns
        _dset.headers = list(cols)

        _dset._data = []
        for row_no, row in enumerate(self._data):
            data_row = []
            for key in _dset.headers:
                if key in self.headers:
                    pos = self.headers.index(key)
                    data_row.append(row[pos])
                else:
                    raise KeyError

            if row_no in rows:
                _dset.append(row=Row(data_row))

        return _dset

806 

807 

class Databook:
    """A book of :class:`Dataset` objects.
    """

    def __init__(self, sets=None):
        # Default to an empty book when no sheet list is supplied.
        self._datasets = sets or []

    def __repr__(self):
        try:
            return '<%s databook>' % (self.title.lower())
        except AttributeError:
            # No/None title — generic representation.
            return '<databook object>'

    def wipe(self):
        """Removes all :class:`Dataset` objects from the :class:`Databook`."""
        self._datasets = []

    def sheets(self):
        """Return the list of contained :class:`Dataset` objects."""
        return self._datasets

    def add_sheet(self, dataset):
        """Adds given :class:`Dataset` to the :class:`Databook`."""
        if not isinstance(dataset, Dataset):
            raise InvalidDatasetType
        self._datasets.append(dataset)

    def _package(self, ordered=True):
        """Packages :class:`Databook` for delivery."""
        dict_pack = OrderedDict if ordered else dict

        return [
            dict_pack(title=dset.title, data=dset._package(ordered=ordered))
            for dset in self._datasets
        ]

    @property
    def size(self):
        """The number of the :class:`Dataset` objects within :class:`Databook`."""
        return len(self._datasets)

    def load(self, in_stream, format, **kwargs):
        """
        Import `in_stream` to the :class:`Databook` object using the `format`.
        `in_stream` can be a file-like object, a string, or a bytestring.

        :param \\*\\*kwargs: (optional) custom configuration to the format `import_book`.
        """

        stream = normalize_input(in_stream)
        if not format:
            format = detect_format(stream)

        fmt = registry.get_format(format)
        if not hasattr(fmt, 'import_book'):
            raise UnsupportedFormat(f'Format {format} cannot be loaded.')

        fmt.import_book(self, stream, **kwargs)
        return self

    def export(self, format, **kwargs):
        """
        Export :class:`Databook` object to `format`.

        :param \\*\\*kwargs: (optional) custom configuration to the format `export_book`.
        """
        fmt = registry.get_format(format)
        if not hasattr(fmt, 'export_book'):
            raise UnsupportedFormat(f'Format {format} cannot be exported.')

        return fmt.export_book(self, **kwargs)

886 

887 

def detect_format(stream):
    """Return format name of given stream (file-like object, string, or bytestring)."""
    stream = normalize_input(stream)
    for fmt in registry.formats():
        try:
            matched = fmt.detect(stream)
        except AttributeError:
            # Format has no detect() hook — treat as no match.
            matched = False
        finally:
            # Detection may consume the stream; rewind for the next probe.
            if hasattr(stream, 'seek'):
                stream.seek(0)
        if matched:
            return fmt.title
    return None

903 

904 

def import_set(stream, format=None, **kwargs):
    """Return dataset of given stream (file-like object, string, or bytestring)."""
    normalized = normalize_input(stream)
    return Dataset().load(normalized, format, **kwargs)

909 

910 

def import_book(stream, format=None, **kwargs):
    """Return dataset of given stream (file-like object, string, or bytestring)."""
    normalized = normalize_input(stream)
    return Databook().load(normalized, format, **kwargs)

915 

916 

# Register the built-in import/export formats with the global format
# registry as a module-import side effect.
registry.register_builtins()