Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/xlrd/sheet.py: 7%

1# -*- coding: utf-8 -*-

3# This module is part of the xlrd package, which is released under a

4# BSD-style licence.

6from __future__ import print_function

8from array import array

9from struct import calcsize, unpack

11from .biffh import *

12from .formatting import Format, nearest_colour_index

13from .formula import (

14 FMLA_TYPE_CELL, FMLA_TYPE_SHARED, decompile_formula, dump_formula,

15 rangename2d,

16)

17from .timemachine import *

19DEBUG = 0

20OBJ_MSO_DEBUG = 0

22_WINDOW2_options = (

23 # Attribute names and initial values to use in case

24 # a WINDOW2 record is not written.

25 ("show_formulas", 0),

26 ("show_grid_lines", 1),

27 ("show_sheet_headers", 1),

28 ("panes_are_frozen", 0),

29 ("show_zero_values", 1),

30 ("automatic_grid_line_colour", 1),

31 ("columns_from_right_to_left", 0),

32 ("show_outline_symbols", 1),

33 ("remove_splits_if_pane_freeze_is_removed", 0),

34 # Multiple sheets can be selected, but only one can be active

35 # (hold down Ctrl and click multiple tabs in the file in OOo)

36 ("sheet_selected", 0),

37 # "sheet_visible" should really be called "sheet_active"

38 # and is 1 when this sheet is the sheet displayed when the file

39 # is open. More than likely only one sheet should ever be set as

40 # visible.

41 # This would correspond to the Book's sheet_active attribute, but

42 # that doesn't exist as WINDOW1 records aren't currently processed.

43 # The real thing is the visibility attribute from the BOUNDSHEET record.

44 ("sheet_visible", 0),

45 ("show_in_page_break_preview", 0),

46)

50class Sheet(BaseObject):

51 """

52 Contains the data for one worksheet.

54 In the cell access functions, ``rowx`` is a row index, counting from

55 zero, and ``colx`` is a column index, counting from zero.

56 Negative values for row/column indexes and slice positions are supported in

57 the expected fashion.

59 For information about cell types and cell values, refer to the documentation

60 of the :class:`Cell` class.

62 .. warning::

64 You don't instantiate this class yourself. You access :class:`Sheet`

65 objects via the :class:`~xlrd.book.Book` object that

66 was returned when you called :func:`xlrd.open_workbook`.

67 """

69 #: Name of sheet.

70 name = ''

72 #: A reference to the :class:`~xlrd.book.Book` object to which this sheet

73 #: belongs.

74 #:

75 #: Example usage: ``some_sheet.book.datemode``

76 book = None

78 #: Number of rows in sheet. A row index is in ``range(thesheet.nrows)``.

79 nrows = 0

81 #: Nominal number of columns in sheet. It is one more than the maximum

82 #: column index found, ignoring trailing empty cells.

83 #: See also the ``ragged_rows`` parameter to :func:`~xlrd.open_workbook`

84 #: and :meth:`~xlrd.sheet.Sheet.row_len`.

85 ncols = 0

88 #: The map from a column index to a :class:`Colinfo` object. Often there is

89 #: an entry in ``COLINFO`` records for all column indexes in ``range(257)``.

90 #:

91 #: .. note::

92 #: xlrd ignores the entry for the non-existent

93 #: 257th column.

94 #:

95 #: On the other hand, there may be no entry for unused columns.

96 #:

97 #: .. versionadded:: 0.6.1

98 #:

99 #: Populated only if ``open_workbook(..., formatting_info=True)``

100 colinfo_map = {}

101

102 #: The map from a row index to a :class:`Rowinfo` object.

103 #:

104 #: ..note::

105 #: It is possible to have missing entries -- at least one source of

106 #: XLS files doesn't bother writing ``ROW`` records.

107 #:

108 #: .. versionadded:: 0.6.1

109 #:

110 #: Populated only if ``open_workbook(..., formatting_info=True)``

111 rowinfo_map = {}

112

113 #: List of address ranges of cells containing column labels.

114 #: These are set up in Excel by Insert > Name > Labels > Columns.

115 #:

116 #: .. versionadded:: 0.6.0

117 #:

118 #: How to deconstruct the list:

119 #:

120 #: .. code-block:: python

121 #:

122 #: for crange in thesheet.col_label_ranges:

123 #: rlo, rhi, clo, chi = crange

124 #: for rx in xrange(rlo, rhi):

125 #: for cx in xrange(clo, chi):

126 #: print "Column label at (rowx=%d, colx=%d) is %r" \

127 #: (rx, cx, thesheet.cell_value(rx, cx))

128 col_label_ranges = []

129

130 #: List of address ranges of cells containing row labels.

131 #: For more details, see :attr:`col_label_ranges`.

132 #:

133 #: .. versionadded:: 0.6.0

134 row_label_ranges = []

135

136 #: List of address ranges of cells which have been merged.

137 #: These are set up in Excel by Format > Cells > Alignment, then ticking

138 #: the "Merge cells" box.

139 #:

140 #: .. note::

141 #: The upper limits are exclusive: i.e. ``[2, 3, 7, 9]`` only

142 #: spans two cells.

143 #:

144 #: .. note:: Extracted only if ``open_workbook(..., formatting_info=True)``

145 #:

146 #: .. versionadded:: 0.6.1

147 #:

148 #: How to deconstruct the list:

149 #:

150 #: .. code-block:: python

151 #:

152 #: for crange in thesheet.merged_cells:

153 #: rlo, rhi, clo, chi = crange

154 #: for rowx in xrange(rlo, rhi):

155 #: for colx in xrange(clo, chi):

156 #: # cell (rlo, clo) (the top left one) will carry the data

157 #: # and formatting info; the remainder will be recorded as

158 #: # blank cells, but a renderer will apply the formatting info

159 #: # for the top left cell (e.g. border, pattern) to all cells in

160 #: # the range.

161 merged_cells = []

162

163 #: Mapping of ``(rowx, colx)`` to list of ``(offset, font_index)`` tuples.

164 #: The offset defines where in the string the font begins to be used.

165 #: Offsets are expected to be in ascending order.

166 #: If the first offset is not zero, the meaning is that the cell's ``XF``'s

167 #: font should be used from offset 0.

168 #:

169 #: This is a sparse mapping. There is no entry for cells that are not

170 #: formatted with rich text.

171 #:

172 #: How to use:

173 #:

174 #: .. code-block:: python

175 #:

176 #: runlist = thesheet.rich_text_runlist_map.get((rowx, colx))

177 #: if runlist:

178 #: for offset, font_index in runlist:

179 #: # do work here.

180 #: pass

181 #:

182 #: .. versionadded:: 0.7.2

183 #:

184 #: Populated only if ``open_workbook(..., formatting_info=True)``

185 rich_text_runlist_map = {}

186

187 #: Default column width from ``DEFCOLWIDTH`` record, else ``None``.

188 #: From the OOo docs:

189 #:

190 #: Column width in characters, using the width of the zero character

191 #: from default font (first FONT record in the file). Excel adds some

192 #: extra space to the default width, depending on the default font and

193 #: default font size. The algorithm how to exactly calculate the resulting

194 #: column width is not known.

195 #: Example: The default width of 8 set in this record results in a column

196 #: width of 8.43 using Arial font with a size of 10 points.

197 #:

198 #: For the default hierarchy, refer to the :class:`Colinfo` class.

199 #:

200 #: .. versionadded:: 0.6.1

201 defcolwidth = None

202

203 #: Default column width from ``STANDARDWIDTH`` record, else ``None``.

204 #:

205 #: From the OOo docs:

206 #:

207 #: Default width of the columns in 1/256 of the width of the zero

208 #: character, using default font (first FONT record in the file).

209 #:

210 #: For the default hierarchy, refer to the :class:`Colinfo` class.

211 #:

212 #: .. versionadded:: 0.6.1

213 standardwidth = None

214

215 #: Default value to be used for a row if there is

216 #: no ``ROW`` record for that row.

217 #: From the *optional* ``DEFAULTROWHEIGHT`` record.

218 default_row_height = None

219

220 #: Default value to be used for a row if there is

221 #: no ``ROW`` record for that row.

222 #: From the *optional* ``DEFAULTROWHEIGHT`` record.

223 default_row_height_mismatch = None

224

225 #: Default value to be used for a row if there is

226 #: no ``ROW`` record for that row.

227 #: From the *optional* ``DEFAULTROWHEIGHT`` record.

228 default_row_hidden = None

229

230 #: Default value to be used for a row if there is

231 #: no ``ROW`` record for that row.

232 #: From the *optional* ``DEFAULTROWHEIGHT`` record.

233 default_additional_space_above = None

234

235 #: Default value to be used for a row if there is

236 #: no ``ROW`` record for that row.

237 #: From the *optional* ``DEFAULTROWHEIGHT`` record.

238 default_additional_space_below = None

239

240 #: Visibility of the sheet:

241 #: ::

242 #:

243 #: 0 = visible

244 #: 1 = hidden (can be unhidden by user -- Format -> Sheet -> Unhide)

245 #: 2 = "very hidden" (can be unhidden only by VBA macro).

246 visibility = 0

247

248 #: A 256-element tuple corresponding to the contents of the GCW record for

249 #: this sheet. If no such record, treat as all bits zero.

250 #: Applies to BIFF4-7 only. See docs of the :class:`Colinfo` class for

251 #: discussion.

252 gcw = (0, ) * 256

253

254 #: A list of :class:`Hyperlink` objects corresponding to ``HLINK`` records

255 #: found in the worksheet.

256 #:

257 #: .. versionadded:: 0.7.2

258 hyperlink_list = []

259

260 #: A sparse mapping from ``(rowx, colx)`` to an item in

261 #: :attr:`~xlrd.sheet.Sheet.hyperlink_list`.

262 #: Cells not covered by a hyperlink are not mapped.

263 #: It is possible using the Excel UI to set up a hyperlink that

264 #: covers a larger-than-1x1 rectangle of cells.

265 #: Hyperlink rectangles may overlap (Excel doesn't check).

266 #: When a multiply-covered cell is clicked on, the hyperlink that is

267 #: activated

268 #: (and the one that is mapped here) is the last in

269 #: :attr:`~xlrd.sheet.Sheet.hyperlink_list`.

270 #:

271 #: .. versionadded:: 0.7.2

272 hyperlink_map = {}

273

274 #: A sparse mapping from ``(rowx, colx)`` to a :class:`Note` object.

275 #: Cells not containing a note ("comment") are not mapped.

276 #:

277 #: .. versionadded:: 0.7.2

278 cell_note_map = {}

279

280 #: Number of columns in left pane (frozen panes; for split panes, see

281 #: comments in code)

282 vert_split_pos = 0

283

284 #: Number of rows in top pane (frozen panes; for split panes, see comments

285 #: in code)

286 horz_split_pos = 0

287

288 #: Index of first visible row in bottom frozen/split pane

289 horz_split_first_visible = 0

290

291 #: Index of first visible column in right frozen/split pane

292 vert_split_first_visible = 0

293

294 #: Frozen panes: ignore it. Split panes: explanation and diagrams in

295 #: OOo docs.

296 split_active_pane = 0

297

298 #: Boolean specifying if a ``PANE`` record was present, ignore unless you're

299 #: ``xlutils.copy``

300 has_pane_record = 0

301

302 #: A list of the horizontal page breaks in this sheet.

303 #: Breaks are tuples in the form

304 #: ``(index of row after break, start col index, end col index)``.

305 #:

306 #: Populated only if ``open_workbook(..., formatting_info=True)``

307 #:

308 #: .. versionadded:: 0.7.2

309 horizontal_page_breaks = []

310

311 #: A list of the vertical page breaks in this sheet.

312 #: Breaks are tuples in the form

313 #: ``(index of col after break, start row index, end row index)``.

314 #:

315 #: Populated only if ``open_workbook(..., formatting_info=True)``

316 #:

317 #: .. versionadded:: 0.7.2

318 vertical_page_breaks = []

319

320 def __init__(self, book, position, name, number):

321 self.book = book

322 self.biff_version = book.biff_version

323 self._position = position

324 self.logfile = book.logfile

325 self.bt = array('B', [XL_CELL_EMPTY])

326 self.bf = array('h', [-1])

327 self.name = name

328 self.number = number

329 self.verbosity = book.verbosity

330 self.formatting_info = book.formatting_info

331 self.ragged_rows = book.ragged_rows

332 if self.ragged_rows:

333 self.put_cell = self.put_cell_ragged

334 else:

335 self.put_cell = self.put_cell_unragged

336 self._xf_index_to_xl_type_map = book._xf_index_to_xl_type_map

337 self.nrows = 0 # actual, including possibly empty cells

338 self.ncols = 0

339 self._maxdatarowx = -1 # highest rowx containing a non-empty cell

340 self._maxdatacolx = -1 # highest colx containing a non-empty cell

341 self._dimnrows = 0 # as per DIMENSIONS record

342 self._dimncols = 0

343 self._cell_values = []

344 self._cell_types = []

345 self._cell_xf_indexes = []

346 self.defcolwidth = None

347 self.standardwidth = None

348 self.default_row_height = None

349 self.default_row_height_mismatch = 0

350 self.default_row_hidden = 0

351 self.default_additional_space_above = 0

352 self.default_additional_space_below = 0

353 self.colinfo_map = {}

354 self.rowinfo_map = {}

355 self.col_label_ranges = []

356 self.row_label_ranges = []

357 self.merged_cells = []

358 self.rich_text_runlist_map = {}

359 self.horizontal_page_breaks = []

360 self.vertical_page_breaks = []

361 self._xf_index_stats = [0, 0, 0, 0]

362 self.visibility = book._sheet_visibility[number] # from BOUNDSHEET record

363 for attr, defval in _WINDOW2_options:

364 setattr(self, attr, defval)

365 self.first_visible_rowx = 0

366 self.first_visible_colx = 0

367 self.gridline_colour_index = 0x40

368 self.gridline_colour_rgb = None # pre-BIFF8

369 self.hyperlink_list = []

370 self.hyperlink_map = {}

371 self.cell_note_map = {}

372

373 # Values calculated by xlrd to predict the mag factors that

374 # will actually be used by Excel to display your worksheet.

375 # Pass these values to xlwt when writing XLS files.

376 # Warning 1: Behaviour of OOo Calc and Gnumeric has been observed to differ from Excel's.

377 # Warning 2: A value of zero means almost exactly what it says. Your sheet will be

378 # displayed as a very tiny speck on the screen. xlwt will reject attempts to set

379 # a mag_factor that is not (10 <= mag_factor <= 400).

380 self.cooked_page_break_preview_mag_factor = 60

381 self.cooked_normal_view_mag_factor = 100

382

383 # Values (if any) actually stored on the XLS file

384 self.cached_page_break_preview_mag_factor = 0 # default (60%), from WINDOW2 record

385 self.cached_normal_view_mag_factor = 0 # default (100%), from WINDOW2 record

386 self.scl_mag_factor = None # from SCL record

387

388 self._ixfe = None # BIFF2 only

389 self._cell_attr_to_xfx = {} # BIFF2.0 only

390

391 if self.biff_version >= 80:

392 self.utter_max_rows = 65536

393 else:

394 self.utter_max_rows = 16384

395 self.utter_max_cols = 256

396

397 self._first_full_rowx = -1

398

399 # self._put_cell_exceptions = 0

400 # self._put_cell_row_widenings = 0

401 # self._put_cell_rows_appended = 0

402 # self._put_cell_cells_appended = 0

403

404 def cell(self, rowx, colx):

405 """

406 :class:`Cell` object in the given row and column.

407 """

408 if self.formatting_info:

409 xfx = self.cell_xf_index(rowx, colx)

410 else:

411 xfx = None

412 return Cell(

413 self._cell_types[rowx][colx],

414 self._cell_values[rowx][colx],

415 xfx,

416 )

417

418 def cell_value(self, rowx, colx):

419 "Value of the cell in the given row and column."

420 return self._cell_values[rowx][colx]

421

422 def cell_type(self, rowx, colx):

423 """

424 Type of the cell in the given row and column.

425

426 Refer to the documentation of the :class:`Cell` class.

427 """

428 return self._cell_types[rowx][colx]

429

430 def cell_xf_index(self, rowx, colx):

431 """

432 XF index of the cell in the given row and column.

433 This is an index into :attr:`~xlrd.book.Book.xf_list`.

434

435 .. versionadded:: 0.6.1

436 """

437 self.req_fmt_info()

438 xfx = self._cell_xf_indexes[rowx][colx]

439 if xfx > -1:

440 self._xf_index_stats[0] += 1

441 return xfx

442 # Check for a row xf_index

443 try:

444 xfx = self.rowinfo_map[rowx].xf_index

445 if xfx > -1:

446 self._xf_index_stats[1] += 1

447 return xfx

448 except KeyError:

449 pass

450 # Check for a column xf_index

451 try:

452 xfx = self.colinfo_map[colx].xf_index

453 if xfx == -1: xfx = 15

454 self._xf_index_stats[2] += 1

455 return xfx

456 except KeyError:

457 # If all else fails, 15 is used as hardwired global default xf_index.

458 self._xf_index_stats[3] += 1

459 return 15

460

461 def row_len(self, rowx):

462 """

463 Returns the effective number of cells in the given row. For use with

464 ``open_workbook(ragged_rows=True)`` which is likely to produce rows

465 with fewer than :attr:`~Sheet.ncols` cells.

466

467 .. versionadded:: 0.7.2

468 """

469 return len(self._cell_values[rowx])

470

471 def row(self, rowx):

472 """

473 Returns a sequence of the :class:`Cell` objects in the given row.

474 """

475 return [

476 self.cell(rowx, colx)

477 for colx in xrange(len(self._cell_values[rowx]))

478 ]

479

480 def __getitem__(self, item):

481 """

482 Takes either rowindex or (rowindex, colindex) as an index,

483 and returns either row or cell respectively.

484 """

485 try:

486 rowix, colix = item

487 except TypeError:

488 # it's not a tuple (or of right size), let's try indexing as is

489 # if this is a problem, let this error propagate back

490 return self.row(item)

491 else:

492 return self.cell(rowix, colix)

493

494 def get_rows(self):

495 "Returns a generator for iterating through each row."

496 return (self.row(index) for index in range(self.nrows))

497

498 # makes `for row in sheet` natural and intuitive

499 __iter__ = get_rows

500

501 def row_types(self, rowx, start_colx=0, end_colx=None):

502 """

503 Returns a slice of the types of the cells in the given row.

504 """

505 if end_colx is None:

506 return self._cell_types[rowx][start_colx:]

507 return self._cell_types[rowx][start_colx:end_colx]

508

509 def row_values(self, rowx, start_colx=0, end_colx=None):

510 """

511 Returns a slice of the values of the cells in the given row.

512 """

513 if end_colx is None:

514 return self._cell_values[rowx][start_colx:]

515 return self._cell_values[rowx][start_colx:end_colx]

516

517 def row_slice(self, rowx, start_colx=0, end_colx=None):

518 """

519 Returns a slice of the :class:`Cell` objects in the given row.

520 """

521 nc = len(self._cell_values[rowx])

522 if start_colx < 0:

523 start_colx += nc

524 if start_colx < 0:

525 start_colx = 0

526 if end_colx is None or end_colx > nc:

527 end_colx = nc

528 elif end_colx < 0:

529 end_colx += nc

530 return [

531 self.cell(rowx, colx)

532 for colx in xrange(start_colx, end_colx)

533 ]

534

535 def col_slice(self, colx, start_rowx=0, end_rowx=None):

536 """

537 Returns a slice of the :class:`Cell` objects in the given column.

538 """

539 nr = self.nrows

540 if start_rowx < 0:

541 start_rowx += nr

542 if start_rowx < 0:

543 start_rowx = 0

544 if end_rowx is None or end_rowx > nr:

545 end_rowx = nr

546 elif end_rowx < 0:

547 end_rowx += nr

548 return [

549 self.cell(rowx, colx)

550 for rowx in xrange(start_rowx, end_rowx)

551 ]

552

553 def col_values(self, colx, start_rowx=0, end_rowx=None):

554 """

555 Returns a slice of the values of the cells in the given column.

556 """

557 nr = self.nrows

558 if start_rowx < 0:

559 start_rowx += nr

560 if start_rowx < 0:

561 start_rowx = 0

562 if end_rowx is None or end_rowx > nr:

563 end_rowx = nr

564 elif end_rowx < 0:

565 end_rowx += nr

566 return [

567 self._cell_values[rowx][colx]

568 for rowx in xrange(start_rowx, end_rowx)

569 ]

570

571 def col_types(self, colx, start_rowx=0, end_rowx=None):

572 """

573 Returns a slice of the types of the cells in the given column.

574 """

575 nr = self.nrows

576 if start_rowx < 0:

577 start_rowx += nr

578 if start_rowx < 0:

579 start_rowx = 0

580 if end_rowx is None or end_rowx > nr:

581 end_rowx = nr

582 elif end_rowx < 0:

583 end_rowx += nr

584 return [

585 self._cell_types[rowx][colx]

586 for rowx in xrange(start_rowx, end_rowx)

587 ]

588

589 col = col_slice

590

591 # === Following methods are used in building the worksheet.

592 # === They are not part of the API.

593

594 def tidy_dimensions(self):

595 if self.verbosity >= 3:

596 fprintf(

597 self.logfile,

598 "tidy_dimensions: nrows=%d ncols=%d \n",

599 self.nrows, self.ncols,

600 )

601 if 1 and self.merged_cells:

602 nr = nc = 0

603 umaxrows = self.utter_max_rows

604 umaxcols = self.utter_max_cols

605 for crange in self.merged_cells:

606 rlo, rhi, clo, chi = crange

607 if not (0 <= rlo < rhi <= umaxrows) or not (0 <= clo < chi <= umaxcols):

608 fprintf(self.logfile,

609 "*** WARNING: sheet #%d (%r), MERGEDCELLS bad range %r\n",

610 self.number, self.name, crange)

611 if rhi > nr: nr = rhi

612 if chi > nc: nc = chi

613 if nc > self.ncols:

614 self.ncols = nc

615 self._first_full_rowx = -2

616 if nr > self.nrows:

617 # we put one empty cell at (nr-1,0) to make sure

618 # we have the right number of rows. The ragged rows

619 # will sort out the rest if needed.

620 self.put_cell(nr-1, 0, XL_CELL_EMPTY, UNICODE_LITERAL(''), -1)

621 if (self.verbosity >= 1 and

622 (self.nrows != self._dimnrows or self.ncols != self._dimncols)):

623 fprintf(

624 self.logfile,

625 "NOTE *** sheet %d (%r): DIMENSIONS R,C = %d,%d should be %d,%d\n",

626 self.number,

627 self.name,

628 self._dimnrows,

629 self._dimncols,

630 self.nrows,

631 self.ncols,

632 )

633 if not self.ragged_rows:

634 # fix ragged rows

635 ncols = self.ncols

636 s_cell_types = self._cell_types

637 s_cell_values = self._cell_values

638 s_cell_xf_indexes = self._cell_xf_indexes

639 s_fmt_info = self.formatting_info

640 # for rowx in xrange(self.nrows):

641 if self._first_full_rowx == -2:

642 ubound = self.nrows

643 else:

644 ubound = self._first_full_rowx

645 for rowx in xrange(ubound):

646 trow = s_cell_types[rowx]

647 rlen = len(trow)

648 nextra = ncols - rlen

649 if nextra > 0:

650 s_cell_values[rowx][rlen:] = [UNICODE_LITERAL('')] * nextra

651 trow[rlen:] = self.bt * nextra

652 if s_fmt_info:

653 s_cell_xf_indexes[rowx][rlen:] = self.bf * nextra

654

655 def put_cell_ragged(self, rowx, colx, ctype, value, xf_index):

656 if ctype is None:

657 # we have a number, so look up the cell type

658 ctype = self._xf_index_to_xl_type_map[xf_index]

659 assert 0 <= colx < self.utter_max_cols

660 assert 0 <= rowx < self.utter_max_rows

661 fmt_info = self.formatting_info

662

663 try:

664 nr = rowx + 1

665 if self.nrows < nr:

666

667 scta = self._cell_types.append

668 scva = self._cell_values.append

669 scxa = self._cell_xf_indexes.append

670 bt = self.bt

671 bf = self.bf

672 for _unused in xrange(self.nrows, nr):

673 scta(bt * 0)

674 scva([])

675 if fmt_info:

676 scxa(bf * 0)

677 self.nrows = nr

678

679 types_row = self._cell_types[rowx]

680 values_row = self._cell_values[rowx]

681 if fmt_info:

682 fmt_row = self._cell_xf_indexes[rowx]

683 ltr = len(types_row)

684 if colx >= self.ncols:

685 self.ncols = colx + 1

686 num_empty = colx - ltr

687 if not num_empty:

688 # most common case: colx == previous colx + 1

689 # self._put_cell_cells_appended += 1

690 types_row.append(ctype)

691 values_row.append(value)

692 if fmt_info:

693 fmt_row.append(xf_index)

694 return

695 if num_empty > 0:

696 num_empty += 1

697 # self._put_cell_row_widenings += 1

698 # types_row.extend(self.bt * num_empty)

699 # values_row.extend([UNICODE_LITERAL('')] * num_empty)

700 # if fmt_info:

701 # fmt_row.extend(self.bf * num_empty)

702 types_row[ltr:] = self.bt * num_empty

703 values_row[ltr:] = [UNICODE_LITERAL('')] * num_empty

704 if fmt_info:

705 fmt_row[ltr:] = self.bf * num_empty

706 types_row[colx] = ctype

707 values_row[colx] = value

708 if fmt_info:

709 fmt_row[colx] = xf_index

710 except:

711 print("put_cell", rowx, colx, file=self.logfile)

712 raise

713

714 def put_cell_unragged(self, rowx, colx, ctype, value, xf_index):

715 if ctype is None:

716 # we have a number, so look up the cell type

717 ctype = self._xf_index_to_xl_type_map[xf_index]

718 # assert 0 <= colx < self.utter_max_cols

719 # assert 0 <= rowx < self.utter_max_rows

720 try:

721 self._cell_types[rowx][colx] = ctype

722 self._cell_values[rowx][colx] = value

723 if self.formatting_info:

724 self._cell_xf_indexes[rowx][colx] = xf_index

725 except IndexError:

726 # print >> self.logfile, "put_cell extending", rowx, colx

727 # self.extend_cells(rowx+1, colx+1)

728 # self._put_cell_exceptions += 1

729 nr = rowx + 1

730 nc = colx + 1

731 assert 1 <= nc <= self.utter_max_cols

732 assert 1 <= nr <= self.utter_max_rows

733 if nc > self.ncols:

734 self.ncols = nc

735 # The row self._first_full_rowx and all subsequent rows

736 # are guaranteed to have length == self.ncols. Thus the

737 # "fix ragged rows" section of the tidy_dimensions method

738 # doesn't need to examine them.

739 if nr < self.nrows:

740 # cell data is not in non-descending row order *AND*

741 # self.ncols has been bumped up.

742 # This very rare case ruins this optimisation.

743 self._first_full_rowx = -2

744 elif rowx > self._first_full_rowx > -2:

745 self._first_full_rowx = rowx

746 if nr <= self.nrows:

747 # New cell is in an existing row, so extend that row (if necessary).

748 # Note that nr < self.nrows means that the cell data

749 # is not in ascending row order!!

750 trow = self._cell_types[rowx]

751 nextra = self.ncols - len(trow)

752 if nextra > 0:

753 # self._put_cell_row_widenings += 1

754 trow.extend(self.bt * nextra)

755 if self.formatting_info:

756 self._cell_xf_indexes[rowx].extend(self.bf * nextra)

757 self._cell_values[rowx].extend([UNICODE_LITERAL('')] * nextra)

758 else:

759 scta = self._cell_types.append

760 scva = self._cell_values.append

761 scxa = self._cell_xf_indexes.append

762 fmt_info = self.formatting_info

763 nc = self.ncols

764 bt = self.bt

765 bf = self.bf

766 for _unused in xrange(self.nrows, nr):

767 # self._put_cell_rows_appended += 1

768 scta(bt * nc)

769 scva([UNICODE_LITERAL('')] * nc)

770 if fmt_info:

771 scxa(bf * nc)

772 self.nrows = nr

773 # === end of code from extend_cells()

774 try:

775 self._cell_types[rowx][colx] = ctype

776 self._cell_values[rowx][colx] = value

777 if self.formatting_info:

778 self._cell_xf_indexes[rowx][colx] = xf_index

779 except:

780 print("put_cell", rowx, colx, file=self.logfile)

781 raise

782 except:

783 print("put_cell", rowx, colx, file=self.logfile)

784 raise

785

786

787 # === Methods after this line neither know nor care about how cells are stored.

788

789 def read(self, bk):

790 global rc_stats

791 DEBUG = 0

792 blah = DEBUG or self.verbosity >= 2

793 blah_rows = DEBUG or self.verbosity >= 4

794 blah_formulas = 0 and blah

795 r1c1 = 0

796 oldpos = bk._position

797 bk._position = self._position

798 XL_SHRFMLA_ETC_ETC = (

799 XL_SHRFMLA, XL_ARRAY, XL_TABLEOP, XL_TABLEOP2,

800 XL_ARRAY2, XL_TABLEOP_B2,

801 )

802 self_put_cell = self.put_cell

803 local_unpack = unpack

804 bk_get_record_parts = bk.get_record_parts

805 bv = self.biff_version

806 fmt_info = self.formatting_info

807 do_sst_rich_text = fmt_info and bk._rich_text_runlist_map

808 rowinfo_sharing_dict = {}

809 txos = {}

810 eof_found = 0

811 while 1:

812 # if DEBUG: print "SHEET.READ: about to read from position %d" % bk._position

813 rc, data_len, data = bk_get_record_parts()

814 # if rc in rc_stats:

815 # rc_stats[rc] += 1

816 # else:

817 # rc_stats[rc] = 1

818 # if DEBUG: print "SHEET.READ: op 0x%04x, %d bytes %r" % (rc, data_len, data)

819 if rc == XL_NUMBER:

820 # [:14] in following stmt ignores extraneous rubbish at end of record.

821 # Sample file testEON-8.xls supplied by Jan Kraus.

822 rowx, colx, xf_index, d = local_unpack('<HHHd', data[:14])

823 # if xf_index == 0:

824 # fprintf(self.logfile,

825 # "NUMBER: r=%d c=%d xfx=%d %f\n", rowx, colx, xf_index, d)

826 self_put_cell(rowx, colx, None, d, xf_index)

827 elif rc == XL_LABELSST:

828 rowx, colx, xf_index, sstindex = local_unpack('<HHHi', data)

829 # print "LABELSST", rowx, colx, sstindex, bk._sharedstrings[sstindex]

830 self_put_cell(rowx, colx, XL_CELL_TEXT, bk._sharedstrings[sstindex], xf_index)

831 if do_sst_rich_text:

832 runlist = bk._rich_text_runlist_map.get(sstindex)

833 if runlist:

834 self.rich_text_runlist_map[(rowx, colx)] = runlist

835 elif rc == XL_LABEL:

836 rowx, colx, xf_index = local_unpack('<HHH', data[0:6])

837 if bv < BIFF_FIRST_UNICODE:

838 strg = unpack_string(data, 6, bk.encoding or bk.derive_encoding(), lenlen=2)

839 else:

840 strg = unpack_unicode(data, 6, lenlen=2)

841 self_put_cell(rowx, colx, XL_CELL_TEXT, strg, xf_index)

842 elif rc == XL_RSTRING:

843 rowx, colx, xf_index = local_unpack('<HHH', data[0:6])

844 if bv < BIFF_FIRST_UNICODE:

845 strg, pos = unpack_string_update_pos(data, 6, bk.encoding or bk.derive_encoding(), lenlen=2)

846 nrt = BYTES_ORD(data[pos])

847 pos += 1

848 runlist = []

849 for _unused in xrange(nrt):

850 runlist.append(unpack('<BB', data[pos:pos+2]))

851 pos += 2

852 assert pos == len(data)

853 else:

854 strg, pos = unpack_unicode_update_pos(data, 6, lenlen=2)

855 nrt = unpack('<H', data[pos:pos+2])[0]

856 pos += 2

857 runlist = []

858 for _unused in xrange(nrt):

859 runlist.append(unpack('<HH', data[pos:pos+4]))

860 pos += 4

861 assert pos == len(data)

862 self_put_cell(rowx, colx, XL_CELL_TEXT, strg, xf_index)

863 self.rich_text_runlist_map[(rowx, colx)] = runlist

864 elif rc == XL_RK:

865 rowx, colx, xf_index = local_unpack('<HHH', data[:6])

866 d = unpack_RK(data[6:10])

867 self_put_cell(rowx, colx, None, d, xf_index)

868 elif rc == XL_MULRK:

869 mulrk_row, mulrk_first = local_unpack('<HH', data[0:4])

870 mulrk_last, = local_unpack('<H', data[-2:])

871 pos = 4

872 for colx in xrange(mulrk_first, mulrk_last+1):

873 xf_index, = local_unpack('<H', data[pos:pos+2])

874 d = unpack_RK(data[pos+2:pos+6])

875 pos += 6

876 self_put_cell(mulrk_row, colx, None, d, xf_index)

877 elif rc == XL_ROW:

878 # Version 0.6.0a3: ROW records are just not worth using (for memory allocation).

879 # Version 0.6.1: now used for formatting info.

880 if not fmt_info: continue

881 rowx, bits1, bits2 = local_unpack('<H4xH4xi', data[0:16])

882 if not(0 <= rowx < self.utter_max_rows):

883 print("*** NOTE: ROW record has row index %d; "

884 "should have 0 <= rowx < %d -- record ignored!"

885 % (rowx, self.utter_max_rows), file=self.logfile)

886 continue

887 key = (bits1, bits2)

888 r = rowinfo_sharing_dict.get(key)

889 if r is None:

890 rowinfo_sharing_dict[key] = r = Rowinfo()

891 # Using upkbits() is far too slow on a file

892 # with 30 sheets each with 10K rows :-(

893 # upkbits(r, bits1, (

894 # ( 0, 0x7FFF, 'height'),

895 # (15, 0x8000, 'has_default_height'),

896 # ))

897 # upkbits(r, bits2, (

898 # ( 0, 0x00000007, 'outline_level'),

899 # ( 4, 0x00000010, 'outline_group_starts_ends'),

900 # ( 5, 0x00000020, 'hidden'),

901 # ( 6, 0x00000040, 'height_mismatch'),

902 # ( 7, 0x00000080, 'has_default_xf_index'),

903 # (16, 0x0FFF0000, 'xf_index'),

904 # (28, 0x10000000, 'additional_space_above'),

905 # (29, 0x20000000, 'additional_space_below'),

906 # ))

907 # So:

908 r.height = bits1 & 0x7fff

909 r.has_default_height = (bits1 >> 15) & 1

910 r.outline_level = bits2 & 7

911 r.outline_group_starts_ends = (bits2 >> 4) & 1

912 r.hidden = (bits2 >> 5) & 1

913 r.height_mismatch = (bits2 >> 6) & 1

914 r.has_default_xf_index = (bits2 >> 7) & 1

915 r.xf_index = (bits2 >> 16) & 0xfff

916 r.additional_space_above = (bits2 >> 28) & 1

917 r.additional_space_below = (bits2 >> 29) & 1

918 if not r.has_default_xf_index:

919 r.xf_index = -1

920 self.rowinfo_map[rowx] = r

921 if 0 and r.xf_index > -1:

922 fprintf(self.logfile,

923 "**ROW %d %d %d\n",

924 self.number, rowx, r.xf_index)

925 if blah_rows:

926 print('ROW', rowx, bits1, bits2, file=self.logfile)

927 r.dump(self.logfile,

928 header="--- sh #%d, rowx=%d ---" % (self.number, rowx))

929 elif rc in XL_FORMULA_OPCODES: # 06, 0206, 0406

930 # DEBUG = 1

931 # if DEBUG: print "FORMULA: rc: 0x%04x data: %r" % (rc, data)

932 if bv >= 50:

933 rowx, colx, xf_index, result_str, flags = local_unpack('<HHH8sH', data[0:16])

934 elif bv >= 30:

935 rowx, colx, xf_index, result_str, flags = local_unpack('<HHH8sH', data[0:16])

936 else: # BIFF2

937 rowx, colx, cell_attr, result_str, flags = local_unpack('<HH3s8sB', data[0:16])

938 xf_index = self.fixed_BIFF2_xfindex(cell_attr, rowx, colx)

939 if blah_formulas: # testing formula dumper

940 #### XXXX FIXME

941 fprintf(self.logfile, "FORMULA: rowx=%d colx=%d\n", rowx, colx)

942 fmlalen = local_unpack("<H", data[20:22])[0]

943 decompile_formula(bk, data[22:], fmlalen, FMLA_TYPE_CELL,

944 browx=rowx, bcolx=colx, blah=1, r1c1=r1c1)

945 if result_str[6:8] == b"\xFF\xFF":

946 first_byte = BYTES_ORD(result_str[0])

947 if first_byte == 0:

948 # need to read next record (STRING)

949 gotstring = 0

950 # if flags & 8:

951 if 1: # "flags & 8" applies only to SHRFMLA

952 # actually there's an optional SHRFMLA or ARRAY etc record to skip over

953 rc2, data2_len, data2 = bk.get_record_parts()

954 if rc2 == XL_STRING or rc2 == XL_STRING_B2:

955 gotstring = 1

956 elif rc2 == XL_ARRAY:

957 row1x, rownx, col1x, colnx, array_flags, tokslen = \

958 local_unpack("<HHBBBxxxxxH", data2[:14])

959 if blah_formulas:

960 fprintf(self.logfile, "ARRAY: %d %d %d %d %d\n",

961 row1x, rownx, col1x, colnx, array_flags)

962 # dump_formula(bk, data2[14:], tokslen, bv, reldelta=0, blah=1)

963 elif rc2 == XL_SHRFMLA:

964 row1x, rownx, col1x, colnx, nfmlas, tokslen = \

965 local_unpack("<HHBBxBH", data2[:10])

966 if blah_formulas:

967 fprintf(self.logfile, "SHRFMLA (sub): %d %d %d %d %d\n",

968 row1x, rownx, col1x, colnx, nfmlas)

969 decompile_formula(bk, data2[10:], tokslen, FMLA_TYPE_SHARED,

970 blah=1, browx=rowx, bcolx=colx, r1c1=r1c1)

971 elif rc2 not in XL_SHRFMLA_ETC_ETC:

972 raise XLRDError(

973 "Expected SHRFMLA, ARRAY, TABLEOP* or STRING record; found 0x%04x" % rc2)

974 # if DEBUG: print "gotstring:", gotstring

975 # now for the STRING record

976 if not gotstring:

977 rc2, _unused_len, data2 = bk.get_record_parts()

978 if rc2 not in (XL_STRING, XL_STRING_B2):

979 raise XLRDError("Expected STRING record; found 0x%04x" % rc2)

980 # if DEBUG: print "STRING: data=%r BIFF=%d cp=%d" % (data2, self.biff_version, bk.encoding)

981 strg = self.string_record_contents(data2)

982 self.put_cell(rowx, colx, XL_CELL_TEXT, strg, xf_index)

983 # if DEBUG: print "FORMULA strg %r" % strg

984 elif first_byte == 1:

985 # boolean formula result

986 value = BYTES_ORD(result_str[2])

987 self_put_cell(rowx, colx, XL_CELL_BOOLEAN, value, xf_index)

988 elif first_byte == 2:

989 # Error in cell

990 value = BYTES_ORD(result_str[2])

991 self_put_cell(rowx, colx, XL_CELL_ERROR, value, xf_index)

992 elif first_byte == 3:

993 # empty ... i.e. empty (zero-length) string, NOT an empty cell.

994 self_put_cell(rowx, colx, XL_CELL_TEXT, "", xf_index)

995 else:

996 raise XLRDError("unexpected special case (0x%02x) in FORMULA" % first_byte)

997 else:

998 # it is a number

999 d = local_unpack('<d', result_str)[0]

1000 self_put_cell(rowx, colx, None, d, xf_index)

1001 elif rc == XL_BOOLERR:

1002 rowx, colx, xf_index, value, is_err = local_unpack('<HHHBB', data[:8])

1003 # Note OOo Calc 2.0 writes 9-byte BOOLERR records.

1004 # OOo docs say 8. Excel writes 8.

1005 cellty = (XL_CELL_BOOLEAN, XL_CELL_ERROR)[is_err]

1006 # if DEBUG: print "XL_BOOLERR", rowx, colx, xf_index, value, is_err

1007 self_put_cell(rowx, colx, cellty, value, xf_index)

1008 elif rc == XL_COLINFO:

1009 if not fmt_info: continue

1010 c = Colinfo()

1011 first_colx, last_colx, c.width, c.xf_index, flags \

1012 = local_unpack("<HHHHH", data[:10])

1013 #### Colinfo.width is denominated in 256ths of a character,

1014 #### *not* in characters.

1015 if not(0 <= first_colx <= last_colx <= 256):

1016 # Note: 256 instead of 255 is a common mistake.

1017 # We silently ignore the non-existing 257th column in that case.

1018 print("*** NOTE: COLINFO record has first col index %d, last %d; "

1019 "should have 0 <= first <= last <= 255 -- record ignored!"

1020 % (first_colx, last_colx), file=self.logfile)

1021 del c

1022 continue

1023 upkbits(c, flags, (

1024 ( 0, 0x0001, 'hidden'),

1025 ( 1, 0x0002, 'bit1_flag'),

1026 # *ALL* colinfos created by Excel in "default" cases are 0x0002!!

1027 # Maybe it's "locked" by analogy with XFProtection data.

1028 ( 8, 0x0700, 'outline_level'),

1029 (12, 0x1000, 'collapsed'),

1030 ))

1031 for colx in xrange(first_colx, last_colx+1):

1032 if colx > 255: break # Excel does 0 to 256 inclusive

1033 self.colinfo_map[colx] = c

1034 if 0:

1035 fprintf(self.logfile,

1036 "**COL %d %d %d\n",

1037 self.number, colx, c.xf_index)

1038 if blah:

1039 fprintf(

1040 self.logfile,

1041 "COLINFO sheet #%d cols %d-%d: wid=%d xf_index=%d flags=0x%04x\n",

1042 self.number, first_colx, last_colx, c.width, c.xf_index, flags,

1043 )

1044 c.dump(self.logfile, header='===')

1045 elif rc == XL_DEFCOLWIDTH:

1046 self.defcolwidth, = local_unpack("<H", data[:2])

1047 if 0: print('DEFCOLWIDTH', self.defcolwidth, file=self.logfile)

1048 elif rc == XL_STANDARDWIDTH:

1049 if data_len != 2:

1050 print('*** ERROR *** STANDARDWIDTH', data_len, repr(data), file=self.logfile)

1051 self.standardwidth, = local_unpack("<H", data[:2])

1052 if 0: print('STANDARDWIDTH', self.standardwidth, file=self.logfile)

1053 elif rc == XL_GCW:

1054 if not fmt_info: continue # useless w/o COLINFO

1055 assert data_len == 34

1056 assert data[0:2] == b"\x20\x00"

1057 iguff = unpack("<8i", data[2:34])

1058 gcw = []

1059 for bits in iguff:

1060 for j in xrange(32):

1061 gcw.append(bits & 1)

1062 bits >>= 1

1063 self.gcw = tuple(gcw)

1064 if 0:

1065 showgcw = "".join(map(lambda x: "F "[x], gcw)).rstrip().replace(' ', '.')

1066 print("GCW:", showgcw, file=self.logfile)

1067 elif rc == XL_BLANK:

1068 if not fmt_info: continue

1069 rowx, colx, xf_index = local_unpack('<HHH', data[:6])

1070 # if 0: print >> self.logfile, "BLANK", rowx, colx, xf_index

1071 self_put_cell(rowx, colx, XL_CELL_BLANK, '', xf_index)

1072 elif rc == XL_MULBLANK: # 00BE

1073 if not fmt_info: continue

1074 nitems = data_len >> 1

1075 result = local_unpack("<%dH" % nitems, data)

1076 rowx, mul_first = result[:2]

1077 mul_last = result[-1]

1078 # print >> self.logfile, "MULBLANK", rowx, mul_first, mul_last, data_len, nitems, mul_last + 4 - mul_first

1079 assert nitems == mul_last + 4 - mul_first

1080 pos = 2

1081 for colx in xrange(mul_first, mul_last + 1):

1082 self_put_cell(rowx, colx, XL_CELL_BLANK, '', result[pos])

1083 pos += 1

1084 elif rc == XL_DIMENSION or rc == XL_DIMENSION2:

1085 if data_len == 0:

1086 # Four zero bytes after some other record. See github issue 64.

1087 continue

1088 # if data_len == 10:

1089 # Was crashing on BIFF 4.0 file w/o the two trailing unused bytes.

1090 # Reported by Ralph Heimburger.

1091 if bv < 80:

1092 dim_tuple = local_unpack('<HxxH', data[2:8])

1093 else:

1094 dim_tuple = local_unpack('<ixxH', data[4:12])

1095 self.nrows, self.ncols = 0, 0

1096 self._dimnrows, self._dimncols = dim_tuple

1097 if bv in (21, 30, 40) and self.book.xf_list and not self.book._xf_epilogue_done:

1098 self.book.xf_epilogue()

1099 if blah:

1100 fprintf(

1101 self.logfile,

1102 "sheet %d(%r) DIMENSIONS: ncols=%d nrows=%d\n",

1103 self.number, self.name, self._dimncols, self._dimnrows

1104 )

1105 elif rc == XL_HLINK:

1106 self.handle_hlink(data)

1107 elif rc == XL_QUICKTIP:

1108 self.handle_quicktip(data)

1109 elif rc == XL_EOF:

1110 DEBUG = 0

1111 if DEBUG: print("SHEET.READ: EOF", file=self.logfile)

1112 eof_found = 1

1113 break

1114 elif rc == XL_OBJ:

1115 # handle SHEET-level objects; note there's a separate Book.handle_obj

1116 saved_obj = self.handle_obj(data)

1117 if saved_obj: saved_obj_id = saved_obj.id

1118 else: saved_obj_id = None

1119 elif rc == XL_MSO_DRAWING:

1120 self.handle_msodrawingetc(rc, data_len, data)

1121 elif rc == XL_TXO:

1122 txo = self.handle_txo(data)

1123 if txo and saved_obj_id:

1124 txos[saved_obj_id] = txo

1125 saved_obj_id = None

1126 elif rc == XL_NOTE:

1127 self.handle_note(data, txos)

1128 elif rc == XL_FEAT11:

1129 self.handle_feat11(data)

1130 elif rc in bofcodes: ##### EMBEDDED BOF #####

1131 version, boftype = local_unpack('<HH', data[0:4])

1132 if boftype != 0x20: # embedded chart

1133 print("*** Unexpected embedded BOF (0x%04x) at offset %d: version=0x%04x type=0x%04x"

1134 % (rc, bk._position - data_len - 4, version, boftype), file=self.logfile)

1135 while 1:

1136 code, data_len, data = bk.get_record_parts()

1137 if code == XL_EOF:

1138 break

1139 if DEBUG: print("---> found EOF", file=self.logfile)

1140 elif rc == XL_COUNTRY:

1141 bk.handle_country(data)

1142 elif rc == XL_LABELRANGES:

1143 pos = 0

1144 pos = unpack_cell_range_address_list_update_pos(

1145 self.row_label_ranges, data, pos, bv, addr_size=8,

1146 )

1147 pos = unpack_cell_range_address_list_update_pos(

1148 self.col_label_ranges, data, pos, bv, addr_size=8,

1149 )

1150 assert pos == data_len

1151 elif rc == XL_ARRAY:

1152 row1x, rownx, col1x, colnx, array_flags, tokslen = \

1153 local_unpack("<HHBBBxxxxxH", data[:14])

1154 if blah_formulas:

1155 print("ARRAY:", row1x, rownx, col1x, colnx, array_flags, file=self.logfile)

1156 # dump_formula(bk, data[14:], tokslen, bv, reldelta=0, blah=1)

1157 elif rc == XL_SHRFMLA:

1158 row1x, rownx, col1x, colnx, nfmlas, tokslen = \

1159 local_unpack("<HHBBxBH", data[:10])

1160 if blah_formulas:

1161 print("SHRFMLA (main):", row1x, rownx, col1x, colnx, nfmlas, file=self.logfile)

1162 decompile_formula(bk, data[10:], tokslen, FMLA_TYPE_SHARED,

1163 blah=1, browx=rowx, bcolx=colx, r1c1=r1c1)

1164 elif rc == XL_CONDFMT:

1165 if not fmt_info: continue

1166 assert bv >= 80

1167 num_CFs, needs_recalc, browx1, browx2, bcolx1, bcolx2 = \

1168 unpack("<6H", data[0:12])

1169 if self.verbosity >= 1:

1170 fprintf(

1171 self.logfile,

1172 "\n*** WARNING: Ignoring CONDFMT (conditional formatting) record\n"

1173 "*** in Sheet %d (%r).\n"

1174 "*** %d CF record(s); needs_recalc_or_redraw = %d\n"

1175 "*** Bounding box is %s\n",

1176 self.number, self.name, num_CFs, needs_recalc,

1177 rangename2d(browx1, browx2+1, bcolx1, bcolx2+1),

1178 )

1179 olist = [] # updated by the function

1180 pos = unpack_cell_range_address_list_update_pos(

1181 olist, data, 12, bv, addr_size=8)

1182 # print >> self.logfile, repr(result), len(result)

1183 if self.verbosity >= 1:

1184 fprintf(

1185 self.logfile,

1186 "*** %d individual range(s):\n"

1187 "*** %s\n",

1188 len(olist),

1189 ", ".join(rangename2d(*coords) for coords in olist),

1190 )

1191 elif rc == XL_CF:

1192 if not fmt_info: continue

1193 cf_type, cmp_op, sz1, sz2, flags = unpack("<BBHHi", data[0:10])

1194 font_block = (flags >> 26) & 1

1195 bord_block = (flags >> 28) & 1

1196 patt_block = (flags >> 29) & 1

1197 if self.verbosity >= 1:

1198 fprintf(

1199 self.logfile,

1200 "\n*** WARNING: Ignoring CF (conditional formatting) sub-record.\n"

1201 "*** cf_type=%d, cmp_op=%d, sz1=%d, sz2=%d, flags=0x%08x\n"

1202 "*** optional data blocks: font=%d, border=%d, pattern=%d\n",

1203 cf_type, cmp_op, sz1, sz2, flags,

1204 font_block, bord_block, patt_block,

1205 )

1206 # hex_char_dump(data, 0, data_len, fout=self.logfile)

1207 pos = 12

1208 if font_block:

1209 (font_height, font_options, weight, escapement, underline,

1210 font_colour_index, two_bits, font_esc, font_underl) = unpack("<64x i i H H B 3x i 4x i i i 18x", data[pos:pos+118])

1211 font_style = (two_bits > 1) & 1

1212 posture = (font_options > 1) & 1

1213 font_canc = (two_bits > 7) & 1

1214 cancellation = (font_options > 7) & 1

1215 if self.verbosity >= 1:

1216 fprintf(

1217 self.logfile,

1218 "*** Font info: height=%d, weight=%d, escapement=%d,\n"

1219 "*** underline=%d, colour_index=%d, esc=%d, underl=%d,\n"

1220 "*** style=%d, posture=%d, canc=%d, cancellation=%d\n",

1221 font_height, weight, escapement, underline,

1222 font_colour_index, font_esc, font_underl,

1223 font_style, posture, font_canc, cancellation,

1224 )

1225 pos += 118

1226 if bord_block:

1227 pos += 8

1228 if patt_block:

1229 pos += 4

1230 fmla1 = data[pos:pos+sz1]

1231 pos += sz1

1232 if blah and sz1:

1233 fprintf(self.logfile, "*** formula 1:\n")

1234 dump_formula(bk, fmla1, sz1, bv, reldelta=0, blah=1)

1235 fmla2 = data[pos:pos+sz2]

1236 pos += sz2

1237 assert pos == data_len

1238 if blah and sz2:

1239 fprintf(self.logfile, "*** formula 2:\n")

1240 dump_formula(bk, fmla2, sz2, bv, reldelta=0, blah=1)

1241 elif rc == XL_DEFAULTROWHEIGHT:

1242 if data_len == 4:

1243 bits, self.default_row_height = unpack("<HH", data[:4])

1244 elif data_len == 2:

1245 self.default_row_height, = unpack("<H", data)

1246 bits = 0

1247 fprintf(self.logfile,

1248 "*** WARNING: DEFAULTROWHEIGHT record len is 2, "

1249 "should be 4; assuming BIFF2 format\n")

1250 else:

1251 bits = 0

1252 fprintf(self.logfile,

1253 "*** WARNING: DEFAULTROWHEIGHT record len is %d, "

1254 "should be 4; ignoring this record\n",

1255 data_len)

1256 self.default_row_height_mismatch = bits & 1

1257 self.default_row_hidden = (bits >> 1) & 1

1258 self.default_additional_space_above = (bits >> 2) & 1

1259 self.default_additional_space_below = (bits >> 3) & 1

1260 elif rc == XL_MERGEDCELLS:

1261 if not fmt_info: continue

1262 pos = unpack_cell_range_address_list_update_pos(

1263 self.merged_cells, data, 0, bv, addr_size=8)

1264 if blah:

1265 fprintf(self.logfile,

1266 "MERGEDCELLS: %d ranges\n", (pos - 2) // 8)

1267 assert pos == data_len, \

1268 "MERGEDCELLS: pos=%d data_len=%d" % (pos, data_len)

1269 elif rc == XL_WINDOW2:

1270 if bv >= 80 and data_len >= 14:

1271 (

1272 options,

1273 self.first_visible_rowx, self.first_visible_colx,

1274 self.gridline_colour_index,

1275 self.cached_page_break_preview_mag_factor,

1276 self.cached_normal_view_mag_factor

1277 ) = unpack("<HHHHxxHH", data[:14])

1278 else:

1279 assert bv >= 30 # BIFF3-7

1280 (

1281 options,

1282 self.first_visible_rowx, self.first_visible_colx,

1283 ) = unpack("<HHH", data[:6])

1284 self.gridline_colour_rgb = unpack("<BBB", data[6:9])

1285 self.gridline_colour_index = nearest_colour_index(

1286 self.book.colour_map, self.gridline_colour_rgb, debug=0)

1287 # options -- Bit, Mask, Contents:

1288 # 0 0001H 0 = Show formula results 1 = Show formulas

1289 # 1 0002H 0 = Do not show grid lines 1 = Show grid lines

1290 # 2 0004H 0 = Do not show sheet headers 1 = Show sheet headers

1291 # 3 0008H 0 = Panes are not frozen 1 = Panes are frozen (freeze)

1292 # 4 0010H 0 = Show zero values as empty cells 1 = Show zero values

1293 # 5 0020H 0 = Manual grid line colour 1 = Automatic grid line colour

1294 # 6 0040H 0 = Columns from left to right 1 = Columns from right to left

1295 # 7 0080H 0 = Do not show outline symbols 1 = Show outline symbols

1296 # 8 0100H 0 = Keep splits if pane freeze is removed 1 = Remove splits if pane freeze is removed

1297 # 9 0200H 0 = Sheet not selected 1 = Sheet selected (BIFF5-BIFF8)

1298 # 10 0400H 0 = Sheet not visible 1 = Sheet visible (BIFF5-BIFF8)

1299 # 11 0800H 0 = Show in normal view 1 = Show in page break preview (BIFF8)

1300 # The freeze flag specifies, if a following PANE record (6.71) describes unfrozen or frozen panes.

1301 for attr, _unused_defval in _WINDOW2_options:

1302 setattr(self, attr, options & 1)

1303 options >>= 1

1304 elif rc == XL_SCL:

1305 num, den = unpack("<HH", data)

1306 result = 0

1307 if den:

1308 result = (num * 100) // den

1309 if not(10 <= result <= 400):

1310 if DEBUG or self.verbosity >= 0:

1311 print(

1312 "WARNING *** SCL rcd sheet %d: should have 0.1 <= num/den <= 4; got %d/%d"

1313 % (self.number, num, den),

1314 file=self.logfile,

1315 )

1316 result = 100

1317 self.scl_mag_factor = result

1318 elif rc == XL_PANE:

1319 (

1320 self.vert_split_pos,

1321 self.horz_split_pos,

1322 self.horz_split_first_visible,

1323 self.vert_split_first_visible,

1324 self.split_active_pane,

1325 ) = unpack("<HHHHB", data[:9])

1326 self.has_pane_record = 1

1327 elif rc == XL_HORIZONTALPAGEBREAKS:

1328 if not fmt_info: continue

1329 num_breaks, = local_unpack("<H", data[:2])

1330 assert num_breaks * (2 + 4 * (bv >= 80)) + 2 == data_len

1331 pos = 2

1332 if bv < 80:

1333 while pos < data_len:

1334 self.horizontal_page_breaks.append((local_unpack("<H", data[pos:pos+2])[0], 0, 255))

1335 pos += 2

1336 else:

1337 while pos < data_len:

1338 self.horizontal_page_breaks.append(local_unpack("<HHH", data[pos:pos+6]))

1339 pos += 6

1340 elif rc == XL_VERTICALPAGEBREAKS:

1341 if not fmt_info: continue

1342 num_breaks, = local_unpack("<H", data[:2])

1343 assert num_breaks * (2 + 4 * (bv >= 80)) + 2 == data_len

1344 pos = 2

1345 if bv < 80:

1346 while pos < data_len:

1347 self.vertical_page_breaks.append((local_unpack("<H", data[pos:pos+2])[0], 0, 65535))

1348 pos += 2

1349 else:

1350 while pos < data_len:

1351 self.vertical_page_breaks.append(local_unpack("<HHH", data[pos:pos+6]))

1352 pos += 6

1353 #### all of the following are for BIFF <= 4W

1354 elif bv <= 45:

1355 if rc == XL_FORMAT or rc == XL_FORMAT2:

1356 bk.handle_format(data, rc)

1357 elif rc == XL_FONT or rc == XL_FONT_B3B4:

1358 bk.handle_font(data)

1359 elif rc == XL_STYLE:

1360 if not self.book._xf_epilogue_done:

1361 self.book.xf_epilogue()

1362 bk.handle_style(data)

1363 elif rc == XL_PALETTE:

1364 bk.handle_palette(data)

1365 elif rc == XL_BUILTINFMTCOUNT:

1366 bk.handle_builtinfmtcount(data)

1367 elif rc == XL_XF4 or rc == XL_XF3 or rc == XL_XF2: #### N.B. not XL_XF

1368 bk.handle_xf(data)

1369 elif rc == XL_DATEMODE:

1370 bk.handle_datemode(data)

1371 elif rc == XL_CODEPAGE:

1372 bk.handle_codepage(data)

1373 elif rc == XL_FILEPASS:

1374 bk.handle_filepass(data)

1375 elif rc == XL_WRITEACCESS:

1376 bk.handle_writeaccess(data)

1377 elif rc == XL_IXFE:

1378 self._ixfe = local_unpack('<H', data)[0]

1379 elif rc == XL_NUMBER_B2:

1380 rowx, colx, cell_attr, d = local_unpack('<HH3sd', data)

1381 self_put_cell(rowx, colx, None, d, self.fixed_BIFF2_xfindex(cell_attr, rowx, colx))

1382 elif rc == XL_INTEGER:

1383 rowx, colx, cell_attr, d = local_unpack('<HH3sH', data)

1384 self_put_cell(rowx, colx, None, float(d), self.fixed_BIFF2_xfindex(cell_attr, rowx, colx))

1385 elif rc == XL_LABEL_B2:

1386 rowx, colx, cell_attr = local_unpack('<HH3s', data[0:7])

1387 strg = unpack_string(data, 7, bk.encoding or bk.derive_encoding(), lenlen=1)

1388 self_put_cell(rowx, colx, XL_CELL_TEXT, strg, self.fixed_BIFF2_xfindex(cell_attr, rowx, colx))

1389 elif rc == XL_BOOLERR_B2:

1390 rowx, colx, cell_attr, value, is_err = local_unpack('<HH3sBB', data)

1391 cellty = (XL_CELL_BOOLEAN, XL_CELL_ERROR)[is_err]

1392 # if DEBUG: print "XL_BOOLERR_B2", rowx, colx, cell_attr, value, is_err

1393 self_put_cell(rowx, colx, cellty, value, self.fixed_BIFF2_xfindex(cell_attr, rowx, colx))

1394 elif rc == XL_BLANK_B2:

1395 if not fmt_info: continue

1396 rowx, colx, cell_attr = local_unpack('<HH3s', data[:7])

1397 self_put_cell(rowx, colx, XL_CELL_BLANK, '', self.fixed_BIFF2_xfindex(cell_attr, rowx, colx))

1398 elif rc == XL_EFONT:

1399 bk.handle_efont(data)

1400 elif rc == XL_ROW_B2:

1401 if not fmt_info: continue

1402 rowx, bits1, bits2 = local_unpack('<H4xH2xB', data[0:11])

1403 if not(0 <= rowx < self.utter_max_rows):

1404 print("*** NOTE: ROW_B2 record has row index %d; "

1405 "should have 0 <= rowx < %d -- record ignored!"

1406 % (rowx, self.utter_max_rows), file=self.logfile)

1407 continue

1408 if not (bits2 & 1): # has_default_xf_index is false

1409 xf_index = -1

1410 elif data_len == 18:

1411 # Seems the XF index in the cell_attr is dodgy

1412 xfx = local_unpack('<H', data[16:18])[0]

1413 xf_index = self.fixed_BIFF2_xfindex(cell_attr=None, rowx=rowx, colx=-1, true_xfx=xfx)

1414 else:

1415 cell_attr = data[13:16]

1416 xf_index = self.fixed_BIFF2_xfindex(cell_attr, rowx, colx=-1)

1417 key = (bits1, bits2, xf_index)

1418 r = rowinfo_sharing_dict.get(key)

1419 if r is None:

1420 rowinfo_sharing_dict[key] = r = Rowinfo()

1421 r.height = bits1 & 0x7fff

1422 r.has_default_height = (bits1 >> 15) & 1

1423 r.has_default_xf_index = bits2 & 1

1424 r.xf_index = xf_index

1425 # r.outline_level = 0 # set in __init__

1426 # r.outline_group_starts_ends = 0 # set in __init__

1427 # r.hidden = 0 # set in __init__

1428 # r.height_mismatch = 0 # set in __init__

1429 # r.additional_space_above = 0 # set in __init__

1430 # r.additional_space_below = 0 # set in __init__

1431 self.rowinfo_map[rowx] = r

1432 if 0 and r.xf_index > -1:

1433 fprintf(self.logfile,

1434 "**ROW %d %d %d\n",

1435 self.number, rowx, r.xf_index)

1436 if blah_rows:

1437 print('ROW_B2', rowx, bits1, file=self.logfile)

1438 r.dump(self.logfile,

1439 header="--- sh #%d, rowx=%d ---" % (self.number, rowx))

1440 elif rc == XL_COLWIDTH: # BIFF2 only

1441 if not fmt_info: continue

1442 first_colx, last_colx, width\

1443 = local_unpack("<BBH", data[:4])

1444 if not(first_colx <= last_colx):

1445 print("*** NOTE: COLWIDTH record has first col index %d, last %d; "

1446 "should have first <= last -- record ignored!"

1447 % (first_colx, last_colx), file=self.logfile)

1448 continue

1449 for colx in xrange(first_colx, last_colx+1):

1450 if colx in self.colinfo_map:

1451 c = self.colinfo_map[colx]

1452 else:

1453 c = Colinfo()

1454 self.colinfo_map[colx] = c

1455 c.width = width

1456 if blah:

1457 fprintf(

1458 self.logfile,

1459 "COLWIDTH sheet #%d cols %d-%d: wid=%d\n",

1460 self.number, first_colx, last_colx, width,

1461 )

1462 elif rc == XL_COLUMNDEFAULT: # BIFF2 only

1463 if not fmt_info: continue

1464 first_colx, last_colx = local_unpack("<HH", data[:4])

1465 #### Warning OOo docs wrong; first_colx <= colx < last_colx

1466 if blah:

1467 fprintf(

1468 self.logfile,

1469 "COLUMNDEFAULT sheet #%d cols in range(%d, %d)\n",

1470 self.number, first_colx, last_colx,

1471 )

1472 if not(0 <= first_colx < last_colx <= 256):

1473 print("*** NOTE: COLUMNDEFAULT record has first col index %d, last %d; "

1474 "should have 0 <= first < last <= 256"

1475 % (first_colx, last_colx), file=self.logfile)

1476 last_colx = min(last_colx, 256)

1477 for colx in xrange(first_colx, last_colx):

1478 offset = 4 + 3 * (colx - first_colx)

1479 cell_attr = data[offset:offset+3]

1480 xf_index = self.fixed_BIFF2_xfindex(cell_attr, rowx=-1, colx=colx)

1481 if colx in self.colinfo_map:

1482 c = self.colinfo_map[colx]

1483 else:

1484 c = Colinfo()

1485 self.colinfo_map[colx] = c

1486 c.xf_index = xf_index

1487 elif rc == XL_WINDOW2_B2: # BIFF 2 only

1488 attr_names = ("show_formulas", "show_grid_lines", "show_sheet_headers",

1489 "panes_are_frozen", "show_zero_values")

1490 for attr, char in zip(attr_names, data[0:5]):

1491 setattr(self, attr, int(char != b'\0'))

1492 (

1493 self.first_visible_rowx, self.first_visible_colx,

1494 self.automatic_grid_line_colour,

1495 ) = unpack("<HHB", data[5:10])

1496 self.gridline_colour_rgb = unpack("<BBB", data[10:13])

1497 self.gridline_colour_index = nearest_colour_index(

1498 self.book.colour_map, self.gridline_colour_rgb, debug=0)

1499 else:

1500 # if DEBUG: print "SHEET.READ: Unhandled record type %02x %d bytes %r" % (rc, data_len, data)

1501 pass

1502 if not eof_found:

1503 raise XLRDError("Sheet %d (%r) missing EOF record"

1504 % (self.number, self.name))

1505 self.tidy_dimensions()

1506 self.update_cooked_mag_factors()

1507 bk._position = oldpos

1508 return 1

1509

1510 def string_record_contents(self, data):

1511 bv = self.biff_version

1512 bk = self.book

1513 lenlen = (bv >= 30) + 1

1514 nchars_expected = unpack("<" + "BH"[lenlen - 1], data[:lenlen])[0]

1515 offset = lenlen

1516 if bv < 80:

1517 enc = bk.encoding or bk.derive_encoding()

1518 nchars_found = 0

1519 result = UNICODE_LITERAL("")

1520 while 1:

1521 if bv >= 80:

1522 flag = BYTES_ORD(data[offset]) & 1

1523 enc = ("latin_1", "utf_16_le")[flag]

1524 offset += 1

1525 chunk = unicode(data[offset:], enc)

1526 result += chunk

1527 nchars_found += len(chunk)

1528 if nchars_found == nchars_expected:

1529 return result

1530 if nchars_found > nchars_expected:

1531 msg = ("STRING/CONTINUE: expected %d chars, found %d"

1532 % (nchars_expected, nchars_found))

1533 raise XLRDError(msg)

1534 rc, _unused_len, data = bk.get_record_parts()

1535 if rc != XL_CONTINUE:

1536 raise XLRDError(

1537 "Expected CONTINUE record; found record-type 0x%04X" % rc)

1538 offset = 0

1539

1540 def update_cooked_mag_factors(self):

1541 # Cached values are used ONLY for the non-active view mode.

1542 # When the user switches to the non-active view mode,

1543 # if the cached value for that mode is not valid,

1544 # Excel pops up a window which says:

1545 # "The number must be between 10 and 400. Try again by entering a number in this range."

1546 # When the user hits OK, it drops into the non-active view mode

1547 # but uses the magn from the active mode.

1548 # NOTE: definition of "valid" depends on mode ... see below

1549 blah = DEBUG or self.verbosity > 0

1550 if self.show_in_page_break_preview:

1551 if self.scl_mag_factor is None: # no SCL record

1552 self.cooked_page_break_preview_mag_factor = 100 # Yes, 100, not 60, NOT a typo

1553 else:

1554 self.cooked_page_break_preview_mag_factor = self.scl_mag_factor

1555 zoom = self.cached_normal_view_mag_factor

1556 if not (10 <= zoom <=400):

1557 if blah:

1558 print(

1559 "WARNING *** WINDOW2 rcd sheet %d: Bad cached_normal_view_mag_factor: %d"

1560 % (self.number, self.cached_normal_view_mag_factor),

1561 file=self.logfile,

1562 )

1563 zoom = self.cooked_page_break_preview_mag_factor

1564 self.cooked_normal_view_mag_factor = zoom

1565 else:

1566 # normal view mode

1567 if self.scl_mag_factor is None: # no SCL record

1568 self.cooked_normal_view_mag_factor = 100

1569 else:

1570 self.cooked_normal_view_mag_factor = self.scl_mag_factor

1571 zoom = self.cached_page_break_preview_mag_factor

1572 if not zoom:

1573 # VALID, defaults to 60

1574 zoom = 60

1575 elif not (10 <= zoom <= 400):

1576 if blah:

1577 print(

1578 "WARNING *** WINDOW2 rcd sheet %r: Bad cached_page_break_preview_mag_factor: %r"

1579 % (self.number, self.cached_page_break_preview_mag_factor),

1580 file=self.logfile,

1581 )

1582 zoom = self.cooked_normal_view_mag_factor

1583 self.cooked_page_break_preview_mag_factor = zoom

1584

1585 def fixed_BIFF2_xfindex(self, cell_attr, rowx, colx, true_xfx=None):

1586 DEBUG = 0

1587 blah = DEBUG or self.verbosity >= 2

1588 if self.biff_version == 21:

1589 if self.book.xf_list:

1590 if true_xfx is not None:

1591 xfx = true_xfx

1592 else:

1593 xfx = BYTES_ORD(cell_attr[0]) & 0x3F

1594 if xfx == 0x3F:

1595 if self._ixfe is None:

1596 raise XLRDError("BIFF2 cell record has XF index 63 but no preceding IXFE record.")

1597 xfx = self._ixfe

1598 # OOo docs are capable of interpretation that each

1599 # cell record is preceded immediately by its own IXFE record.

1600 # Empirical evidence is that (sensibly) an IXFE record applies to all

1601 # following cell records until another IXFE comes along.

1602 return xfx

1603 # Have either Excel 2.0, or broken 2.1 w/o XF records -- same effect.

1604 self.biff_version = self.book.biff_version = 20

1605 #### check that XF slot in cell_attr is zero

1606 xfx_slot = BYTES_ORD(cell_attr[0]) & 0x3F

1607 assert xfx_slot == 0

1608 xfx = self._cell_attr_to_xfx.get(cell_attr)

1609 if xfx is not None:

1610 return xfx

1611 if blah:

1612 fprintf(self.logfile, "New cell_attr %r at (%r, %r)\n", cell_attr, rowx, colx)

1613 if not self.book.xf_list:

1614 for xfx in xrange(16):

1615 self.insert_new_BIFF20_xf(cell_attr=b"\x40\x00\x00", style=xfx < 15)

1616 xfx = self.insert_new_BIFF20_xf(cell_attr=cell_attr)

1617 return xfx

1618

1619 def insert_new_BIFF20_xf(self, cell_attr, style=0):

1620 DEBUG = 0

1621 blah = DEBUG or self.verbosity >= 2

1622 book = self.book

1623 xfx = len(book.xf_list)

1624 xf = self.fake_XF_from_BIFF20_cell_attr(cell_attr, style)

1625 xf.xf_index = xfx

1626 book.xf_list.append(xf)

1627 if blah:

1628 xf.dump(self.logfile, header="=== Faked XF %d ===" % xfx, footer="======")

1629 if xf.format_key not in book.format_map:

1630 if xf.format_key:

1631 msg = "ERROR *** XF[%d] unknown format key (%d, 0x%04x)\n"

1632 fprintf(self.logfile, msg,

1633 xf.xf_index, xf.format_key, xf.format_key)

1634 fmt = Format(xf.format_key, FUN, UNICODE_LITERAL("General"))

1635 book.format_map[xf.format_key] = fmt

1636 book.format_list.append(fmt)

1637 cellty_from_fmtty = {

1638 FNU: XL_CELL_NUMBER,

1639 FUN: XL_CELL_NUMBER,

1640 FGE: XL_CELL_NUMBER,

1641 FDT: XL_CELL_DATE,

1642 FTX: XL_CELL_NUMBER, # Yes, a number can be formatted as text.

1643 }

1644 fmt = book.format_map[xf.format_key]

1645 cellty = cellty_from_fmtty[fmt.type]

1646 self._xf_index_to_xl_type_map[xf.xf_index] = cellty

1647 self._cell_attr_to_xfx[cell_attr] = xfx

1648 return xfx

1649

1650 def fake_XF_from_BIFF20_cell_attr(self, cell_attr, style=0):

1651 from .formatting import XF, XFAlignment, XFBorder, XFBackground, XFProtection

1652 xf = XF()

1653 xf.alignment = XFAlignment()

1654 xf.alignment.indent_level = 0

1655 xf.alignment.shrink_to_fit = 0

1656 xf.alignment.text_direction = 0

1657 xf.border = XFBorder()

1658 xf.border.diag_up = 0

1659 xf.border.diag_down = 0

1660 xf.border.diag_colour_index = 0

1661 xf.border.diag_line_style = 0 # no line

1662 xf.background = XFBackground()

1663 xf.protection = XFProtection()

1664 (prot_bits, font_and_format, halign_etc) = unpack('<BBB', cell_attr)

1665 xf.format_key = font_and_format & 0x3F

1666 xf.font_index = (font_and_format & 0xC0) >> 6

1667 upkbits(xf.protection, prot_bits, (

1668 (6, 0x40, 'cell_locked'),

1669 (7, 0x80, 'formula_hidden'),

1670 ))

1671 xf.alignment.hor_align = halign_etc & 0x07

1672 for mask, side in ((0x08, 'left'), (0x10, 'right'), (0x20, 'top'), (0x40, 'bottom')):

1673 if halign_etc & mask:

1674 colour_index, line_style = 8, 1 # black, thin

1675 else:

1676 colour_index, line_style = 0, 0 # none, none

1677 setattr(xf.border, side + '_colour_index', colour_index)

1678 setattr(xf.border, side + '_line_style', line_style)

1679 bg = xf.background

1680 if halign_etc & 0x80:

1681 bg.fill_pattern = 17

1682 else:

1683 bg.fill_pattern = 0

1684 bg.background_colour_index = 9 # white

1685 bg.pattern_colour_index = 8 # black

1686 xf.parent_style_index = (0x0FFF, 0)[style]

1687 xf.alignment.vert_align = 2 # bottom

1688 xf.alignment.rotation = 0

1689 attr_stems = [

1690 'format',

1691 'font',

1692 'alignment',

1693 'border',

1694 'background',

1695 'protection',

1696 ]

1697 for attr_stem in attr_stems:

1698 attr = "_" + attr_stem + "_flag"

1699 setattr(xf, attr, 1)

1700 return xf

1701

1702 def req_fmt_info(self):

1703 if not self.formatting_info:

1704 raise XLRDError("Feature requires open_workbook(..., formatting_info=True)")

1705

1706 def computed_column_width(self, colx):

1707 """

1708 Determine column display width.

1709

1710 :param colx:

1711 Index of the queried column, range 0 to 255.

1712 Note that it is possible to find out the width that will be used to

1713 display columns with no cell information e.g. column IV (colx=255).

1714

1715 :return:

1716 The column width that will be used for displaying

1717 the given column by Excel, in units of 1/256th of the width of a

1718 standard character (the digit zero in the first font).

1719

1720 .. versionadded:: 0.6.1

1721 """

1722 self.req_fmt_info()

1723 if self.biff_version >= 80:

1724 colinfo = self.colinfo_map.get(colx, None)

1725 if colinfo is not None:

1726 return colinfo.width

1727 if self.standardwidth is not None:

1728 return self.standardwidth

1729 elif self.biff_version >= 40:

1730 if self.gcw[colx]:

1731 if self.standardwidth is not None:

1732 return self.standardwidth

1733 else:

1734 colinfo = self.colinfo_map.get(colx, None)

1735 if colinfo is not None:

1736 return colinfo.width

1737 elif self.biff_version == 30:

1738 colinfo = self.colinfo_map.get(colx, None)

1739 if colinfo is not None:

1740 return colinfo.width

1741 # All roads lead to Rome and the DEFCOLWIDTH ...

1742 if self.defcolwidth is not None:

1743 return self.defcolwidth * 256

1744 return 8 * 256 # 8 is what Excel puts in a DEFCOLWIDTH record

1745

1746 def handle_hlink(self, data):

1747 # DEBUG = 1

1748 if DEBUG: print("\n=== hyperlink ===", file=self.logfile)

1749 record_size = len(data)

1750 h = Hyperlink()

1751 h.frowx, h.lrowx, h.fcolx, h.lcolx, guid0, dummy, options = unpack('<HHHH16s4si', data[:32])

1752 assert guid0 == b"\xD0\xC9\xEA\x79\xF9\xBA\xCE\x11\x8C\x82\x00\xAA\x00\x4B\xA9\x0B"

1753 assert dummy == b"\x02\x00\x00\x00"

1754 if DEBUG: print("options: %08X" % options, file=self.logfile)

1755 offset = 32

1756

1757 def get_nul_terminated_unicode(buf, ofs):

1758 nb = unpack('<L', buf[ofs:ofs+4])[0] * 2

1759 ofs += 4

1760 uc = unicode(buf[ofs:ofs+nb], 'UTF-16le')[:-1]

1761 ofs += nb

1762 return uc, ofs

1763

1764 if options & 0x14: # has a description

1765 h.desc, offset = get_nul_terminated_unicode(data, offset)

1766

1767 if options & 0x80: # has a target

1768 h.target, offset = get_nul_terminated_unicode(data, offset)

1769

1770 if (options & 1) and not (options & 0x100): # HasMoniker and not MonikerSavedAsString

1771 # an OLEMoniker structure

1772 clsid, = unpack('<16s', data[offset:offset + 16])

1773 if DEBUG: fprintf(self.logfile, "clsid=%r\n", clsid)

1774 offset += 16

1775 if clsid == b"\xE0\xC9\xEA\x79\xF9\xBA\xCE\x11\x8C\x82\x00\xAA\x00\x4B\xA9\x0B":

1776 # E0H C9H EAH 79H F9H BAH CEH 11H 8CH 82H 00H AAH 00H 4BH A9H 0BH

1777 # URL Moniker

1778 h.type = UNICODE_LITERAL('url')

1779 nbytes = unpack('<L', data[offset:offset + 4])[0]

1780 offset += 4

1781 h.url_or_path = unicode(data[offset:offset + nbytes], 'UTF-16le')

1782 if DEBUG: fprintf(self.logfile, "initial url=%r len=%d\n", h.url_or_path, len(h.url_or_path))

1783 endpos = h.url_or_path.find('\x00')

1784 if DEBUG: print("endpos=%d" % endpos, file=self.logfile)

1785 h.url_or_path = h.url_or_path[:endpos]

1786 true_nbytes = 2 * (endpos + 1)

1787 offset += true_nbytes

1788 extra_nbytes = nbytes - true_nbytes

1789 extra_data = data[offset:offset + extra_nbytes]

1790 offset += extra_nbytes

1791 if DEBUG:

1792 fprintf(

1793 self.logfile,

1794 "url=%r\nextra=%r\nnbytes=%d true_nbytes=%d extra_nbytes=%d\n",

1795 h.url_or_path, extra_data, nbytes, true_nbytes, extra_nbytes,

1796 )

1797 assert extra_nbytes in (24, 0)

1798 elif clsid == b"\x03\x03\x00\x00\x00\x00\x00\x00\xC0\x00\x00\x00\x00\x00\x00\x46":

1799 # file moniker

1800 h.type = UNICODE_LITERAL('local file')

1801 uplevels, nbytes = unpack("<Hi", data[offset:offset + 6])

1802 offset += 6

1803 shortpath = b"..\\" * uplevels + data[offset:offset + nbytes - 1] #### BYTES, not unicode

1804 if DEBUG: fprintf(self.logfile, "uplevels=%d shortpath=%r\n", uplevels, shortpath)

1805 offset += nbytes

1806 offset += 24 # OOo: "unknown byte sequence"

1807 # above is version 0xDEAD + 20 reserved zero bytes

1808 sz = unpack('<i', data[offset:offset + 4])[0]

1809 if DEBUG: print("sz=%d" % sz, file=self.logfile)

1810 offset += 4

1811 if sz:

1812 xl = unpack('<i', data[offset:offset + 4])[0]

1813 offset += 4

1814 offset += 2 # "unknown byte sequence" MS: 0x0003

1815 extended_path = unicode(data[offset:offset + xl], 'UTF-16le') # not zero-terminated

1816 offset += xl

1817 h.url_or_path = extended_path

1818 else:

1819 h.url_or_path = shortpath

1820 #### MS KLUDGE WARNING ####

1821 # The "shortpath" is bytes encoded in the **UNKNOWN** creator's "ANSI" encoding.

1822 else:

1823 fprintf(self.logfile, "*** unknown clsid %r\n", clsid)

1824 elif options & 0x163 == 0x103: # UNC

1825 h.type = UNICODE_LITERAL('unc')

1826 h.url_or_path, offset = get_nul_terminated_unicode(data, offset)

1827 elif options & 0x16B == 8:

1828 h.type = UNICODE_LITERAL('workbook')

1829 else:

1830 h.type = UNICODE_LITERAL('unknown')

1831

1832 if options & 0x8: # has textmark

1833 h.textmark, offset = get_nul_terminated_unicode(data, offset)

1834

1835 if DEBUG:

1836 h.dump(header="... object dump ...")

1837 print("offset=%d record_size=%d" % (offset, record_size))

1838

1839 extra_nbytes = record_size - offset

1840 if extra_nbytes > 0:

1841 fprintf(

1842 self.logfile,

1843 "*** WARNING: hyperlink at R%dC%d has %d extra data bytes: %s\n",

1844 h.frowx + 1,

1845 h.fcolx + 1,

1846 extra_nbytes,

1847 REPR(data[-extra_nbytes:]),

1848 )

1849 # Seen: b"\x00\x00" also b"A\x00", b"V\x00"

1850 elif extra_nbytes < 0:

1851 raise XLRDError("Bug or corrupt file, send copy of input file for debugging")

1852

1853 self.hyperlink_list.append(h)

1854 for rowx in xrange(h.frowx, h.lrowx+1):

1855 for colx in xrange(h.fcolx, h.lcolx+1):

1856 self.hyperlink_map[rowx, colx] = h

1857

1858 def handle_quicktip(self, data):

1859 rcx, frowx, lrowx, fcolx, lcolx = unpack('<5H', data[:10])

1860 assert rcx == XL_QUICKTIP

1861 assert self.hyperlink_list

1862 h = self.hyperlink_list[-1]

1863 assert (frowx, lrowx, fcolx, lcolx) == (h.frowx, h.lrowx, h.fcolx, h.lcolx)

1864 assert data[-2:] == b'\x00\x00'

1865 h.quicktip = unicode(data[10:-2], 'utf_16_le')

1866

1867 def handle_msodrawingetc(self, recid, data_len, data):

1868 if not OBJ_MSO_DEBUG:

1869 return

1870 DEBUG = 1

1871 if self.biff_version < 80:

1872 return

1873 o = MSODrawing()

1874 pos = 0

1875 while pos < data_len:

1876 tmp, fbt, cb = unpack('<HHI', data[pos:pos+8])

1877 ver = tmp & 0xF

1878 inst = (tmp >> 4) & 0xFFF

1879 if ver == 0xF:

1880 ndb = 0 # container

1881 else:

1882 ndb = cb

1883 if DEBUG:

1884 hex_char_dump(data, pos, ndb + 8, base=0, fout=self.logfile)

1885 fprintf(self.logfile,

1886 "fbt:0x%04X inst:%d ver:0x%X cb:%d (0x%04X)\n",

1887 fbt, inst, ver, cb, cb)

1888 if fbt == 0xF010: # Client Anchor

1889 assert ndb == 18

1890 (o.anchor_unk,

1891 o.anchor_colx_lo, o.anchor_rowx_lo,

1892 o.anchor_colx_hi, o.anchor_rowx_hi) = unpack('<Hiiii', data[pos+8:pos+8+ndb])

1893 elif fbt == 0xF011: # Client Data

1894 # must be followed by an OBJ record

1895 assert cb == 0

1896 assert pos + 8 == data_len

1897 else:

1898 pass

1899 pos += ndb + 8

1900 else:

1901 # didn't break out of while loop

1902 assert pos == data_len

1903 if DEBUG:

1904 o.dump(self.logfile, header="=== MSODrawing ===", footer= " ")

1905

1906

1907 def handle_obj(self, data):

1908 if self.biff_version < 80:

1909 return None

1910 o = MSObj()

1911 data_len = len(data)

1912 pos = 0

1913 if OBJ_MSO_DEBUG:

1914 fprintf(self.logfile, "... OBJ record len=%d...\n", data_len)

1915 while pos < data_len:

1916 ft, cb = unpack('<HH', data[pos:pos+4])

1917 if OBJ_MSO_DEBUG:

1918 fprintf(self.logfile, "pos=%d ft=0x%04X cb=%d\n", pos, ft, cb)

1919 hex_char_dump(data, pos, cb + 4, base=0, fout=self.logfile)

1920 if pos == 0 and not (ft == 0x15 and cb == 18):

1921 if self.verbosity:

1922 fprintf(self.logfile, "*** WARNING Ignoring antique or corrupt OBJECT record\n")

1923 return None

1924 if ft == 0x15: # ftCmo ... s/b first

1925 assert pos == 0

1926 o.type, o.id, option_flags = unpack('<HHH', data[pos+4:pos+10])

1927 upkbits(o, option_flags, (

1928 ( 0, 0x0001, 'locked'),

1929 ( 4, 0x0010, 'printable'),

1930 ( 8, 0x0100, 'autofilter'), # not documented in Excel 97 dev kit

1931 ( 9, 0x0200, 'scrollbar_flag'), # not documented in Excel 97 dev kit

1932 (13, 0x2000, 'autofill'),

1933 (14, 0x4000, 'autoline'),

1934 ))

1935 elif ft == 0x00:

1936 if data[pos:data_len] == b'\0' * (data_len - pos):

1937 # ignore "optional reserved" data at end of record

1938 break

1939 msg = "Unexpected data at end of OBJECT record"

1940 fprintf(self.logfile, "*** ERROR %s\n" % msg)

1941 hex_char_dump(data, pos, data_len - pos, base=0, fout=self.logfile)

1942 raise XLRDError(msg)

1943 elif ft == 0x0C: # Scrollbar

1944 values = unpack('<5H', data[pos+8:pos+18])

1945 for value, tag in zip(values, ('value', 'min', 'max', 'inc', 'page')):

1946 setattr(o, 'scrollbar_' + tag, value)

1947 elif ft == 0x0D: # "Notes structure" [used for cell comments]

1948 # not documented in Excel 97 dev kit

1949 if OBJ_MSO_DEBUG: fprintf(self.logfile, "*** OBJ record has ft==0x0D 'notes' structure\n")

1950 elif ft == 0x13: # list box data

1951 if o.autofilter: # non standard exit. NOT documented

1952 break

1953 else:

1954 pass

1955 pos += cb + 4

1956 else:

1957 # didn't break out of while loop

1958 pass

1959 if OBJ_MSO_DEBUG:

1960 o.dump(self.logfile, header="=== MSOBj ===", footer= " ")

1961 return o

1962

1963 def handle_note(self, data, txos):

1964 if OBJ_MSO_DEBUG:

1965 fprintf(self.logfile, '... NOTE record ...\n')

1966 hex_char_dump(data, 0, len(data), base=0, fout=self.logfile)

1967 o = Note()

1968 data_len = len(data)

1969 if self.biff_version < 80:

1970 o.rowx, o.colx, expected_bytes = unpack('<HHH', data[:6])

1971 nb = len(data) - 6

1972 assert nb <= expected_bytes

1973 pieces = [data[6:]]

1974 expected_bytes -= nb

1975 while expected_bytes > 0:

1976 rc2, data2_len, data2 = self.book.get_record_parts()

1977 assert rc2 == XL_NOTE

1978 dummy_rowx, nb = unpack('<H2xH', data2[:6])

1979 assert dummy_rowx == 0xFFFF

1980 assert nb == data2_len - 6

1981 pieces.append(data2[6:])

1982 expected_bytes -= nb

1983 assert expected_bytes == 0

1984 enc = self.book.encoding or self.book.derive_encoding()

1985 o.text = unicode(b''.join(pieces), enc)

1986 o.rich_text_runlist = [(0, 0)]

1987 o.show = 0

1988 o.row_hidden = 0

1989 o.col_hidden = 0

1990 o.author = UNICODE_LITERAL('')

1991 o._object_id = None

1992 self.cell_note_map[o.rowx, o.colx] = o

1993 return

1994 # Excel 8.0+

1995 o.rowx, o.colx, option_flags, o._object_id = unpack('<4H', data[:8])

1996 o.show = (option_flags >> 1) & 1

1997 o.row_hidden = (option_flags >> 7) & 1

1998 o.col_hidden = (option_flags >> 8) & 1

1999 # XL97 dev kit book says NULL [sic] bytes padding between string count and string data

2000 # to ensure that string is word-aligned. Appears to be nonsense.

2001 o.author, endpos = unpack_unicode_update_pos(data, 8, lenlen=2)

2002 # There is a random/undefined byte after the author string (not counted in the

2003 # string length).

2004 # Issue 4 on github: Google Spreadsheet doesn't write the undefined byte.

2005 assert (data_len - endpos) in (0, 1)

2006 if OBJ_MSO_DEBUG:

2007 o.dump(self.logfile, header="=== Note ===", footer= " ")

2008 txo = txos.get(o._object_id)

2009 if txo:

2010 o.text = txo.text

2011 o.rich_text_runlist = txo.rich_text_runlist

2012 self.cell_note_map[o.rowx, o.colx] = o

2013

2014 def handle_txo(self, data):

2015 if self.biff_version < 80:

2016 return

2017 o = MSTxo()

2018 fmt = '<HH6sHHH'

2019 fmtsize = calcsize(fmt)

2020 option_flags, o.rot, controlInfo, cchText, cbRuns, o.ifntEmpty = unpack(fmt, data[:fmtsize])

2021 o.fmla = data[fmtsize:]

2022 upkbits(o, option_flags, (

2023 ( 3, 0x000E, 'horz_align'),

2024 ( 6, 0x0070, 'vert_align'),

2025 ( 9, 0x0200, 'lock_text'),

2026 (14, 0x4000, 'just_last'),

2027 (15, 0x8000, 'secret_edit'),

2028 ))

2029 totchars = 0

2030 o.text = UNICODE_LITERAL('')

2031 while totchars < cchText:

2032 rc2, data2_len, data2 = self.book.get_record_parts()

2033 assert rc2 == XL_CONTINUE

2034 if OBJ_MSO_DEBUG:

2035 hex_char_dump(data2, 0, data2_len, base=0, fout=self.logfile)

2036 nb = BYTES_ORD(data2[0]) # 0 means latin1, 1 means utf_16_le

2037 nchars = data2_len - 1

2038 if nb:

2039 assert nchars % 2 == 0

2040 nchars //= 2

2041 utext, endpos = unpack_unicode_update_pos(data2, 0, known_len=nchars)

2042 assert endpos == data2_len

2043 o.text += utext

2044 totchars += nchars

2045 o.rich_text_runlist = []

2046 totruns = 0

2047 while totruns < cbRuns: # counts of BYTES, not runs

2048 rc3, data3_len, data3 = self.book.get_record_parts()

2049 # print totruns, cbRuns, rc3, data3_len, repr(data3)

2050 assert rc3 == XL_CONTINUE

2051 assert data3_len % 8 == 0

2052 for pos in xrange(0, data3_len, 8):

2053 run = unpack('<HH4x', data3[pos:pos+8])

2054 o.rich_text_runlist.append(run)

2055 totruns += 8

2056 # remove trailing entries that point to the end of the string

2057 while o.rich_text_runlist and o.rich_text_runlist[-1][0] == cchText:

2058 del o.rich_text_runlist[-1]

2059 if OBJ_MSO_DEBUG:

2060 o.dump(self.logfile, header="=== MSTxo ===", footer= " ")

2061 print(o.rich_text_runlist, file=self.logfile)

2062 return o

2063

2064 def handle_feat11(self, data):

2065 if not OBJ_MSO_DEBUG:

2066 return

2067 # rt: Record type; this matches the BIFF rt in the first two bytes of the record; =0872h

2068 # grbitFrt: FRT cell reference flag (see table below for details)

2069 # Ref0: Range reference to a worksheet cell region if grbitFrt=1 (bitFrtRef). Otherwise blank.

2070 # isf: Shared feature type index =5 for Table

2071 # fHdr: =0 since this is for feat not feat header

2072 # reserved0: Reserved for future use =0 for Table

2073 # cref: Count of ref ranges this feature is on

2074 # cbFeatData: Count of byte for the current feature data.

2075 # reserved1: =0 currently not used

2076 # Ref1: Repeat of Ref0. UNDOCUMENTED

2077 rt, grbitFrt, Ref0, isf, fHdr, reserved0, cref, cbFeatData, reserved1, Ref1 = unpack('<HH8sHBiHiH8s', data[0:35])

2078 assert reserved0 == 0

2079 assert reserved1 == 0

2080 assert isf == 5

2081 assert rt == 0x872

2082 assert fHdr == 0

2083 assert Ref1 == Ref0

2084 print(self.logfile, "FEAT11: grbitFrt=%d Ref0=%r cref=%d cbFeatData=%d\n", grbitFrt, Ref0, cref, cbFeatData)

2085 # lt: Table data source type:

2086 # =0 for Excel Worksheet Table =1 for read-write SharePoint linked List

2087 # =2 for XML mapper Table =3 for Query Table

2088 # idList: The ID of the Table (unique per worksheet)

2089 # crwHeader: How many header/title rows the Table has at the top

2090 # crwTotals: How many total rows the Table has at the bottom

2091 # idFieldNext: Next id to try when assigning a unique id to a new field

2092 # cbFSData: The size of the Fixed Data portion of the Table data structure.

2093 # rupBuild: the rupBuild that generated the record

2094 # unusedShort: UNUSED short that can be used later. The value is reserved during round-tripping.

2095 # listFlags: Collection of bit flags: (see listFlags' bit setting table below for detail.)

2096 # lPosStmCache: Table data stream position of cached data

2097 # cbStmCache: Count of bytes of cached data

2098 # cchStmCache: Count of characters of uncompressed cached data in the stream

2099 # lem: Table edit mode (see List (Table) Editing Mode (lem) setting table below for details.)

2100 # rgbHashParam: Hash value for SharePoint Table

2101 # cchName: Count of characters in the Table name string rgbName

2102 (lt, idList, crwHeader, crwTotals, idFieldNext, cbFSData,

2103 rupBuild, unusedShort, listFlags, lPosStmCache, cbStmCache,

2104 cchStmCache, lem, rgbHashParam, cchName) = unpack('<iiiiiiHHiiiii16sH', data[35:35+66])

2105 print("lt=%d idList=%d crwHeader=%d crwTotals=%d idFieldNext=%d cbFSData=%d\n"

2106 "rupBuild=%d unusedShort=%d listFlags=%04X lPosStmCache=%d cbStmCache=%d\n"

2107 "cchStmCache=%d lem=%d rgbHashParam=%r cchName=%d" % (

2108 lt, idList, crwHeader, crwTotals, idFieldNext, cbFSData,

2109 rupBuild, unusedShort,listFlags, lPosStmCache, cbStmCache,

2110 cchStmCache, lem, rgbHashParam, cchName), file=self.logfile)

2111

2112 def __repr__(self):

2113 return "Sheet {:>2}:<{}>".format(self.number, self.name)

2114

2115

2116class MSODrawing(BaseObject):

2117 pass

2118

2119

2120class MSObj(BaseObject):

2121 pass

2122

2123

2124class MSTxo(BaseObject):

2125 pass

2126

2127

2128class Note(BaseObject):

2129 """

2130 Represents a user "comment" or "note".

2131 Note objects are accessible through :attr:`Sheet.cell_note_map`.

2132

2133 .. versionadded:: 0.7.2

2134 """

2135

2136 #: Author of note

2137 author = UNICODE_LITERAL('')

2138

2139 #: ``True`` if the containing column is hidden

2140 col_hidden = 0

2141

2142 #: Column index

2143 colx = 0

2144

2145 #: List of ``(offset_in_string, font_index)`` tuples.

2146 #: Unlike :attr:`Sheet.rich_text_runlist_map`, the first offset should

2147 #: always be 0.

2148 rich_text_runlist = None

2149

2150 #: True if the containing row is hidden

2151 row_hidden = 0

2152

2153 #: Row index

2154 rowx = 0

2155

2156 #: True if note is always shown

2157 show = 0

2158

2159 #: Text of the note

2160 text = UNICODE_LITERAL('')

2161

2162

2163class Hyperlink(BaseObject):

2164 """

2165 Contains the attributes of a hyperlink.

2166 Hyperlink objects are accessible through :attr:`Sheet.hyperlink_list`

2167 and :attr:`Sheet.hyperlink_map`.

2168

2169 .. versionadded:: 0.7.2

2170 """

2171

2172 #: Index of first row

2173 frowx = None

2174

2175 #: Index of last row

2176 lrowx = None

2177

2178 #: Index of first column

2179 fcolx = None

2180

2181 #: Index of last column

2182 lcolx = None

2183

2184 #: Type of hyperlink. Unicode string, one of 'url', 'unc',

2185 #: 'local file', 'workbook', 'unknown'

2186 type = None

2187

2188 #: The URL or file-path, depending in the type. Unicode string, except

2189 #: in the rare case of a local but non-existent file with non-ASCII

2190 #: characters in the name, in which case only the "8.3" filename is

2191 #: available, as a :class:`bytes` (3.x) or :class:`str` (2.x) string,

2192 #: *with unknown encoding.*

2193 url_or_path = None

2194

2195 #: Description.

2196 #: This is displayed in the cell,

2197 #: and should be identical to the cell value. Unicode string, or ``None``.

2198 #: It seems impossible NOT to have a description created by the Excel UI.

2199 desc = None

2200

2201 #: Target frame. Unicode string.

2202 #:

2203 #: .. note::

2204 #: No cases of this have been seen in the wild.

2205 #: It seems impossible to create one in the Excel UI.

2206 target = None

2207

2208 #: The piece after the "#" in

2209 #: "http://docs.python.org/library#struct_module", or the ``Sheet1!A1:Z99``

2210 #: part when type is "workbook".

2211 textmark = None

2212

2213 #: The text of the "quick tip" displayed when the cursor

2214 #: hovers over the hyperlink.

2215 quicktip = None

2216

2217# === helpers ===

2218

2219def unpack_RK(rk_str):

2220 flags = BYTES_ORD(rk_str[0])

2221 if flags & 2:

2222 # There's a SIGNED 30-bit integer in there!

2223 i, = unpack('<i', rk_str)

2224 i >>= 2 # div by 4 to drop the 2 flag bits

2225 if flags & 1:

2226 return i / 100.0

2227 return float(i)

2228 else:

2229 # It's the most significant 30 bits of an IEEE 754 64-bit FP number

2230 d, = unpack('<d', b'\0\0\0\0' + BYTES_LITERAL(chr(flags & 252)) + rk_str[1:4])

2231 if flags & 1:

2232 return d / 100.0

2233 return d

2234

2235##### =============== Cell ======================================== #####

2236

2237cellty_from_fmtty = {

2238 FNU: XL_CELL_NUMBER,

2239 FUN: XL_CELL_NUMBER,

2240 FGE: XL_CELL_NUMBER,

2241 FDT: XL_CELL_DATE,

2242 FTX: XL_CELL_NUMBER, # Yes, a number can be formatted as text.

2243}

2244

2245ctype_text = {

2246 XL_CELL_EMPTY: 'empty',

2247 XL_CELL_TEXT: 'text',

2248 XL_CELL_NUMBER: 'number',

2249 XL_CELL_DATE: 'xldate',

2250 XL_CELL_BOOLEAN: 'bool',

2251 XL_CELL_ERROR: 'error',

2252 XL_CELL_BLANK: 'blank',

2253}

2254

2255

2256class Cell(BaseObject):

2257 """

2258 Contains the data for one cell.

2259

2260 .. warning::

2261 You don't call this class yourself. You access :class:`Cell` objects

2262 via methods of the :class:`Sheet` object(s) that you found in the

2263 :class:`~xlrd.book.Book` object that was returned when you called

2264 :func:`~xlrd.open_workbook`

2265

2266 Cell objects have three attributes: ``ctype`` is an int, ``value``

2267 (which depends on ``ctype``) and ``xf_index``.

2268 If ``formatting_info`` is not enabled when the workbook is opened,

2269 ``xf_index`` will be ``None``.

2270

2271 The following table describes the types of cells and how their values

2272 are represented in Python.

2273

2274 .. raw:: html

2275

2276 <table border="1" cellpadding="7">

2277 <tr>

2278 <th>Type symbol</th>

2279 <th>Type number</th>

2280 <th>Python value</th>

2281 </tr>

2282 <tr>

2283 <td>XL_CELL_EMPTY</td>

2284 <td align="center">0</td>

2285 <td>empty string ''</td>

2286 </tr>

2287 <tr>

2288 <td>XL_CELL_TEXT</td>

2289 <td align="center">1</td>

2290 <td>a Unicode string</td>

2291 </tr>

2292 <tr>

2293 <td>XL_CELL_NUMBER</td>

2294 <td align="center">2</td>

2295 <td>float</td>

2296 </tr>

2297 <tr>

2298 <td>XL_CELL_DATE</td>

2299 <td align="center">3</td>

2300 <td>float</td>

2301 </tr>

2302 <tr>

2303 <td>XL_CELL_BOOLEAN</td>

2304 <td align="center">4</td>

2305 <td>int; 1 means TRUE, 0 means FALSE</td>

2306 </tr>

2307 <tr>

2308 <td>XL_CELL_ERROR</td>

2309 <td align="center">5</td>

2310 <td>int representing internal Excel codes; for a text representation,

2311 refer to the supplied dictionary error_text_from_code</td>

2312 </tr>

2313 <tr>

2314 <td>XL_CELL_BLANK</td>

2315 <td align="center">6</td>

2316 <td>empty string ''. Note: this type will appear only when

2317 open_workbook(..., formatting_info=True) is used.</td>

2318 </tr>

2319 </table>

2320 """

2321

2322 __slots__ = ['ctype', 'value', 'xf_index']

2323

2324 def __init__(self, ctype, value, xf_index=None):

2325 self.ctype = ctype

2326 self.value = value

2327 self.xf_index = xf_index

2328

2329 def __repr__(self):

2330 if self.xf_index is None:

2331 return "%s:%r" % (ctype_text[self.ctype], self.value)

2332 else:

2333 return "%s:%r (XF:%r)" % (ctype_text[self.ctype], self.value, self.xf_index)

2334

2335empty_cell = Cell(XL_CELL_EMPTY, UNICODE_LITERAL(''))

2336

2337##### =============== Colinfo and Rowinfo ============================== #####

2338

2339

2340class Colinfo(BaseObject):

2341 """

2342 Width and default formatting information that applies to one or

2343 more columns in a sheet. Derived from ``COLINFO`` records.

2344

2345 Here is the default hierarchy for width, according to the OOo docs:

2346

2347 In BIFF3, if a ``COLINFO`` record is missing for a column,

2348 the width specified in the record ``DEFCOLWIDTH`` is used instead.

2349

2350 In BIFF4-BIFF7, the width set in this ``COLINFO`` record is only used,

2351 if the corresponding bit for this column is cleared in the ``GCW``

2352 record, otherwise the column width set in the ``DEFCOLWIDTH`` record

2353 is used (the ``STANDARDWIDTH`` record is always ignored in this case [#f1]_).

2354

2355 In BIFF8, if a ``COLINFO`` record is missing for a column,

2356 the width specified in the record ``STANDARDWIDTH`` is used.

2357 If this ``STANDARDWIDTH`` record is also missing,

2358 the column width of the record ``DEFCOLWIDTH`` is used instead.

2359

2360 .. [#f1] The docs on the ``GCW`` record say this:

2361

2362 If a bit is set, the corresponding column uses the width set in the

2363 ``STANDARDWIDTH`` record. If a bit is cleared, the corresponding column

2364 uses the width set in the ``COLINFO`` record for this column.

2365

2366 If a bit is set, and the worksheet does not contain the ``STANDARDWIDTH``

2367 record, or if the bit is cleared, and the worksheet does not contain the

2368 ``COLINFO`` record, the ``DEFCOLWIDTH`` record of the worksheet will be

2369 used instead.

2370

2371 xlrd goes with the GCW version of the story.

2372 Reference to the source may be useful: see

2373 :meth:`Sheet.computed_column_width`.

2374

2375 .. versionadded:: 0.6.1

2376 """

2377

2378 #: Width of the column in 1/256 of the width of the zero character,

2379 #: using default font (first ``FONT`` record in the file).

2380 width = 0

2381

2382 #: XF index to be used for formatting empty cells.

2383 xf_index = -1

2384

2385 #: 1 = column is hidden

2386 hidden = 0

2387

2388 #: Value of a 1-bit flag whose purpose is unknown

2389 #: but is often seen set to 1

2390 bit1_flag = 0

2391

2392 #: Outline level of the column, in ``range(7)``.

2393 #: (0 = no outline)

2394 outline_level = 0

2395

2396 #: 1 = column is collapsed

2397 collapsed = 0

2398

2399_USE_SLOTS = 1

2400

2401

2402class Rowinfo(BaseObject):

2403 """

2404 Height and default formatting information that applies to a row in a sheet.

2405 Derived from ``ROW`` records.

2406

2407 .. versionadded:: 0.6.1

2408 """

2409

2410 if _USE_SLOTS: 2410 ↛ 2424line 2410 didn't jump to line 2424, because the condition on line 2410 was never false

2411 __slots__ = (

2412 "height",

2413 "has_default_height",

2414 "outline_level",

2415 "outline_group_starts_ends",

2416 "hidden",

2417 "height_mismatch",

2418 "has_default_xf_index",

2419 "xf_index",

2420 "additional_space_above",

2421 "additional_space_below",

2422 )

2423

2424 def __init__(self):

2425 #: Height of the row, in twips. One twip == 1/20 of a point.

2426 self.height = None

2427

2428 #: 0 = Row has custom height; 1 = Row has default height.

2429 self.has_default_height = None

2430

2431 #: Outline level of the row (0 to 7)

2432 self.outline_level = None

2433

2434 #: 1 = Outline group starts or ends here (depending on where the

2435 #: outline buttons are located, see ``WSBOOL`` record, which is not

2436 #: parsed by xlrd), *and* is collapsed.

2437 self.outline_group_starts_ends = None

2438

2439 #: 1 = Row is hidden (manually, or by a filter or outline group)

2440 self.hidden = None

2441

2442 #: 1 = Row height and default font height do not match.

2443 self.height_mismatch = None

2444

2445 #: 1 = the xf_index attribute is usable; 0 = ignore it.

2446 self.has_default_xf_index = None

2447

2448 #: Index to default :class:`~xlrd.formatting.XF` record for empty cells

2449 #: in this row. Don't use this if ``has_default_xf_index == 0``.

2450 self.xf_index = None

2451

2452 #: This flag is set if the upper border of at least one cell in this

2453 #: row or if the lower border of at least one cell in the row above is

2454 #: formatted with a thick line style. Thin and medium line styles are

2455 #: not taken into account.

2456 self.additional_space_above = None

2457

2458 #: This flag is set if the lower border of at least one cell in this row

2459 #: or if the upper border of at least one cell in the row below is

2460 #: formatted with a medium or thick line style. Thin line styles are not

2461 #: taken into account.

2462 self.additional_space_below = None

2463

2464 def __getstate__(self):

2465 return (

2466 self.height,

2467 self.has_default_height,

2468 self.outline_level,

2469 self.outline_group_starts_ends,

2470 self.hidden,

2471 self.height_mismatch,

2472 self.has_default_xf_index,

2473 self.xf_index,

2474 self.additional_space_above,

2475 self.additional_space_below,

2476 )

2477

2478 def __setstate__(self, state):

2479 (

2480 self.height,

2481 self.has_default_height,

2482 self.outline_level,

2483 self.outline_group_starts_ends,

2484 self.hidden,

2485 self.height_mismatch,

2486 self.has_default_xf_index,

2487 self.xf_index,

2488 self.additional_space_above,

2489 self.additional_space_below,

2490 ) = state