Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/xlrd/sheet.py: 7%

1316 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1# -*- coding: utf-8 -*- 

2# Copyright (c) 2005-2013 Stephen John Machin, Lingfo Pty Ltd 

3# This module is part of the xlrd package, which is released under a 

4# BSD-style licence. 

5 

6from __future__ import print_function 

7 

8from array import array 

9from struct import calcsize, unpack 

10 

11from .biffh import * 

12from .formatting import Format, nearest_colour_index 

13from .formula import ( 

14 FMLA_TYPE_CELL, FMLA_TYPE_SHARED, decompile_formula, dump_formula, 

15 rangename2d, 

16) 

17from .timemachine import * 

18 

# Module-level debug switches; both are off by default.
DEBUG = 0
OBJ_MSO_DEBUG = 0

# (attribute name, initial value) pairs. Sheet.__init__ copies each pair onto
# the new sheet so the attributes exist even when the file contains no
# WINDOW2 record.
_WINDOW2_options = (
    ("show_formulas", 0),
    ("show_grid_lines", 1),
    ("show_sheet_headers", 1),
    ("panes_are_frozen", 0),
    ("show_zero_values", 1),
    ("automatic_grid_line_colour", 1),
    ("columns_from_right_to_left", 0),
    ("show_outline_symbols", 1),
    ("remove_splits_if_pane_freeze_is_removed", 0),
    # Several sheets can be selected at once, but only one can be active
    # (in OOo: hold down Ctrl and click multiple tabs).
    ("sheet_selected", 0),
    # "sheet_visible" would be better named "sheet_active": it is 1 when this
    # is the sheet displayed when the file is opened, so more than likely only
    # one sheet should ever have it set. It would correspond to a
    # Book.sheet_active attribute, but none exists because WINDOW1 records are
    # not currently processed. Real visibility is the ``visibility`` attribute
    # taken from the BOUNDSHEET record.
    ("sheet_visible", 0),
    ("show_in_page_break_preview", 0),
)

47 

48 

49 

50class Sheet(BaseObject): 

51 """ 

52 Contains the data for one worksheet. 

53 

54 In the cell access functions, ``rowx`` is a row index, counting from 

55 zero, and ``colx`` is a column index, counting from zero. 

56 Negative values for row/column indexes and slice positions are supported in 

57 the expected fashion. 

58 

59 For information about cell types and cell values, refer to the documentation 

60 of the :class:`Cell` class. 

61 

62 .. warning:: 

63 

64 You don't instantiate this class yourself. You access :class:`Sheet` 

65 objects via the :class:`~xlrd.book.Book` object that 

66 was returned when you called :func:`xlrd.open_workbook`. 

67 """ 

68 

69 #: Name of sheet. 

70 name = '' 

71 

72 #: A reference to the :class:`~xlrd.book.Book` object to which this sheet 

73 #: belongs. 

74 #: 

75 #: Example usage: ``some_sheet.book.datemode`` 

76 book = None 

77 

78 #: Number of rows in sheet. A row index is in ``range(thesheet.nrows)``. 

79 nrows = 0 

80 

81 #: Nominal number of columns in sheet. It is one more than the maximum 

82 #: column index found, ignoring trailing empty cells. 

83 #: See also the ``ragged_rows`` parameter to :func:`~xlrd.open_workbook` 

84 #: and :meth:`~xlrd.sheet.Sheet.row_len`. 

85 ncols = 0 

86 

87 

88 #: The map from a column index to a :class:`Colinfo` object. Often there is 

89 #: an entry in ``COLINFO`` records for all column indexes in ``range(257)``. 

90 #: 

91 #: .. note:: 

92 #: xlrd ignores the entry for the non-existent 

93 #: 257th column. 

94 #: 

95 #: On the other hand, there may be no entry for unused columns. 

96 #: 

97 #: .. versionadded:: 0.6.1 

98 #: 

99 #: Populated only if ``open_workbook(..., formatting_info=True)`` 

100 colinfo_map = {} 

101 

102 #: The map from a row index to a :class:`Rowinfo` object. 

103 #: 

104 #: .. note:: 

105 #: It is possible to have missing entries -- at least one source of 

106 #: XLS files doesn't bother writing ``ROW`` records. 

107 #: 

108 #: .. versionadded:: 0.6.1 

109 #: 

110 #: Populated only if ``open_workbook(..., formatting_info=True)`` 

111 rowinfo_map = {} 

112 

113 #: List of address ranges of cells containing column labels. 

114 #: These are set up in Excel by Insert > Name > Labels > Columns. 

115 #: 

116 #: .. versionadded:: 0.6.0 

117 #: 

118 #: How to deconstruct the list: 

119 #: 

120 #: .. code-block:: python 

121 #: 

122 #: for crange in thesheet.col_label_ranges: 

123 #: rlo, rhi, clo, chi = crange 

124 #: for rx in xrange(rlo, rhi): 

125 #: for cx in xrange(clo, chi): 

126 #: print "Column label at (rowx=%d, colx=%d) is %r" \ 

127 #: % (rx, cx, thesheet.cell_value(rx, cx)) 

128 col_label_ranges = [] 

129 

130 #: List of address ranges of cells containing row labels. 

131 #: For more details, see :attr:`col_label_ranges`. 

132 #: 

133 #: .. versionadded:: 0.6.0 

134 row_label_ranges = [] 

135 

136 #: List of address ranges of cells which have been merged. 

137 #: These are set up in Excel by Format > Cells > Alignment, then ticking 

138 #: the "Merge cells" box. 

139 #: 

140 #: .. note:: 

141 #: The upper limits are exclusive: i.e. ``[2, 3, 7, 9]`` only 

142 #: spans two cells. 

143 #: 

144 #: .. note:: Extracted only if ``open_workbook(..., formatting_info=True)`` 

145 #: 

146 #: .. versionadded:: 0.6.1 

147 #: 

148 #: How to deconstruct the list: 

149 #: 

150 #: .. code-block:: python 

151 #: 

152 #: for crange in thesheet.merged_cells: 

153 #: rlo, rhi, clo, chi = crange 

154 #: for rowx in xrange(rlo, rhi): 

155 #: for colx in xrange(clo, chi): 

156 #: # cell (rlo, clo) (the top left one) will carry the data 

157 #: # and formatting info; the remainder will be recorded as 

158 #: # blank cells, but a renderer will apply the formatting info 

159 #: # for the top left cell (e.g. border, pattern) to all cells in 

160 #: # the range. 

161 merged_cells = [] 

162 

163 #: Mapping of ``(rowx, colx)`` to list of ``(offset, font_index)`` tuples. 

164 #: The offset defines where in the string the font begins to be used. 

165 #: Offsets are expected to be in ascending order. 

166 #: If the first offset is not zero, the meaning is that the cell's ``XF``'s 

167 #: font should be used from offset 0. 

168 #: 

169 #: This is a sparse mapping. There is no entry for cells that are not 

170 #: formatted with rich text. 

171 #: 

172 #: How to use: 

173 #: 

174 #: .. code-block:: python 

175 #: 

176 #: runlist = thesheet.rich_text_runlist_map.get((rowx, colx)) 

177 #: if runlist: 

178 #: for offset, font_index in runlist: 

179 #: # do work here. 

180 #: pass 

181 #: 

182 #: .. versionadded:: 0.7.2 

183 #: 

184 #: Populated only if ``open_workbook(..., formatting_info=True)`` 

185 rich_text_runlist_map = {} 

186 

187 #: Default column width from ``DEFCOLWIDTH`` record, else ``None``. 

188 #: From the OOo docs: 

189 #: 

190 #: Column width in characters, using the width of the zero character 

191 #: from default font (first FONT record in the file). Excel adds some 

192 #: extra space to the default width, depending on the default font and 

193 #: default font size. The algorithm how to exactly calculate the resulting 

194 #: column width is not known. 

195 #: Example: The default width of 8 set in this record results in a column 

196 #: width of 8.43 using Arial font with a size of 10 points. 

197 #: 

198 #: For the default hierarchy, refer to the :class:`Colinfo` class. 

199 #: 

200 #: .. versionadded:: 0.6.1 

201 defcolwidth = None 

202 

203 #: Default column width from ``STANDARDWIDTH`` record, else ``None``. 

204 #: 

205 #: From the OOo docs: 

206 #: 

207 #: Default width of the columns in 1/256 of the width of the zero 

208 #: character, using default font (first FONT record in the file). 

209 #: 

210 #: For the default hierarchy, refer to the :class:`Colinfo` class. 

211 #: 

212 #: .. versionadded:: 0.6.1 

213 standardwidth = None 

214 

215 #: Default value to be used for a row if there is 

216 #: no ``ROW`` record for that row. 

217 #: From the *optional* ``DEFAULTROWHEIGHT`` record. 

218 default_row_height = None 

219 

220 #: Default value to be used for a row if there is 

221 #: no ``ROW`` record for that row. 

222 #: From the *optional* ``DEFAULTROWHEIGHT`` record. 

223 default_row_height_mismatch = None 

224 

225 #: Default value to be used for a row if there is 

226 #: no ``ROW`` record for that row. 

227 #: From the *optional* ``DEFAULTROWHEIGHT`` record. 

228 default_row_hidden = None 

229 

230 #: Default value to be used for a row if there is 

231 #: no ``ROW`` record for that row. 

232 #: From the *optional* ``DEFAULTROWHEIGHT`` record. 

233 default_additional_space_above = None 

234 

235 #: Default value to be used for a row if there is 

236 #: no ``ROW`` record for that row. 

237 #: From the *optional* ``DEFAULTROWHEIGHT`` record. 

238 default_additional_space_below = None 

239 

240 #: Visibility of the sheet: 

241 #: :: 

242 #: 

243 #: 0 = visible 

244 #: 1 = hidden (can be unhidden by user -- Format -> Sheet -> Unhide) 

245 #: 2 = "very hidden" (can be unhidden only by VBA macro). 

246 visibility = 0 

247 

248 #: A 256-element tuple corresponding to the contents of the GCW record for 

249 #: this sheet. If no such record, treat as all bits zero. 

250 #: Applies to BIFF4-7 only. See docs of the :class:`Colinfo` class for 

251 #: discussion. 

252 gcw = (0, ) * 256 

253 

254 #: A list of :class:`Hyperlink` objects corresponding to ``HLINK`` records 

255 #: found in the worksheet. 

256 #: 

257 #: .. versionadded:: 0.7.2 

258 hyperlink_list = [] 

259 

260 #: A sparse mapping from ``(rowx, colx)`` to an item in 

261 #: :attr:`~xlrd.sheet.Sheet.hyperlink_list`. 

262 #: Cells not covered by a hyperlink are not mapped. 

263 #: It is possible using the Excel UI to set up a hyperlink that 

264 #: covers a larger-than-1x1 rectangle of cells. 

265 #: Hyperlink rectangles may overlap (Excel doesn't check). 

266 #: When a multiply-covered cell is clicked on, the hyperlink that is 

267 #: activated 

268 #: (and the one that is mapped here) is the last in 

269 #: :attr:`~xlrd.sheet.Sheet.hyperlink_list`. 

270 #: 

271 #: .. versionadded:: 0.7.2 

272 hyperlink_map = {} 

273 

274 #: A sparse mapping from ``(rowx, colx)`` to a :class:`Note` object. 

275 #: Cells not containing a note ("comment") are not mapped. 

276 #: 

277 #: .. versionadded:: 0.7.2 

278 cell_note_map = {} 

279 

280 #: Number of columns in left pane (frozen panes; for split panes, see 

281 #: comments in code) 

282 vert_split_pos = 0 

283 

284 #: Number of rows in top pane (frozen panes; for split panes, see comments 

285 #: in code) 

286 horz_split_pos = 0 

287 

288 #: Index of first visible row in bottom frozen/split pane 

289 horz_split_first_visible = 0 

290 

291 #: Index of first visible column in right frozen/split pane 

292 vert_split_first_visible = 0 

293 

294 #: Frozen panes: ignore it. Split panes: explanation and diagrams in 

295 #: OOo docs. 

296 split_active_pane = 0 

297 

298 #: Boolean specifying if a ``PANE`` record was present, ignore unless you're 

299 #: ``xlutils.copy`` 

300 has_pane_record = 0 

301 

302 #: A list of the horizontal page breaks in this sheet. 

303 #: Breaks are tuples in the form 

304 #: ``(index of row after break, start col index, end col index)``. 

305 #: 

306 #: Populated only if ``open_workbook(..., formatting_info=True)`` 

307 #: 

308 #: .. versionadded:: 0.7.2 

309 horizontal_page_breaks = [] 

310 

311 #: A list of the vertical page breaks in this sheet. 

312 #: Breaks are tuples in the form 

313 #: ``(index of col after break, start row index, end row index)``. 

314 #: 

315 #: Populated only if ``open_workbook(..., formatting_info=True)`` 

316 #: 

317 #: .. versionadded:: 0.7.2 

318 vertical_page_breaks = [] 

319 

def __init__(self, book, position, name, number):
    """
    Set up an empty sheet attached to ``book``.

    :param book: the owning book; its ``biff_version``, ``logfile``,
                 ``verbosity``, ``formatting_info``, ``ragged_rows``,
                 ``_xf_index_to_xl_type_map`` and ``_sheet_visibility``
                 are read here.
    :param position: stored as ``_position``; :meth:`read` moves the book's
                     read position to it before parsing.
    :param name: name of the sheet.
    :param number: sheet number; also the index used to look up this
                   sheet's entry in ``book._sheet_visibility``.
    """
    self.book = book
    self.biff_version = book.biff_version
    self._position = position
    self.logfile = book.logfile
    # One-element arrays: multiplied by a count they give a run of "empty"
    # cell types (bt) or "no XF index" markers (bf).
    self.bt = array('B', [XL_CELL_EMPTY])
    self.bf = array('h', [-1])
    self.name = name
    self.number = number
    self.verbosity = book.verbosity
    self.formatting_info = book.formatting_info
    self.ragged_rows = book.ragged_rows
    # Choose the cell-storing routine once, up front.
    self.put_cell = (
        self.put_cell_ragged if self.ragged_rows else self.put_cell_unragged
    )
    self._xf_index_to_xl_type_map = book._xf_index_to_xl_type_map

    # Dimensions.
    self.nrows = 0  # actual, including possibly empty cells
    self.ncols = 0
    self._maxdatarowx = -1  # highest rowx containing a non-empty cell
    self._maxdatacolx = -1  # highest colx containing a non-empty cell
    self._dimnrows = 0  # as per DIMENSIONS record
    self._dimncols = 0

    # Per-row parallel storage of cell values, types and XF indexes.
    self._cell_values = []
    self._cell_types = []
    self._cell_xf_indexes = []

    # Column width / row height defaults.
    self.defcolwidth = None
    self.standardwidth = None
    self.default_row_height = None
    self.default_row_height_mismatch = 0
    self.default_row_hidden = 0
    self.default_additional_space_above = 0
    self.default_additional_space_below = 0

    # Fresh per-instance containers (the class-level ones are shared).
    self.colinfo_map = {}
    self.rowinfo_map = {}
    self.col_label_ranges = []
    self.row_label_ranges = []
    self.merged_cells = []
    self.rich_text_runlist_map = {}
    self.horizontal_page_breaks = []
    self.vertical_page_breaks = []
    # Counters bumped by cell_xf_index(): cell, row, column, global default.
    self._xf_index_stats = [0, 0, 0, 0]
    self.visibility = book._sheet_visibility[number]  # from BOUNDSHEET record

    # Start from the WINDOW2 fallbacks in case no such record is present.
    for option_name, initial_value in _WINDOW2_options:
        setattr(self, option_name, initial_value)
    self.first_visible_rowx = 0
    self.first_visible_colx = 0
    self.gridline_colour_index = 0x40
    self.gridline_colour_rgb = None  # pre-BIFF8
    self.hyperlink_list = []
    self.hyperlink_map = {}
    self.cell_note_map = {}

    # Values calculated by xlrd to predict the mag factors that Excel will
    # actually use to display the worksheet. Pass these to xlwt when
    # writing XLS files.
    # Warning 1: OOo Calc and Gnumeric have been observed to behave
    # differently from Excel.
    # Warning 2: zero means almost exactly what it says -- the sheet is
    # displayed as a very tiny speck. xlwt rejects any mag_factor that is
    # not (10 <= mag_factor <= 400).
    self.cooked_page_break_preview_mag_factor = 60
    self.cooked_normal_view_mag_factor = 100

    # Values (if any) actually stored in the XLS file.
    self.cached_page_break_preview_mag_factor = 0  # default (60%), from WINDOW2 record
    self.cached_normal_view_mag_factor = 0  # default (100%), from WINDOW2 record
    self.scl_mag_factor = None  # from SCL record

    self._ixfe = None  # BIFF2 only
    self._cell_attr_to_xfx = {}  # BIFF2.0 only

    # Hard limits used to validate row and column indexes.
    self.utter_max_rows = 65536 if self.biff_version >= 80 else 16384
    self.utter_max_cols = 256

    # Bookkeeping for the "fix ragged rows" pass in tidy_dimensions().
    self._first_full_rowx = -1

398 

399 # self._put_cell_exceptions = 0 

400 # self._put_cell_row_widenings = 0 

401 # self._put_cell_rows_appended = 0 

402 # self._put_cell_cells_appended = 0 

403 

def cell(self, rowx, colx):
    """
    Build the :class:`Cell` object for the given row and column.

    The cell's XF index is looked up only when the sheet holds formatting
    information; otherwise ``None`` is passed instead.
    """
    xf_index = self.cell_xf_index(rowx, colx) if self.formatting_info else None
    cell_type = self._cell_types[rowx][colx]
    cell_value = self._cell_values[rowx][colx]
    return Cell(cell_type, cell_value, xf_index)

417 

def cell_value(self, rowx, colx):
    """
    Return the stored value of the cell in the given row and column.
    """
    values_in_row = self._cell_values[rowx]
    return values_in_row[colx]

421 

def cell_type(self, rowx, colx):
    """
    Return the stored type code of the cell in the given row and column.

    The meaning of the codes is described in the documentation of the
    :class:`Cell` class.
    """
    types_in_row = self._cell_types[rowx]
    return types_in_row[colx]

429 

def cell_xf_index(self, rowx, colx):
    """
    Return the XF index that applies to the cell in the given row and column.
    This is an index into :attr:`~xlrd.book.Book.xf_list`.

    The lookup falls back in this order: the cell's own index, the row's
    index from ``rowinfo_map``, the column's index from ``colinfo_map``,
    and finally the hardwired default of 15. One of the four counters in
    ``_xf_index_stats`` is incremented to record which source was used.

    .. versionadded:: 0.6.1
    """
    self.req_fmt_info()
    stats = self._xf_index_stats

    # 1. The cell's own XF index.
    own_xfx = self._cell_xf_indexes[rowx][colx]
    if own_xfx > -1:
        stats[0] += 1
        return own_xfx

    # 2. The row's XF index, when there is a usable one.
    try:
        row_xfx = self.rowinfo_map[rowx].xf_index
    except KeyError:
        pass
    else:
        if row_xfx > -1:
            stats[1] += 1
            return row_xfx

    # 3. The column's XF index; -1 there means "use the default" (15).
    try:
        col_xfx = self.colinfo_map[colx].xf_index
    except KeyError:
        # 4. If all else fails, 15 is the hardwired global default xf_index.
        stats[3] += 1
        return 15
    stats[2] += 1
    return 15 if col_xfx == -1 else col_xfx

460 

def row_len(self, rowx):
    """
    Return the number of cells actually stored for the given row.

    Intended for use with ``open_workbook(ragged_rows=True)``, which is
    likely to produce rows holding fewer than :attr:`~Sheet.ncols` cells.

    .. versionadded:: 0.7.2
    """
    stored_values = self._cell_values[rowx]
    return len(stored_values)

470 

def row(self, rowx):
    """
    Return a list of the :class:`Cell` objects in the given row, one per
    cell stored for that row.
    """
    cell_count = len(self._cell_values[rowx])
    return [self.cell(rowx, colx) for colx in range(cell_count)]

479 

def __getitem__(self, item):
    """
    Index the sheet by ``rowindex`` or by ``(rowindex, colindex)``.

    A two-element index yields the cell at that position; anything that
    cannot be unpacked at all is handed to :meth:`row` unchanged.
    """
    try:
        rowix, colix = item
    except TypeError:
        # Not unpackable, so treat it as a plain row index. If that is
        # wrong too, the error from row() propagates to the caller.
        return self.row(item)
    return self.cell(rowix, colix)

493 

def get_rows(self):
    """
    Return a generator that yields each row of the sheet in turn, as
    produced by :meth:`row`.
    """
    row_indexes = range(self.nrows)
    return (self.row(rowx) for rowx in row_indexes)

497 

498 # makes `for row in sheet` natural and intuitive 

499 __iter__ = get_rows 

500 

def row_types(self, rowx, start_colx=0, end_colx=None):
    """
    Return a slice of the cell types stored for the given row.

    ``end_colx=None`` means "through the end of the row".
    """
    # A slice stop of None is the same as omitting the stop.
    types_in_row = self._cell_types[rowx]
    return types_in_row[start_colx:end_colx]

508 

def row_values(self, rowx, start_colx=0, end_colx=None):
    """
    Return a slice of the cell values stored for the given row.

    ``end_colx=None`` means "through the end of the row".
    """
    # A slice stop of None is the same as omitting the stop.
    values_in_row = self._cell_values[rowx]
    return values_in_row[start_colx:end_colx]

516 

def row_slice(self, rowx, start_colx=0, end_colx=None):
    """
    Return a list of the :class:`Cell` objects in a column range of the
    given row.

    Negative positions count back from the end of the row; a missing or
    too-large ``end_colx`` means the end of the row.
    """
    row_width = len(self._cell_values[rowx])

    first = start_colx
    if first < 0:
        # Count from the end, but never go before the first cell.
        first = max(first + row_width, 0)

    if end_colx is None or end_colx > row_width:
        stop = row_width
    elif end_colx < 0:
        stop = end_colx + row_width
    else:
        stop = end_colx

    return [self.cell(rowx, colx) for colx in range(first, stop)]

534 

def col_slice(self, colx, start_rowx=0, end_rowx=None):
    """
    Return a list of the :class:`Cell` objects in a row range of the given
    column.

    Negative positions count back from ``nrows``; a missing or too-large
    ``end_rowx`` means the last row.
    """
    row_total = self.nrows

    first = start_rowx
    if first < 0:
        # Count from the end, but never go before the first row.
        first = max(first + row_total, 0)

    if end_rowx is None or end_rowx > row_total:
        stop = row_total
    elif end_rowx < 0:
        stop = end_rowx + row_total
    else:
        stop = end_rowx

    return [self.cell(rowx, colx) for rowx in range(first, stop)]

552 

def col_values(self, colx, start_rowx=0, end_rowx=None):
    """
    Return a list of the cell values in a row range of the given column.

    Negative positions count back from ``nrows``; a missing or too-large
    ``end_rowx`` means the last row.
    """
    row_total = self.nrows

    first = start_rowx
    if first < 0:
        # Count from the end, but never go before the first row.
        first = max(first + row_total, 0)

    if end_rowx is None or end_rowx > row_total:
        stop = row_total
    elif end_rowx < 0:
        stop = end_rowx + row_total
    else:
        stop = end_rowx

    all_values = self._cell_values
    return [all_values[rowx][colx] for rowx in range(first, stop)]

570 

def col_types(self, colx, start_rowx=0, end_rowx=None):
    """
    Return a list of the cell types in a row range of the given column.

    Negative positions count back from ``nrows``; a missing or too-large
    ``end_rowx`` means the last row.
    """
    row_total = self.nrows

    first = start_rowx
    if first < 0:
        # Count from the end, but never go before the first row.
        first = max(first + row_total, 0)

    if end_rowx is None or end_rowx > row_total:
        stop = row_total
    elif end_rowx < 0:
        stop = end_rowx + row_total
    else:
        stop = end_rowx

    all_types = self._cell_types
    return [all_types[rowx][colx] for rowx in range(first, stop)]

588 

589 col = col_slice 

590 

591 # === Following methods are used in building the worksheet. 

592 # === They are not part of the API. 

593 

def tidy_dimensions(self):
    """
    Bring ``nrows``/``ncols`` and the stored rows into a consistent state.

    Steps, in order:

    1. Log the current dimensions when ``verbosity >= 3``.
    2. Grow the sheet so it covers every range in ``merged_cells``,
       warning about ranges outside the sheet's hard limits.
    3. When ``verbosity >= 1``, note any disagreement with the dimensions
       recorded in ``_dimnrows``/``_dimncols``.
    4. Unless ``ragged_rows`` is set, pad short rows out to ``ncols``.

    Not part of the public API.
    """
    if self.verbosity >= 3:
        fprintf(
            self.logfile,
            "tidy_dimensions: nrows=%d ncols=%d \n",
            self.nrows, self.ncols,
        )

    if self.merged_cells:
        row_limit = self.utter_max_rows
        col_limit = self.utter_max_cols
        merged_nrows = merged_ncols = 0
        for crange in self.merged_cells:
            rlo, rhi, clo, chi = crange
            if not (0 <= rlo < rhi <= row_limit and 0 <= clo < chi <= col_limit):
                fprintf(
                    self.logfile,
                    "*** WARNING: sheet #%d (%r), MERGEDCELLS bad range %r\n",
                    self.number, self.name, crange,
                )
            # A range that triggered the warning still counts towards the
            # extents below.
            merged_nrows = max(merged_nrows, rhi)
            merged_ncols = max(merged_ncols, chi)
        if merged_ncols > self.ncols:
            self.ncols = merged_ncols
            # Widening invalidates the "rows from here on are full" shortcut.
            self._first_full_rowx = -2
        if merged_nrows > self.nrows:
            # Put one empty cell at (merged_nrows-1, 0) to make sure we
            # have the right number of rows. The ragged-row handling
            # sorts out the rest if needed.
            self.put_cell(merged_nrows - 1, 0, XL_CELL_EMPTY, UNICODE_LITERAL(''), -1)

    dimensions_differ = (
        self.nrows != self._dimnrows or self.ncols != self._dimncols
    )
    if self.verbosity >= 1 and dimensions_differ:
        fprintf(
            self.logfile,
            "NOTE *** sheet %d (%r): DIMENSIONS R,C = %d,%d should be %d,%d\n",
            self.number,
            self.name,
            self._dimnrows,
            self._dimncols,
            self.nrows,
            self.ncols,
        )

    if self.ragged_rows:
        return

    # Fix ragged rows: pad every short row out to ncols cells.
    ncols = self.ncols
    all_types = self._cell_types
    all_values = self._cell_values
    all_xf_indexes = self._cell_xf_indexes
    with_fmt = self.formatting_info
    # -2 means every row must be examined; otherwise only the rows before
    # _first_full_rowx can be short.
    if self._first_full_rowx == -2:
        rows_to_check = self.nrows
    else:
        rows_to_check = self._first_full_rowx
    for rowx in range(rows_to_check):
        types_row = all_types[rowx]
        current_len = len(types_row)
        shortfall = ncols - current_len
        if shortfall <= 0:
            continue
        all_values[rowx][current_len:] = [UNICODE_LITERAL('')] * shortfall
        types_row[current_len:] = self.bt * shortfall
        if with_fmt:
            all_xf_indexes[rowx][current_len:] = self.bf * shortfall

654 

def put_cell_ragged(self, rowx, colx, ctype, value, xf_index):
    """
    Store one cell, keeping each row only as long as its last stored cell.

    Missing rows are created empty, a gap before ``colx`` is filled with
    empty cells, and ``nrows``/``ncols`` are raised as needed. When
    ``ctype`` is ``None`` the type is looked up from ``xf_index``.
    XF indexes are stored only when ``formatting_info`` is set.

    Any exception raised while storing is reported to ``logfile`` and then
    re-raised. Not part of the public API.
    """
    if ctype is None:
        # We have a number, so look up the cell type.
        ctype = self._xf_index_to_xl_type_map[xf_index]
    assert 0 <= colx < self.utter_max_cols
    assert 0 <= rowx < self.utter_max_rows
    with_fmt = self.formatting_info

    try:
        rows_needed = rowx + 1
        if rows_needed > self.nrows:
            # Append empty rows up to and including rowx.
            for _ in range(self.nrows, rows_needed):
                self._cell_types.append(self.bt * 0)
                self._cell_values.append([])
                if with_fmt:
                    self._cell_xf_indexes.append(self.bf * 0)
            self.nrows = rows_needed

        types_row = self._cell_types[rowx]
        values_row = self._cell_values[rowx]
        if with_fmt:
            fmt_row = self._cell_xf_indexes[rowx]
        row_len = len(types_row)
        if colx >= self.ncols:
            self.ncols = colx + 1

        gap = colx - row_len
        if gap == 0:
            # Most common case: colx == previous colx + 1.
            types_row.append(ctype)
            values_row.append(value)
            if with_fmt:
                fmt_row.append(xf_index)
            return

        if gap > 0:
            # Extend with empty cells through colx itself; the slot at colx
            # is overwritten just below.
            pad = gap + 1
            types_row[row_len:] = self.bt * pad
            values_row[row_len:] = [UNICODE_LITERAL('')] * pad
            if with_fmt:
                fmt_row[row_len:] = self.bf * pad

        types_row[colx] = ctype
        values_row[colx] = value
        if with_fmt:
            fmt_row[colx] = xf_index
    except:
        print("put_cell", rowx, colx, file=self.logfile)
        raise

713 

def put_cell_unragged(self, rowx, colx, ctype, value, xf_index):
    """
    Store one cell in row storage that is kept ``ncols`` wide.

    The cell is first written straight into the existing slot. If that
    slot does not exist (``IndexError``), the storage is grown -- widening
    the target row or appending new full-width rows -- and the write is
    retried. When ``ctype`` is ``None`` the type is looked up from
    ``xf_index``. XF indexes are stored only when ``formatting_info`` is
    set.

    An exception from either write attempt is reported to ``logfile`` and
    then re-raised. Not part of the public API.
    """
    if ctype is None:
        # We have a number, so look up the cell type.
        ctype = self._xf_index_to_xl_type_map[xf_index]
    try:
        self._cell_types[rowx][colx] = ctype
        self._cell_values[rowx][colx] = value
        if self.formatting_info:
            self._cell_xf_indexes[rowx][colx] = xf_index
    except IndexError:
        rows_needed = rowx + 1
        cols_needed = colx + 1
        assert 1 <= cols_needed <= self.utter_max_cols
        assert 1 <= rows_needed <= self.utter_max_rows
        if cols_needed > self.ncols:
            self.ncols = cols_needed
            # The row self._first_full_rowx and all subsequent rows are
            # guaranteed to have length == self.ncols, so the "fix ragged
            # rows" section of tidy_dimensions() need not examine them.
            if rows_needed < self.nrows:
                # Cell data is not in non-descending row order *AND*
                # self.ncols has been bumped up. This very rare case
                # ruins the optimisation.
                self._first_full_rowx = -2
            elif rowx > self._first_full_rowx > -2:
                self._first_full_rowx = rowx
        with_fmt = self.formatting_info
        if rows_needed <= self.nrows:
            # The cell belongs to an existing row: widen that row if
            # necessary. (rows_needed < self.nrows means the cell data is
            # not in ascending row order.)
            types_row = self._cell_types[rowx]
            shortfall = self.ncols - len(types_row)
            if shortfall > 0:
                types_row.extend(self.bt * shortfall)
                if with_fmt:
                    self._cell_xf_indexes[rowx].extend(self.bf * shortfall)
                self._cell_values[rowx].extend([UNICODE_LITERAL('')] * shortfall)
        else:
            # Append full-width empty rows up to and including rowx.
            width = self.ncols
            for _ in range(self.nrows, rows_needed):
                self._cell_types.append(self.bt * width)
                self._cell_values.append([UNICODE_LITERAL('')] * width)
                if with_fmt:
                    self._cell_xf_indexes.append(self.bf * width)
            self.nrows = rows_needed
        # Retry the write now that the slot exists.
        try:
            self._cell_types[rowx][colx] = ctype
            self._cell_values[rowx][colx] = value
            if self.formatting_info:
                self._cell_xf_indexes[rowx][colx] = xf_index
        except:
            print("put_cell", rowx, colx, file=self.logfile)
            raise
    except:
        print("put_cell", rowx, colx, file=self.logfile)
        raise

785 

786 

787 # === Methods after this line neither know nor care about how cells are stored. 

788 

789 def read(self, bk): 

790 global rc_stats 

791 DEBUG = 0 

792 blah = DEBUG or self.verbosity >= 2 

793 blah_rows = DEBUG or self.verbosity >= 4 

794 blah_formulas = 0 and blah 

795 r1c1 = 0 

796 oldpos = bk._position 

797 bk._position = self._position 

798 XL_SHRFMLA_ETC_ETC = ( 

799 XL_SHRFMLA, XL_ARRAY, XL_TABLEOP, XL_TABLEOP2, 

800 XL_ARRAY2, XL_TABLEOP_B2, 

801 ) 

802 self_put_cell = self.put_cell 

803 local_unpack = unpack 

804 bk_get_record_parts = bk.get_record_parts 

805 bv = self.biff_version 

806 fmt_info = self.formatting_info 

807 do_sst_rich_text = fmt_info and bk._rich_text_runlist_map 

808 rowinfo_sharing_dict = {} 

809 txos = {} 

810 eof_found = 0 

811 while 1: 

812 # if DEBUG: print "SHEET.READ: about to read from position %d" % bk._position 

813 rc, data_len, data = bk_get_record_parts() 

814 # if rc in rc_stats: 

815 # rc_stats[rc] += 1 

816 # else: 

817 # rc_stats[rc] = 1 

818 # if DEBUG: print "SHEET.READ: op 0x%04x, %d bytes %r" % (rc, data_len, data) 

819 if rc == XL_NUMBER: 

820 # [:14] in following stmt ignores extraneous rubbish at end of record. 

821 # Sample file testEON-8.xls supplied by Jan Kraus. 

822 rowx, colx, xf_index, d = local_unpack('<HHHd', data[:14]) 

823 # if xf_index == 0: 

824 # fprintf(self.logfile, 

825 # "NUMBER: r=%d c=%d xfx=%d %f\n", rowx, colx, xf_index, d) 

826 self_put_cell(rowx, colx, None, d, xf_index) 

827 elif rc == XL_LABELSST: 

828 rowx, colx, xf_index, sstindex = local_unpack('<HHHi', data) 

829 # print "LABELSST", rowx, colx, sstindex, bk._sharedstrings[sstindex] 

830 self_put_cell(rowx, colx, XL_CELL_TEXT, bk._sharedstrings[sstindex], xf_index) 

831 if do_sst_rich_text: 

832 runlist = bk._rich_text_runlist_map.get(sstindex) 

833 if runlist: 

834 self.rich_text_runlist_map[(rowx, colx)] = runlist 

835 elif rc == XL_LABEL: 

836 rowx, colx, xf_index = local_unpack('<HHH', data[0:6]) 

837 if bv < BIFF_FIRST_UNICODE: 

838 strg = unpack_string(data, 6, bk.encoding or bk.derive_encoding(), lenlen=2) 

839 else: 

840 strg = unpack_unicode(data, 6, lenlen=2) 

841 self_put_cell(rowx, colx, XL_CELL_TEXT, strg, xf_index) 

842 elif rc == XL_RSTRING: 

843 rowx, colx, xf_index = local_unpack('<HHH', data[0:6]) 

844 if bv < BIFF_FIRST_UNICODE: 

845 strg, pos = unpack_string_update_pos(data, 6, bk.encoding or bk.derive_encoding(), lenlen=2) 

846 nrt = BYTES_ORD(data[pos]) 

847 pos += 1 

848 runlist = [] 

849 for _unused in xrange(nrt): 

850 runlist.append(unpack('<BB', data[pos:pos+2])) 

851 pos += 2 

852 assert pos == len(data) 

853 else: 

854 strg, pos = unpack_unicode_update_pos(data, 6, lenlen=2) 

855 nrt = unpack('<H', data[pos:pos+2])[0] 

856 pos += 2 

857 runlist = [] 

858 for _unused in xrange(nrt): 

859 runlist.append(unpack('<HH', data[pos:pos+4])) 

860 pos += 4 

861 assert pos == len(data) 

862 self_put_cell(rowx, colx, XL_CELL_TEXT, strg, xf_index) 

863 self.rich_text_runlist_map[(rowx, colx)] = runlist 

864 elif rc == XL_RK: 

865 rowx, colx, xf_index = local_unpack('<HHH', data[:6]) 

866 d = unpack_RK(data[6:10]) 

867 self_put_cell(rowx, colx, None, d, xf_index) 

868 elif rc == XL_MULRK: 

869 mulrk_row, mulrk_first = local_unpack('<HH', data[0:4]) 

870 mulrk_last, = local_unpack('<H', data[-2:]) 

871 pos = 4 

872 for colx in xrange(mulrk_first, mulrk_last+1): 

873 xf_index, = local_unpack('<H', data[pos:pos+2]) 

874 d = unpack_RK(data[pos+2:pos+6]) 

875 pos += 6 

876 self_put_cell(mulrk_row, colx, None, d, xf_index) 

877 elif rc == XL_ROW: 

878 # Version 0.6.0a3: ROW records are just not worth using (for memory allocation). 

879 # Version 0.6.1: now used for formatting info. 

880 if not fmt_info: continue 

881 rowx, bits1, bits2 = local_unpack('<H4xH4xi', data[0:16]) 

882 if not(0 <= rowx < self.utter_max_rows): 

883 print("*** NOTE: ROW record has row index %d; " 

884 "should have 0 <= rowx < %d -- record ignored!" 

885 % (rowx, self.utter_max_rows), file=self.logfile) 

886 continue 

887 key = (bits1, bits2) 

888 r = rowinfo_sharing_dict.get(key) 

889 if r is None: 

890 rowinfo_sharing_dict[key] = r = Rowinfo() 

891 # Using upkbits() is far too slow on a file 

892 # with 30 sheets each with 10K rows :-( 

893 # upkbits(r, bits1, ( 

894 # ( 0, 0x7FFF, 'height'), 

895 # (15, 0x8000, 'has_default_height'), 

896 # )) 

897 # upkbits(r, bits2, ( 

898 # ( 0, 0x00000007, 'outline_level'), 

899 # ( 4, 0x00000010, 'outline_group_starts_ends'), 

900 # ( 5, 0x00000020, 'hidden'), 

901 # ( 6, 0x00000040, 'height_mismatch'), 

902 # ( 7, 0x00000080, 'has_default_xf_index'), 

903 # (16, 0x0FFF0000, 'xf_index'), 

904 # (28, 0x10000000, 'additional_space_above'), 

905 # (29, 0x20000000, 'additional_space_below'), 

906 # )) 

907 # So: 

908 r.height = bits1 & 0x7fff 

909 r.has_default_height = (bits1 >> 15) & 1 

910 r.outline_level = bits2 & 7 

911 r.outline_group_starts_ends = (bits2 >> 4) & 1 

912 r.hidden = (bits2 >> 5) & 1 

913 r.height_mismatch = (bits2 >> 6) & 1 

914 r.has_default_xf_index = (bits2 >> 7) & 1 

915 r.xf_index = (bits2 >> 16) & 0xfff 

916 r.additional_space_above = (bits2 >> 28) & 1 

917 r.additional_space_below = (bits2 >> 29) & 1 

918 if not r.has_default_xf_index: 

919 r.xf_index = -1 

920 self.rowinfo_map[rowx] = r 

921 if 0 and r.xf_index > -1: 

922 fprintf(self.logfile, 

923 "**ROW %d %d %d\n", 

924 self.number, rowx, r.xf_index) 

925 if blah_rows: 

926 print('ROW', rowx, bits1, bits2, file=self.logfile) 

927 r.dump(self.logfile, 

928 header="--- sh #%d, rowx=%d ---" % (self.number, rowx)) 

929 elif rc in XL_FORMULA_OPCODES: # 06, 0206, 0406 

930 # DEBUG = 1 

931 # if DEBUG: print "FORMULA: rc: 0x%04x data: %r" % (rc, data) 

932 if bv >= 50: 

933 rowx, colx, xf_index, result_str, flags = local_unpack('<HHH8sH', data[0:16]) 

934 elif bv >= 30: 

935 rowx, colx, xf_index, result_str, flags = local_unpack('<HHH8sH', data[0:16]) 

936 else: # BIFF2 

937 rowx, colx, cell_attr, result_str, flags = local_unpack('<HH3s8sB', data[0:16]) 

938 xf_index = self.fixed_BIFF2_xfindex(cell_attr, rowx, colx) 

939 if blah_formulas: # testing formula dumper 

940 #### XXXX FIXME 

941 fprintf(self.logfile, "FORMULA: rowx=%d colx=%d\n", rowx, colx) 

942 fmlalen = local_unpack("<H", data[20:22])[0] 

943 decompile_formula(bk, data[22:], fmlalen, FMLA_TYPE_CELL, 

944 browx=rowx, bcolx=colx, blah=1, r1c1=r1c1) 

945 if result_str[6:8] == b"\xFF\xFF": 

946 first_byte = BYTES_ORD(result_str[0]) 

947 if first_byte == 0: 

948 # need to read next record (STRING) 

949 gotstring = 0 

950 # if flags & 8: 

951 if 1: # "flags & 8" applies only to SHRFMLA 

952 # actually there's an optional SHRFMLA or ARRAY etc record to skip over 

953 rc2, data2_len, data2 = bk.get_record_parts() 

954 if rc2 == XL_STRING or rc2 == XL_STRING_B2: 

955 gotstring = 1 

956 elif rc2 == XL_ARRAY: 

957 row1x, rownx, col1x, colnx, array_flags, tokslen = \ 

958 local_unpack("<HHBBBxxxxxH", data2[:14]) 

959 if blah_formulas: 

960 fprintf(self.logfile, "ARRAY: %d %d %d %d %d\n", 

961 row1x, rownx, col1x, colnx, array_flags) 

962 # dump_formula(bk, data2[14:], tokslen, bv, reldelta=0, blah=1) 

963 elif rc2 == XL_SHRFMLA: 

964 row1x, rownx, col1x, colnx, nfmlas, tokslen = \ 

965 local_unpack("<HHBBxBH", data2[:10]) 

966 if blah_formulas: 

967 fprintf(self.logfile, "SHRFMLA (sub): %d %d %d %d %d\n", 

968 row1x, rownx, col1x, colnx, nfmlas) 

969 decompile_formula(bk, data2[10:], tokslen, FMLA_TYPE_SHARED, 

970 blah=1, browx=rowx, bcolx=colx, r1c1=r1c1) 

971 elif rc2 not in XL_SHRFMLA_ETC_ETC: 

972 raise XLRDError( 

973 "Expected SHRFMLA, ARRAY, TABLEOP* or STRING record; found 0x%04x" % rc2) 

974 # if DEBUG: print "gotstring:", gotstring 

975 # now for the STRING record 

976 if not gotstring: 

977 rc2, _unused_len, data2 = bk.get_record_parts() 

978 if rc2 not in (XL_STRING, XL_STRING_B2): 

979 raise XLRDError("Expected STRING record; found 0x%04x" % rc2) 

980 # if DEBUG: print "STRING: data=%r BIFF=%d cp=%d" % (data2, self.biff_version, bk.encoding) 

981 strg = self.string_record_contents(data2) 

982 self.put_cell(rowx, colx, XL_CELL_TEXT, strg, xf_index) 

983 # if DEBUG: print "FORMULA strg %r" % strg 

984 elif first_byte == 1: 

985 # boolean formula result 

986 value = BYTES_ORD(result_str[2]) 

987 self_put_cell(rowx, colx, XL_CELL_BOOLEAN, value, xf_index) 

988 elif first_byte == 2: 

989 # Error in cell 

990 value = BYTES_ORD(result_str[2]) 

991 self_put_cell(rowx, colx, XL_CELL_ERROR, value, xf_index) 

992 elif first_byte == 3: 

993 # empty ... i.e. empty (zero-length) string, NOT an empty cell. 

994 self_put_cell(rowx, colx, XL_CELL_TEXT, "", xf_index) 

995 else: 

996 raise XLRDError("unexpected special case (0x%02x) in FORMULA" % first_byte) 

997 else: 

998 # it is a number 

999 d = local_unpack('<d', result_str)[0] 

1000 self_put_cell(rowx, colx, None, d, xf_index) 

1001 elif rc == XL_BOOLERR: 

1002 rowx, colx, xf_index, value, is_err = local_unpack('<HHHBB', data[:8]) 

1003 # Note OOo Calc 2.0 writes 9-byte BOOLERR records. 

1004 # OOo docs say 8. Excel writes 8. 

1005 cellty = (XL_CELL_BOOLEAN, XL_CELL_ERROR)[is_err] 

1006 # if DEBUG: print "XL_BOOLERR", rowx, colx, xf_index, value, is_err 

1007 self_put_cell(rowx, colx, cellty, value, xf_index) 

1008 elif rc == XL_COLINFO: 

1009 if not fmt_info: continue 

1010 c = Colinfo() 

1011 first_colx, last_colx, c.width, c.xf_index, flags \ 

1012 = local_unpack("<HHHHH", data[:10]) 

1013 #### Colinfo.width is denominated in 256ths of a character, 

1014 #### *not* in characters. 

1015 if not(0 <= first_colx <= last_colx <= 256): 

1016 # Note: 256 instead of 255 is a common mistake. 

1017 # We silently ignore the non-existing 257th column in that case. 

1018 print("*** NOTE: COLINFO record has first col index %d, last %d; " 

1019 "should have 0 <= first <= last <= 255 -- record ignored!" 

1020 % (first_colx, last_colx), file=self.logfile) 

1021 del c 

1022 continue 

1023 upkbits(c, flags, ( 

1024 ( 0, 0x0001, 'hidden'), 

1025 ( 1, 0x0002, 'bit1_flag'), 

1026 # *ALL* colinfos created by Excel in "default" cases are 0x0002!! 

1027 # Maybe it's "locked" by analogy with XFProtection data. 

1028 ( 8, 0x0700, 'outline_level'), 

1029 (12, 0x1000, 'collapsed'), 

1030 )) 

1031 for colx in xrange(first_colx, last_colx+1): 

1032 if colx > 255: break # Excel does 0 to 256 inclusive 

1033 self.colinfo_map[colx] = c 

1034 if 0: 

1035 fprintf(self.logfile, 

1036 "**COL %d %d %d\n", 

1037 self.number, colx, c.xf_index) 

1038 if blah: 

1039 fprintf( 

1040 self.logfile, 

1041 "COLINFO sheet #%d cols %d-%d: wid=%d xf_index=%d flags=0x%04x\n", 

1042 self.number, first_colx, last_colx, c.width, c.xf_index, flags, 

1043 ) 

1044 c.dump(self.logfile, header='===') 

1045 elif rc == XL_DEFCOLWIDTH: 

1046 self.defcolwidth, = local_unpack("<H", data[:2]) 

1047 if 0: print('DEFCOLWIDTH', self.defcolwidth, file=self.logfile) 

1048 elif rc == XL_STANDARDWIDTH: 

1049 if data_len != 2: 

1050 print('*** ERROR *** STANDARDWIDTH', data_len, repr(data), file=self.logfile) 

1051 self.standardwidth, = local_unpack("<H", data[:2]) 

1052 if 0: print('STANDARDWIDTH', self.standardwidth, file=self.logfile) 

1053 elif rc == XL_GCW: 

1054 if not fmt_info: continue # useless w/o COLINFO 

1055 assert data_len == 34 

1056 assert data[0:2] == b"\x20\x00" 

1057 iguff = unpack("<8i", data[2:34]) 

1058 gcw = [] 

1059 for bits in iguff: 

1060 for j in xrange(32): 

1061 gcw.append(bits & 1) 

1062 bits >>= 1 

1063 self.gcw = tuple(gcw) 

1064 if 0: 

1065 showgcw = "".join(map(lambda x: "F "[x], gcw)).rstrip().replace(' ', '.') 

1066 print("GCW:", showgcw, file=self.logfile) 

1067 elif rc == XL_BLANK: 

1068 if not fmt_info: continue 

1069 rowx, colx, xf_index = local_unpack('<HHH', data[:6]) 

1070 # if 0: print >> self.logfile, "BLANK", rowx, colx, xf_index 

1071 self_put_cell(rowx, colx, XL_CELL_BLANK, '', xf_index) 

1072 elif rc == XL_MULBLANK: # 00BE 

1073 if not fmt_info: continue 

1074 nitems = data_len >> 1 

1075 result = local_unpack("<%dH" % nitems, data) 

1076 rowx, mul_first = result[:2] 

1077 mul_last = result[-1] 

1078 # print >> self.logfile, "MULBLANK", rowx, mul_first, mul_last, data_len, nitems, mul_last + 4 - mul_first 

1079 assert nitems == mul_last + 4 - mul_first 

1080 pos = 2 

1081 for colx in xrange(mul_first, mul_last + 1): 

1082 self_put_cell(rowx, colx, XL_CELL_BLANK, '', result[pos]) 

1083 pos += 1 

1084 elif rc == XL_DIMENSION or rc == XL_DIMENSION2: 

1085 if data_len == 0: 

1086 # Four zero bytes after some other record. See github issue 64. 

1087 continue 

1088 # if data_len == 10: 

1089 # Was crashing on BIFF 4.0 file w/o the two trailing unused bytes. 

1090 # Reported by Ralph Heimburger. 

1091 if bv < 80: 

1092 dim_tuple = local_unpack('<HxxH', data[2:8]) 

1093 else: 

1094 dim_tuple = local_unpack('<ixxH', data[4:12]) 

1095 self.nrows, self.ncols = 0, 0 

1096 self._dimnrows, self._dimncols = dim_tuple 

1097 if bv in (21, 30, 40) and self.book.xf_list and not self.book._xf_epilogue_done: 

1098 self.book.xf_epilogue() 

1099 if blah: 

1100 fprintf( 

1101 self.logfile, 

1102 "sheet %d(%r) DIMENSIONS: ncols=%d nrows=%d\n", 

1103 self.number, self.name, self._dimncols, self._dimnrows 

1104 ) 

1105 elif rc == XL_HLINK: 

1106 self.handle_hlink(data) 

1107 elif rc == XL_QUICKTIP: 

1108 self.handle_quicktip(data) 

1109 elif rc == XL_EOF: 

1110 DEBUG = 0 

1111 if DEBUG: print("SHEET.READ: EOF", file=self.logfile) 

1112 eof_found = 1 

1113 break 

1114 elif rc == XL_OBJ: 

1115 # handle SHEET-level objects; note there's a separate Book.handle_obj 

1116 saved_obj = self.handle_obj(data) 

1117 if saved_obj: saved_obj_id = saved_obj.id 

1118 else: saved_obj_id = None 

1119 elif rc == XL_MSO_DRAWING: 

1120 self.handle_msodrawingetc(rc, data_len, data) 

1121 elif rc == XL_TXO: 

1122 txo = self.handle_txo(data) 

1123 if txo and saved_obj_id: 

1124 txos[saved_obj_id] = txo 

1125 saved_obj_id = None 

1126 elif rc == XL_NOTE: 

1127 self.handle_note(data, txos) 

1128 elif rc == XL_FEAT11: 

1129 self.handle_feat11(data) 

1130 elif rc in bofcodes: ##### EMBEDDED BOF ##### 

1131 version, boftype = local_unpack('<HH', data[0:4]) 

1132 if boftype != 0x20: # embedded chart 

1133 print("*** Unexpected embedded BOF (0x%04x) at offset %d: version=0x%04x type=0x%04x" 

1134 % (rc, bk._position - data_len - 4, version, boftype), file=self.logfile) 

1135 while 1: 

1136 code, data_len, data = bk.get_record_parts() 

1137 if code == XL_EOF: 

1138 break 

1139 if DEBUG: print("---> found EOF", file=self.logfile) 

1140 elif rc == XL_COUNTRY: 

1141 bk.handle_country(data) 

1142 elif rc == XL_LABELRANGES: 

1143 pos = 0 

1144 pos = unpack_cell_range_address_list_update_pos( 

1145 self.row_label_ranges, data, pos, bv, addr_size=8, 

1146 ) 

1147 pos = unpack_cell_range_address_list_update_pos( 

1148 self.col_label_ranges, data, pos, bv, addr_size=8, 

1149 ) 

1150 assert pos == data_len 

1151 elif rc == XL_ARRAY: 

1152 row1x, rownx, col1x, colnx, array_flags, tokslen = \ 

1153 local_unpack("<HHBBBxxxxxH", data[:14]) 

1154 if blah_formulas: 

1155 print("ARRAY:", row1x, rownx, col1x, colnx, array_flags, file=self.logfile) 

1156 # dump_formula(bk, data[14:], tokslen, bv, reldelta=0, blah=1) 

1157 elif rc == XL_SHRFMLA: 

1158 row1x, rownx, col1x, colnx, nfmlas, tokslen = \ 

1159 local_unpack("<HHBBxBH", data[:10]) 

1160 if blah_formulas: 

1161 print("SHRFMLA (main):", row1x, rownx, col1x, colnx, nfmlas, file=self.logfile) 

1162 decompile_formula(bk, data[10:], tokslen, FMLA_TYPE_SHARED, 

1163 blah=1, browx=rowx, bcolx=colx, r1c1=r1c1) 

1164 elif rc == XL_CONDFMT: 

1165 if not fmt_info: continue 

1166 assert bv >= 80 

1167 num_CFs, needs_recalc, browx1, browx2, bcolx1, bcolx2 = \ 

1168 unpack("<6H", data[0:12]) 

1169 if self.verbosity >= 1: 

1170 fprintf( 

1171 self.logfile, 

1172 "\n*** WARNING: Ignoring CONDFMT (conditional formatting) record\n" 

1173 "*** in Sheet %d (%r).\n" 

1174 "*** %d CF record(s); needs_recalc_or_redraw = %d\n" 

1175 "*** Bounding box is %s\n", 

1176 self.number, self.name, num_CFs, needs_recalc, 

1177 rangename2d(browx1, browx2+1, bcolx1, bcolx2+1), 

1178 ) 

1179 olist = [] # updated by the function 

1180 pos = unpack_cell_range_address_list_update_pos( 

1181 olist, data, 12, bv, addr_size=8) 

1182 # print >> self.logfile, repr(result), len(result) 

1183 if self.verbosity >= 1: 

1184 fprintf( 

1185 self.logfile, 

1186 "*** %d individual range(s):\n" 

1187 "*** %s\n", 

1188 len(olist), 

1189 ", ".join(rangename2d(*coords) for coords in olist), 

1190 ) 

1191 elif rc == XL_CF: 

1192 if not fmt_info: continue 

1193 cf_type, cmp_op, sz1, sz2, flags = unpack("<BBHHi", data[0:10]) 

1194 font_block = (flags >> 26) & 1 

1195 bord_block = (flags >> 28) & 1 

1196 patt_block = (flags >> 29) & 1 

1197 if self.verbosity >= 1: 

1198 fprintf( 

1199 self.logfile, 

1200 "\n*** WARNING: Ignoring CF (conditional formatting) sub-record.\n" 

1201 "*** cf_type=%d, cmp_op=%d, sz1=%d, sz2=%d, flags=0x%08x\n" 

1202 "*** optional data blocks: font=%d, border=%d, pattern=%d\n", 

1203 cf_type, cmp_op, sz1, sz2, flags, 

1204 font_block, bord_block, patt_block, 

1205 ) 

1206 # hex_char_dump(data, 0, data_len, fout=self.logfile) 

1207 pos = 12 

1208 if font_block: 

1209 (font_height, font_options, weight, escapement, underline, 

1210 font_colour_index, two_bits, font_esc, font_underl) = unpack("<64x i i H H B 3x i 4x i i i 18x", data[pos:pos+118]) 

1211 font_style = (two_bits > 1) & 1 

1212 posture = (font_options > 1) & 1 

1213 font_canc = (two_bits > 7) & 1 

1214 cancellation = (font_options > 7) & 1 

1215 if self.verbosity >= 1: 

1216 fprintf( 

1217 self.logfile, 

1218 "*** Font info: height=%d, weight=%d, escapement=%d,\n" 

1219 "*** underline=%d, colour_index=%d, esc=%d, underl=%d,\n" 

1220 "*** style=%d, posture=%d, canc=%d, cancellation=%d\n", 

1221 font_height, weight, escapement, underline, 

1222 font_colour_index, font_esc, font_underl, 

1223 font_style, posture, font_canc, cancellation, 

1224 ) 

1225 pos += 118 

1226 if bord_block: 

1227 pos += 8 

1228 if patt_block: 

1229 pos += 4 

1230 fmla1 = data[pos:pos+sz1] 

1231 pos += sz1 

1232 if blah and sz1: 

1233 fprintf(self.logfile, "*** formula 1:\n") 

1234 dump_formula(bk, fmla1, sz1, bv, reldelta=0, blah=1) 

1235 fmla2 = data[pos:pos+sz2] 

1236 pos += sz2 

1237 assert pos == data_len 

1238 if blah and sz2: 

1239 fprintf(self.logfile, "*** formula 2:\n") 

1240 dump_formula(bk, fmla2, sz2, bv, reldelta=0, blah=1) 

1241 elif rc == XL_DEFAULTROWHEIGHT: 

1242 if data_len == 4: 

1243 bits, self.default_row_height = unpack("<HH", data[:4]) 

1244 elif data_len == 2: 

1245 self.default_row_height, = unpack("<H", data) 

1246 bits = 0 

1247 fprintf(self.logfile, 

1248 "*** WARNING: DEFAULTROWHEIGHT record len is 2, " 

1249 "should be 4; assuming BIFF2 format\n") 

1250 else: 

1251 bits = 0 

1252 fprintf(self.logfile, 

1253 "*** WARNING: DEFAULTROWHEIGHT record len is %d, " 

1254 "should be 4; ignoring this record\n", 

1255 data_len) 

1256 self.default_row_height_mismatch = bits & 1 

1257 self.default_row_hidden = (bits >> 1) & 1 

1258 self.default_additional_space_above = (bits >> 2) & 1 

1259 self.default_additional_space_below = (bits >> 3) & 1 

1260 elif rc == XL_MERGEDCELLS: 

1261 if not fmt_info: continue 

1262 pos = unpack_cell_range_address_list_update_pos( 

1263 self.merged_cells, data, 0, bv, addr_size=8) 

1264 if blah: 

1265 fprintf(self.logfile, 

1266 "MERGEDCELLS: %d ranges\n", (pos - 2) // 8) 

1267 assert pos == data_len, \ 

1268 "MERGEDCELLS: pos=%d data_len=%d" % (pos, data_len) 

1269 elif rc == XL_WINDOW2: 

1270 if bv >= 80 and data_len >= 14: 

1271 ( 

1272 options, 

1273 self.first_visible_rowx, self.first_visible_colx, 

1274 self.gridline_colour_index, 

1275 self.cached_page_break_preview_mag_factor, 

1276 self.cached_normal_view_mag_factor 

1277 ) = unpack("<HHHHxxHH", data[:14]) 

1278 else: 

1279 assert bv >= 30 # BIFF3-7 

1280 ( 

1281 options, 

1282 self.first_visible_rowx, self.first_visible_colx, 

1283 ) = unpack("<HHH", data[:6]) 

1284 self.gridline_colour_rgb = unpack("<BBB", data[6:9]) 

1285 self.gridline_colour_index = nearest_colour_index( 

1286 self.book.colour_map, self.gridline_colour_rgb, debug=0) 

1287 # options -- Bit, Mask, Contents: 

1288 # 0 0001H 0 = Show formula results 1 = Show formulas 

1289 # 1 0002H 0 = Do not show grid lines 1 = Show grid lines 

1290 # 2 0004H 0 = Do not show sheet headers 1 = Show sheet headers 

1291 # 3 0008H 0 = Panes are not frozen 1 = Panes are frozen (freeze) 

1292 # 4 0010H 0 = Show zero values as empty cells 1 = Show zero values 

1293 # 5 0020H 0 = Manual grid line colour 1 = Automatic grid line colour 

1294 # 6 0040H 0 = Columns from left to right 1 = Columns from right to left 

1295 # 7 0080H 0 = Do not show outline symbols 1 = Show outline symbols 

1296 # 8 0100H 0 = Keep splits if pane freeze is removed 1 = Remove splits if pane freeze is removed 

1297 # 9 0200H 0 = Sheet not selected 1 = Sheet selected (BIFF5-BIFF8) 

1298 # 10 0400H 0 = Sheet not visible 1 = Sheet visible (BIFF5-BIFF8) 

1299 # 11 0800H 0 = Show in normal view 1 = Show in page break preview (BIFF8) 

1300 # The freeze flag specifies, if a following PANE record (6.71) describes unfrozen or frozen panes. 

1301 for attr, _unused_defval in _WINDOW2_options: 

1302 setattr(self, attr, options & 1) 

1303 options >>= 1 

1304 elif rc == XL_SCL: 

1305 num, den = unpack("<HH", data) 

1306 result = 0 

1307 if den: 

1308 result = (num * 100) // den 

1309 if not(10 <= result <= 400): 

1310 if DEBUG or self.verbosity >= 0: 

1311 print( 

1312 "WARNING *** SCL rcd sheet %d: should have 0.1 <= num/den <= 4; got %d/%d" 

1313 % (self.number, num, den), 

1314 file=self.logfile, 

1315 ) 

1316 result = 100 

1317 self.scl_mag_factor = result 

1318 elif rc == XL_PANE: 

1319 ( 

1320 self.vert_split_pos, 

1321 self.horz_split_pos, 

1322 self.horz_split_first_visible, 

1323 self.vert_split_first_visible, 

1324 self.split_active_pane, 

1325 ) = unpack("<HHHHB", data[:9]) 

1326 self.has_pane_record = 1 

1327 elif rc == XL_HORIZONTALPAGEBREAKS: 

1328 if not fmt_info: continue 

1329 num_breaks, = local_unpack("<H", data[:2]) 

1330 assert num_breaks * (2 + 4 * (bv >= 80)) + 2 == data_len 

1331 pos = 2 

1332 if bv < 80: 

1333 while pos < data_len: 

1334 self.horizontal_page_breaks.append((local_unpack("<H", data[pos:pos+2])[0], 0, 255)) 

1335 pos += 2 

1336 else: 

1337 while pos < data_len: 

1338 self.horizontal_page_breaks.append(local_unpack("<HHH", data[pos:pos+6])) 

1339 pos += 6 

1340 elif rc == XL_VERTICALPAGEBREAKS: 

1341 if not fmt_info: continue 

1342 num_breaks, = local_unpack("<H", data[:2]) 

1343 assert num_breaks * (2 + 4 * (bv >= 80)) + 2 == data_len 

1344 pos = 2 

1345 if bv < 80: 

1346 while pos < data_len: 

1347 self.vertical_page_breaks.append((local_unpack("<H", data[pos:pos+2])[0], 0, 65535)) 

1348 pos += 2 

1349 else: 

1350 while pos < data_len: 

1351 self.vertical_page_breaks.append(local_unpack("<HHH", data[pos:pos+6])) 

1352 pos += 6 

1353 #### all of the following are for BIFF <= 4W 

1354 elif bv <= 45: 

1355 if rc == XL_FORMAT or rc == XL_FORMAT2: 

1356 bk.handle_format(data, rc) 

1357 elif rc == XL_FONT or rc == XL_FONT_B3B4: 

1358 bk.handle_font(data) 

1359 elif rc == XL_STYLE: 

1360 if not self.book._xf_epilogue_done: 

1361 self.book.xf_epilogue() 

1362 bk.handle_style(data) 

1363 elif rc == XL_PALETTE: 

1364 bk.handle_palette(data) 

1365 elif rc == XL_BUILTINFMTCOUNT: 

1366 bk.handle_builtinfmtcount(data) 

1367 elif rc == XL_XF4 or rc == XL_XF3 or rc == XL_XF2: #### N.B. not XL_XF 

1368 bk.handle_xf(data) 

1369 elif rc == XL_DATEMODE: 

1370 bk.handle_datemode(data) 

1371 elif rc == XL_CODEPAGE: 

1372 bk.handle_codepage(data) 

1373 elif rc == XL_FILEPASS: 

1374 bk.handle_filepass(data) 

1375 elif rc == XL_WRITEACCESS: 

1376 bk.handle_writeaccess(data) 

1377 elif rc == XL_IXFE: 

1378 self._ixfe = local_unpack('<H', data)[0] 

1379 elif rc == XL_NUMBER_B2: 

1380 rowx, colx, cell_attr, d = local_unpack('<HH3sd', data) 

1381 self_put_cell(rowx, colx, None, d, self.fixed_BIFF2_xfindex(cell_attr, rowx, colx)) 

1382 elif rc == XL_INTEGER: 

1383 rowx, colx, cell_attr, d = local_unpack('<HH3sH', data) 

1384 self_put_cell(rowx, colx, None, float(d), self.fixed_BIFF2_xfindex(cell_attr, rowx, colx)) 

1385 elif rc == XL_LABEL_B2: 

1386 rowx, colx, cell_attr = local_unpack('<HH3s', data[0:7]) 

1387 strg = unpack_string(data, 7, bk.encoding or bk.derive_encoding(), lenlen=1) 

1388 self_put_cell(rowx, colx, XL_CELL_TEXT, strg, self.fixed_BIFF2_xfindex(cell_attr, rowx, colx)) 

1389 elif rc == XL_BOOLERR_B2: 

1390 rowx, colx, cell_attr, value, is_err = local_unpack('<HH3sBB', data) 

1391 cellty = (XL_CELL_BOOLEAN, XL_CELL_ERROR)[is_err] 

1392 # if DEBUG: print "XL_BOOLERR_B2", rowx, colx, cell_attr, value, is_err 

1393 self_put_cell(rowx, colx, cellty, value, self.fixed_BIFF2_xfindex(cell_attr, rowx, colx)) 

1394 elif rc == XL_BLANK_B2: 

1395 if not fmt_info: continue 

1396 rowx, colx, cell_attr = local_unpack('<HH3s', data[:7]) 

1397 self_put_cell(rowx, colx, XL_CELL_BLANK, '', self.fixed_BIFF2_xfindex(cell_attr, rowx, colx)) 

1398 elif rc == XL_EFONT: 

1399 bk.handle_efont(data) 

1400 elif rc == XL_ROW_B2: 

1401 if not fmt_info: continue 

1402 rowx, bits1, bits2 = local_unpack('<H4xH2xB', data[0:11]) 

1403 if not(0 <= rowx < self.utter_max_rows): 

1404 print("*** NOTE: ROW_B2 record has row index %d; " 

1405 "should have 0 <= rowx < %d -- record ignored!" 

1406 % (rowx, self.utter_max_rows), file=self.logfile) 

1407 continue 

1408 if not (bits2 & 1): # has_default_xf_index is false 

1409 xf_index = -1 

1410 elif data_len == 18: 

1411 # Seems the XF index in the cell_attr is dodgy 

1412 xfx = local_unpack('<H', data[16:18])[0] 

1413 xf_index = self.fixed_BIFF2_xfindex(cell_attr=None, rowx=rowx, colx=-1, true_xfx=xfx) 

1414 else: 

1415 cell_attr = data[13:16] 

1416 xf_index = self.fixed_BIFF2_xfindex(cell_attr, rowx, colx=-1) 

1417 key = (bits1, bits2, xf_index) 

1418 r = rowinfo_sharing_dict.get(key) 

1419 if r is None: 

1420 rowinfo_sharing_dict[key] = r = Rowinfo() 

1421 r.height = bits1 & 0x7fff 

1422 r.has_default_height = (bits1 >> 15) & 1 

1423 r.has_default_xf_index = bits2 & 1 

1424 r.xf_index = xf_index 

1425 # r.outline_level = 0 # set in __init__ 

1426 # r.outline_group_starts_ends = 0 # set in __init__ 

1427 # r.hidden = 0 # set in __init__ 

1428 # r.height_mismatch = 0 # set in __init__ 

1429 # r.additional_space_above = 0 # set in __init__ 

1430 # r.additional_space_below = 0 # set in __init__ 

1431 self.rowinfo_map[rowx] = r 

1432 if 0 and r.xf_index > -1: 

1433 fprintf(self.logfile, 

1434 "**ROW %d %d %d\n", 

1435 self.number, rowx, r.xf_index) 

1436 if blah_rows: 

1437 print('ROW_B2', rowx, bits1, file=self.logfile) 

1438 r.dump(self.logfile, 

1439 header="--- sh #%d, rowx=%d ---" % (self.number, rowx)) 

1440 elif rc == XL_COLWIDTH: # BIFF2 only 

1441 if not fmt_info: continue 

1442 first_colx, last_colx, width\ 

1443 = local_unpack("<BBH", data[:4]) 

1444 if not(first_colx <= last_colx): 

1445 print("*** NOTE: COLWIDTH record has first col index %d, last %d; " 

1446 "should have first <= last -- record ignored!" 

1447 % (first_colx, last_colx), file=self.logfile) 

1448 continue 

1449 for colx in xrange(first_colx, last_colx+1): 

1450 if colx in self.colinfo_map: 

1451 c = self.colinfo_map[colx] 

1452 else: 

1453 c = Colinfo() 

1454 self.colinfo_map[colx] = c 

1455 c.width = width 

1456 if blah: 

1457 fprintf( 

1458 self.logfile, 

1459 "COLWIDTH sheet #%d cols %d-%d: wid=%d\n", 

1460 self.number, first_colx, last_colx, width, 

1461 ) 

1462 elif rc == XL_COLUMNDEFAULT: # BIFF2 only 

1463 if not fmt_info: continue 

1464 first_colx, last_colx = local_unpack("<HH", data[:4]) 

1465 #### Warning OOo docs wrong; first_colx <= colx < last_colx 

1466 if blah: 

1467 fprintf( 

1468 self.logfile, 

1469 "COLUMNDEFAULT sheet #%d cols in range(%d, %d)\n", 

1470 self.number, first_colx, last_colx, 

1471 ) 

1472 if not(0 <= first_colx < last_colx <= 256): 

1473 print("*** NOTE: COLUMNDEFAULT record has first col index %d, last %d; " 

1474 "should have 0 <= first < last <= 256" 

1475 % (first_colx, last_colx), file=self.logfile) 

1476 last_colx = min(last_colx, 256) 

1477 for colx in xrange(first_colx, last_colx): 

1478 offset = 4 + 3 * (colx - first_colx) 

1479 cell_attr = data[offset:offset+3] 

1480 xf_index = self.fixed_BIFF2_xfindex(cell_attr, rowx=-1, colx=colx) 

1481 if colx in self.colinfo_map: 

1482 c = self.colinfo_map[colx] 

1483 else: 

1484 c = Colinfo() 

1485 self.colinfo_map[colx] = c 

1486 c.xf_index = xf_index 

1487 elif rc == XL_WINDOW2_B2: # BIFF 2 only 

1488 attr_names = ("show_formulas", "show_grid_lines", "show_sheet_headers", 

1489 "panes_are_frozen", "show_zero_values") 

1490 for attr, char in zip(attr_names, data[0:5]): 

1491 setattr(self, attr, int(char != b'\0')) 

1492 ( 

1493 self.first_visible_rowx, self.first_visible_colx, 

1494 self.automatic_grid_line_colour, 

1495 ) = unpack("<HHB", data[5:10]) 

1496 self.gridline_colour_rgb = unpack("<BBB", data[10:13]) 

1497 self.gridline_colour_index = nearest_colour_index( 

1498 self.book.colour_map, self.gridline_colour_rgb, debug=0) 

1499 else: 

1500 # if DEBUG: print "SHEET.READ: Unhandled record type %02x %d bytes %r" % (rc, data_len, data) 

1501 pass 

1502 if not eof_found: 

1503 raise XLRDError("Sheet %d (%r) missing EOF record" 

1504 % (self.number, self.name)) 

1505 self.tidy_dimensions() 

1506 self.update_cooked_mag_factors() 

1507 bk._position = oldpos 

1508 return 1 

1509 

def string_record_contents(self, data):
    """
    Decode the text carried by a STRING record, pulling in CONTINUE
    records from the book until the announced length is reached.

    :param data:
      Payload of the STRING record. It starts with the character count,
      stored in 2 bytes when ``biff_version >= 30`` and in 1 byte
      otherwise, followed by the character data.

    :return:
      The decoded text, as soon as exactly the announced number of
      characters has been collected.

    :raises XLRDError:
      If more characters are decoded than were announced, or if the
      record read after an incomplete segment is not a CONTINUE record.
    """
    version = self.biff_version
    book = self.book
    if version >= 30:
        count_format, offset = "<H", 2
    else:
        count_format, offset = "<B", 1
    nchars_expected = unpack(count_format, data[:offset])[0]
    if version < 80:
        # A single encoding applies to every segment of the string.
        enc = book.encoding or book.derive_encoding()
    pieces = []
    nchars_found = 0
    while True:
        if version >= 80:
            # Every segment begins with an option byte; its low bit
            # chooses between latin_1 (0) and utf_16_le (1).
            wide = BYTES_ORD(data[offset]) & 1
            enc = "utf_16_le" if wide else "latin_1"
            offset += 1
        piece = unicode(data[offset:], enc)
        pieces.append(piece)
        nchars_found += len(piece)
        if nchars_found == nchars_expected:
            return UNICODE_LITERAL("").join(pieces)
        if nchars_found > nchars_expected:
            raise XLRDError(
                "STRING/CONTINUE: expected %d chars, found %d"
                % (nchars_expected, nchars_found))
        # Still short: the remainder must be in a following CONTINUE record.
        rc, _unused_len, data = book.get_record_parts()
        if rc != XL_CONTINUE:
            raise XLRDError(
                "Expected CONTINUE record; found record-type 0x%04X" % rc)
        offset = 0

1539 

def update_cooked_mag_factors(self):
    """
    Derive the "cooked" magnification factors for both view modes.

    The view mode that is currently active (chosen by
    ``show_in_page_break_preview``) takes its factor from
    ``scl_mag_factor``, or 100 when that is ``None`` (no SCL record).
    The other mode takes its cached WINDOW2 value; if that value is
    not usable, the active mode's factor is used instead.

    Sets ``cooked_normal_view_mag_factor`` and
    ``cooked_page_break_preview_mag_factor``; returns nothing.
    """
    # Cached values matter ONLY for the view mode that is not active.
    # If the user switches to that mode and its cached value is invalid,
    # Excel complains ("The number must be between 10 and 400. ...") and
    # then shows the mode with the magnification of the active mode.
    # NOTE: what counts as "valid" differs between the two modes.
    verbose = DEBUG or self.verbosity > 0
    in_preview = self.show_in_page_break_preview
    scl = self.scl_mag_factor
    # Yes, 100 even for page break preview -- not 60, NOT a typo.
    active_factor = 100 if scl is None else scl

    if in_preview:
        self.cooked_page_break_preview_mag_factor = active_factor
        cached = self.cached_normal_view_mag_factor
        if not (10 <= cached <= 400):
            if verbose:
                print(
                    "WARNING *** WINDOW2 rcd sheet %d: Bad cached_normal_view_mag_factor: %d"
                    % (self.number, cached),
                    file=self.logfile,
                )
            cached = active_factor
        self.cooked_normal_view_mag_factor = cached
        return

    # Normal view mode is the active one.
    self.cooked_normal_view_mag_factor = active_factor
    cached = self.cached_page_break_preview_mag_factor
    if not cached:
        # A zero/missing value is VALID here and means the default of 60.
        cached = 60
    elif not (10 <= cached <= 400):
        if verbose:
            print(
                "WARNING *** WINDOW2 rcd sheet %r: Bad cached_page_break_preview_mag_factor: %r"
                % (self.number, cached),
                file=self.logfile,
            )
        cached = active_factor
    self.cooked_page_break_preview_mag_factor = cached

1584 

def fixed_BIFF2_xfindex(self, cell_attr, rowx, colx, true_xfx=None):
    """
    Work out the XF index to use for a BIFF2 cell attribute.

    :param cell_attr:
      The 3-byte cell attribute. Only consulted when ``true_xfx`` is not
      supplied or when no XF records are available.
    :param rowx: Row index, used only in the diagnostic message.
    :param colx: Column index, used only in the diagnostic message.
    :param true_xfx:
      An XF index that overrides the one embedded in ``cell_attr``
      (honoured only for ``biff_version == 21`` with XF records present).

    :return: The XF index.

    :raises XLRDError:
      If the index is 63 and no IXFE record has been seen.
    """
    verbose = self.verbosity >= 2
    if self.biff_version == 21:
        if self.book.xf_list:
            if true_xfx is None:
                candidate = BYTES_ORD(cell_attr[0]) & 0x3F
            else:
                candidate = true_xfx
            if candidate != 0x3F:
                return candidate
            # Index 63 means "use the value of the IXFE record".
            # OOo docs can be read as saying each cell record is
            # immediately preceded by its own IXFE record. Empirical
            # evidence is that (sensibly) an IXFE record applies to all
            # following cell records until another IXFE comes along.
            if self._ixfe is None:
                raise XLRDError("BIFF2 cell record has XF index 63 but no preceding IXFE record.")
            return self._ixfe
        # Either Excel 2.0, or a broken 2.1 file without XF records --
        # same effect, so treat it as 2.0 from now on.
        self.biff_version = self.book.biff_version = 20

    # The XF slot inside cell_attr is expected to be zero here.
    assert (BYTES_ORD(cell_attr[0]) & 0x3F) == 0
    known = self._cell_attr_to_xfx.get(cell_attr)
    if known is not None:
        return known
    if verbose:
        fprintf(self.logfile, "New cell_attr %r at (%r, %r)\n", cell_attr, rowx, colx)
    if not self.book.xf_list:
        # Seed the XF list with 16 entries; the first 15 are flagged as
        # style XFs, the 16th is not.
        for slot in xrange(16):
            self.insert_new_BIFF20_xf(cell_attr=b"\x40\x00\x00", style=slot < 15)
    return self.insert_new_BIFF20_xf(cell_attr=cell_attr)

1618 

def insert_new_BIFF20_xf(self, cell_attr, style=0):
    """
    Build a faked XF for ``cell_attr``, append it to the book's XF list
    and register it in this sheet's lookup maps.

    :param cell_attr: The 3-byte BIFF2 cell attribute to convert.
    :param style: Passed through to ``fake_XF_from_BIFF20_cell_attr``.

    :return: The index of the new XF in ``book.xf_list``.
    """
    verbose = self.verbosity >= 2
    book = self.book
    new_index = len(book.xf_list)
    xf = self.fake_XF_from_BIFF20_cell_attr(cell_attr, style)
    xf.xf_index = new_index
    book.xf_list.append(xf)
    if verbose:
        xf.dump(self.logfile, header="=== Faked XF %d ===" % new_index, footer="======")

    if xf.format_key not in book.format_map:
        # Unknown format: complain unless the key is 0, then register a
        # "General" format under that key so the lookup below succeeds.
        if xf.format_key:
            fprintf(
                self.logfile,
                "ERROR *** XF[%d] unknown format key (%d, 0x%04x)\n",
                xf.xf_index, xf.format_key, xf.format_key,
            )
        general = Format(xf.format_key, FUN, UNICODE_LITERAL("General"))
        book.format_map[xf.format_key] = general
        book.format_list.append(general)

    cell_type_for_format_type = {
        FNU: XL_CELL_NUMBER,
        FUN: XL_CELL_NUMBER,
        FGE: XL_CELL_NUMBER,
        FDT: XL_CELL_DATE,
        FTX: XL_CELL_NUMBER,  # Yes, a number can be formatted as text.
    }
    format_type = book.format_map[xf.format_key].type
    self._xf_index_to_xl_type_map[xf.xf_index] = cell_type_for_format_type[format_type]
    self._cell_attr_to_xfx[cell_attr] = new_index
    return new_index

1649 

def fake_XF_from_BIFF20_cell_attr(self, cell_attr, style=0):
    """
    Construct an XF object from a 3-byte BIFF2 cell attribute.

    :param cell_attr:
      Three bytes: protection bits, combined font/format byte, and a
      byte holding horizontal alignment, border and shading bits.
    :param style:
      Index (0 or 1) selecting the parent style index:
      0 gives 0x0FFF, 1 gives 0.

    :return: The populated XF object (its ``xf_index`` is not set here).
    """
    from .formatting import XF, XFAlignment, XFBorder, XFBackground, XFProtection

    xf = XF()

    alignment = xf.alignment = XFAlignment()
    alignment.indent_level = 0
    alignment.shrink_to_fit = 0
    alignment.text_direction = 0

    border = xf.border = XFBorder()
    border.diag_up = 0
    border.diag_down = 0
    border.diag_colour_index = 0
    border.diag_line_style = 0  # no line

    background = xf.background = XFBackground()
    xf.protection = XFProtection()

    prot_bits, font_and_format, halign_etc = unpack('<BBB', cell_attr)

    # Low 6 bits: format key; top 2 bits: font index.
    xf.format_key = font_and_format & 0x3F
    xf.font_index = (font_and_format & 0xC0) >> 6
    upkbits(xf.protection, prot_bits, (
        (6, 0x40, 'cell_locked'),
        (7, 0x80, 'formula_hidden'),
    ))

    alignment.hor_align = halign_etc & 0x07
    border_bits = (
        (0x08, 'left'),
        (0x10, 'right'),
        (0x20, 'top'),
        (0x40, 'bottom'),
    )
    for mask, side in border_bits:
        # A set bit means a thin black line; otherwise no line at all.
        colour_index, line_style = (8, 1) if halign_etc & mask else (0, 0)
        setattr(border, side + '_colour_index', colour_index)
        setattr(border, side + '_line_style', line_style)

    background.fill_pattern = 17 if halign_etc & 0x80 else 0
    background.background_colour_index = 9  # white
    background.pattern_colour_index = 8  # black

    parent_choices = (0x0FFF, 0)
    xf.parent_style_index = parent_choices[style]
    alignment.vert_align = 2  # bottom
    alignment.rotation = 0

    # Switch on every "_<stem>_flag" attribute of the XF.
    for stem in ('format', 'font', 'alignment', 'border', 'background', 'protection'):
        setattr(xf, "_" + stem + "_flag", 1)
    return xf

1701 

def req_fmt_info(self):
    """
    Ensure formatting information was requested when the workbook was opened.

    :raises XLRDError: if ``self.formatting_info`` is false.
    """
    if self.formatting_info:
        return
    raise XLRDError("Feature requires open_workbook(..., formatting_info=True)")

1705 

def computed_column_width(self, colx):
    """
    Determine column display width.

    :param colx:
      Index of the queried column, range 0 to 255.
      Note that it is possible to find out the width that will be used to
      display columns with no cell information e.g. column IV (colx=255).

    :return:
      The column width that will be used for displaying
      the given column by Excel, in units of 1/256th of the width of a
      standard character (the digit zero in the first font).

    .. versionadded:: 0.6.1
    """
    self.req_fmt_info()

    # Decide which width sources apply to this BIFF version; COLINFO is
    # always consulted before STANDARDWIDTH when both are eligible.
    version = self.biff_version
    try_colinfo = try_standard = False
    if version >= 80:
        try_colinfo = try_standard = True
    elif version >= 40:
        # The GCW bit picks exactly one of the two sources.
        if self.gcw[colx]:
            try_standard = True
        else:
            try_colinfo = True
    elif version == 30:
        try_colinfo = True

    if try_colinfo:
        colinfo = self.colinfo_map.get(colx)
        if colinfo is not None:
            return colinfo.width
    if try_standard and self.standardwidth is not None:
        return self.standardwidth

    # All roads lead to Rome and the DEFCOLWIDTH ...
    if self.defcolwidth is None:
        return 8 * 256  # 8 is what Excel puts in a DEFCOLWIDTH record
    return self.defcolwidth * 256

1745 

def handle_hlink(self, data):
    """
    Parse one hyperlink record and register the resulting Hyperlink.

    The decoded object is appended to ``self.hyperlink_list`` and stored in
    ``self.hyperlink_map`` under every ``(rowx, colx)`` of its cell range.

    :param data: Raw bytes of the record body.
    :raises XLRDError:
      if parsing consumed more bytes than the record contains.
    :raises AssertionError:
      if the fixed GUID / marker bytes or the URL trailer size are not
      the expected values.
    """
    # DEBUG = 1
    if DEBUG: print("\n=== hyperlink ===", file=self.logfile)
    record_size = len(data)
    h = Hyperlink()
    # Fixed 32-byte header: cell range, 16-byte GUID, 4 marker bytes, option flags.
    h.frowx, h.lrowx, h.fcolx, h.lcolx, guid0, dummy, options = unpack('<HHHH16s4si', data[:32])
    assert guid0 == b"\xD0\xC9\xEA\x79\xF9\xBA\xCE\x11\x8C\x82\x00\xAA\x00\x4B\xA9\x0B"
    assert dummy == b"\x02\x00\x00\x00"
    if DEBUG: print("options: %08X" % options, file=self.logfile)
    # offset always points at the first byte not yet consumed.
    offset = 32

    def get_nul_terminated_unicode(buf, ofs):
        # Reads a 4-byte character count, then that many UTF-16LE code units;
        # the final unit (the terminator) is dropped from the returned string.
        nb = unpack('<L', buf[ofs:ofs+4])[0] * 2
        ofs += 4
        uc = unicode(buf[ofs:ofs+nb], 'UTF-16le')[:-1]
        ofs += nb
        return uc, ofs

    # NOTE(review): this is true when EITHER bit 0x04 or bit 0x10 is set;
    # confirm against the record spec whether both bits are required.
    if options & 0x14: # has a description
        h.desc, offset = get_nul_terminated_unicode(data, offset)

    if options & 0x80: # has a target
        h.target, offset = get_nul_terminated_unicode(data, offset)

    if (options & 1) and not (options & 0x100): # HasMoniker and not MonikerSavedAsString
        # an OLEMoniker structure
        clsid, = unpack('<16s', data[offset:offset + 16])
        if DEBUG: fprintf(self.logfile, "clsid=%r\n", clsid)
        offset += 16
        if clsid == b"\xE0\xC9\xEA\x79\xF9\xBA\xCE\x11\x8C\x82\x00\xAA\x00\x4B\xA9\x0B":
            # E0H C9H EAH 79H F9H BAH CEH 11H 8CH 82H 00H AAH 00H 4BH A9H 0BH
            # URL Moniker
            h.type = UNICODE_LITERAL('url')
            nbytes = unpack('<L', data[offset:offset + 4])[0]
            offset += 4
            h.url_or_path = unicode(data[offset:offset + nbytes], 'UTF-16le')
            if DEBUG: fprintf(self.logfile, "initial url=%r len=%d\n", h.url_or_path, len(h.url_or_path))
            # The URL proper ends at the first NUL; anything after it is trailer data.
            # NOTE(review): if no NUL is present find() returns -1, so the
            # slice below drops the last character and true_nbytes becomes 0.
            endpos = h.url_or_path.find('\x00')
            if DEBUG: print("endpos=%d" % endpos, file=self.logfile)
            h.url_or_path = h.url_or_path[:endpos]
            true_nbytes = 2 * (endpos + 1)
            offset += true_nbytes
            extra_nbytes = nbytes - true_nbytes
            extra_data = data[offset:offset + extra_nbytes]
            offset += extra_nbytes
            if DEBUG:
                fprintf(
                    self.logfile,
                    "url=%r\nextra=%r\nnbytes=%d true_nbytes=%d extra_nbytes=%d\n",
                    h.url_or_path, extra_data, nbytes, true_nbytes, extra_nbytes,
                )
            assert extra_nbytes in (24, 0)
        elif clsid == b"\x03\x03\x00\x00\x00\x00\x00\x00\xC0\x00\x00\x00\x00\x00\x00\x46":
            # file moniker
            h.type = UNICODE_LITERAL('local file')
            uplevels, nbytes = unpack("<Hi", data[offset:offset + 6])
            offset += 6
            # nbytes - 1 leaves out the last byte of the stored path.
            shortpath = b"..\\" * uplevels + data[offset:offset + nbytes - 1] #### BYTES, not unicode
            if DEBUG: fprintf(self.logfile, "uplevels=%d shortpath=%r\n", uplevels, shortpath)
            offset += nbytes
            offset += 24 # OOo: "unknown byte sequence"
            # above is version 0xDEAD + 20 reserved zero bytes
            sz = unpack('<i', data[offset:offset + 4])[0]
            if DEBUG: print("sz=%d" % sz, file=self.logfile)
            offset += 4
            if sz:
                # A non-zero size means an extended (UTF-16LE) path follows
                # and is used instead of the short path.
                xl = unpack('<i', data[offset:offset + 4])[0]
                offset += 4
                offset += 2 # "unknown byte sequence" MS: 0x0003
                extended_path = unicode(data[offset:offset + xl], 'UTF-16le') # not zero-terminated
                offset += xl
                h.url_or_path = extended_path
            else:
                h.url_or_path = shortpath
                #### MS KLUDGE WARNING ####
                # The "shortpath" is bytes encoded in the **UNKNOWN** creator's "ANSI" encoding.
        else:
            # h.type stays at its class default in this case.
            fprintf(self.logfile, "*** unknown clsid %r\n", clsid)
    elif options & 0x163 == 0x103: # UNC
        h.type = UNICODE_LITERAL('unc')
        h.url_or_path, offset = get_nul_terminated_unicode(data, offset)
    elif options & 0x16B == 8:
        h.type = UNICODE_LITERAL('workbook')
    else:
        h.type = UNICODE_LITERAL('unknown')

    if options & 0x8: # has textmark
        h.textmark, offset = get_nul_terminated_unicode(data, offset)

    if DEBUG:
        # NOTE(review): unlike the other debug output in this method, these
        # two calls are not given self.logfile.
        h.dump(header="... object dump ...")
        print("offset=%d record_size=%d" % (offset, record_size))

    # Leftover bytes are only reported; having consumed too many is fatal.
    extra_nbytes = record_size - offset
    if extra_nbytes > 0:
        fprintf(
            self.logfile,
            "*** WARNING: hyperlink at R%dC%d has %d extra data bytes: %s\n",
            h.frowx + 1,
            h.fcolx + 1,
            extra_nbytes,
            REPR(data[-extra_nbytes:]),
        )
        # Seen: b"\x00\x00" also b"A\x00", b"V\x00"
    elif extra_nbytes < 0:
        raise XLRDError("Bug or corrupt file, send copy of input file for debugging")

    self.hyperlink_list.append(h)
    # Index the same object under every cell of its (inclusive) range.
    for rowx in xrange(h.frowx, h.lrowx+1):
        for colx in xrange(h.fcolx, h.lcolx+1):
            self.hyperlink_map[rowx, colx] = h

1857 

def handle_quicktip(self, data):
    """
    Attach a quick-tip text to the most recently parsed hyperlink.

    The record must carry the ``XL_QUICKTIP`` id, cover the same cell
    range as the last entry of ``self.hyperlink_list`` and end with a
    two-byte NUL terminator; each of these is checked with an assertion.

    :param data: Raw bytes of the record body.
    """
    header = unpack('<5H', data[:10])
    rcx, cell_range = header[0], header[1:]
    assert rcx == XL_QUICKTIP
    assert self.hyperlink_list
    link = self.hyperlink_list[-1]
    assert cell_range == (link.frowx, link.lrowx, link.fcolx, link.lcolx)
    assert data[-2:] == b'\x00\x00'
    # Text sits between the 10-byte header and the terminator.
    link.quicktip = unicode(data[10:-2], 'utf_16_le')

1866 

def handle_msodrawingetc(self, recid, data_len, data):
    """
    Dump the sub-records of a drawing record to the log file.

    Debug aid only: does nothing unless ``OBJ_MSO_DEBUG`` is set and the
    file is BIFF 8.0 or later. Nothing is stored on the sheet.

    :param recid: Record id (not used here).
    :param data_len: Number of bytes of ``data`` to walk.
    :param data: Raw bytes of the record body.
    """
    if not OBJ_MSO_DEBUG or self.biff_version < 80:
        return
    drawing = MSODrawing()
    pos = 0
    while pos < data_len:
        payload_start = pos + 8
        tmp, fbt, cb = unpack('<HHI', data[pos:payload_start])
        ver = tmp & 0xF
        inst = (tmp >> 4) & 0xFFF
        # A container (ver 0xF) is stepped into rather than skipped over,
        # so it contributes no payload bytes of its own.
        ndb = 0 if ver == 0xF else cb
        hex_char_dump(data, pos, ndb + 8, base=0, fout=self.logfile)
        fprintf(
            self.logfile,
            "fbt:0x%04X inst:%d ver:0x%X cb:%d (0x%04X)\n",
            fbt, inst, ver, cb, cb,
        )
        if fbt == 0xF010:  # Client Anchor
            assert ndb == 18
            anchor = unpack('<Hiiii', data[payload_start:payload_start + ndb])
            (drawing.anchor_unk,
                drawing.anchor_colx_lo, drawing.anchor_rowx_lo,
                drawing.anchor_colx_hi, drawing.anchor_rowx_hi) = anchor
        elif fbt == 0xF011:  # Client Data
            # must be followed by an OBJ record
            assert cb == 0
            assert pos + 8 == data_len
        pos = payload_start + ndb
    # The walk must land exactly on the end of the data.
    assert pos == data_len
    drawing.dump(self.logfile, header="=== MSODrawing ===", footer= " ")

1905 

1906 

def handle_obj(self, data):
    """
    Parse an OBJ record into an MSObj.

    The record body is a sequence of sub-records, each introduced by a
    2-byte type ``ft`` and a 2-byte payload size ``cb``.

    :param data: Raw bytes of the record body.
    :return:
      The populated MSObj, or ``None`` for pre-BIFF8 files and for
      records whose first sub-record is not the expected 18-byte 0x15 one.
    :raises XLRDError:
      if a 0x00 sub-record is followed by anything other than NUL bytes.
    """
    if self.biff_version < 80:
        return None
    o = MSObj()
    data_len = len(data)
    pos = 0
    if OBJ_MSO_DEBUG:
        fprintf(self.logfile, "... OBJ record len=%d...\n", data_len)
    while pos < data_len:
        ft, cb = unpack('<HH', data[pos:pos+4])
        if OBJ_MSO_DEBUG:
            fprintf(self.logfile, "pos=%d ft=0x%04X cb=%d\n", pos, ft, cb)
            hex_char_dump(data, pos, cb + 4, base=0, fout=self.logfile)
        # The first sub-record must be type 0x15 with an 18-byte payload.
        if pos == 0 and not (ft == 0x15 and cb == 18):
            if self.verbosity:
                fprintf(self.logfile, "*** WARNING Ignoring antique or corrupt OBJECT record\n")
            return None
        if ft == 0x15: # ftCmo ... s/b first
            assert pos == 0
            o.type, o.id, option_flags = unpack('<HHH', data[pos+4:pos+10])
            upkbits(o, option_flags, (
                ( 0, 0x0001, 'locked'),
                ( 4, 0x0010, 'printable'),
                ( 8, 0x0100, 'autofilter'), # not documented in Excel 97 dev kit
                ( 9, 0x0200, 'scrollbar_flag'), # not documented in Excel 97 dev kit
                (13, 0x2000, 'autofill'),
                (14, 0x4000, 'autoline'),
            ))
        elif ft == 0x00:
            if data[pos:data_len] == b'\0' * (data_len - pos):
                # ignore "optional reserved" data at end of record
                break
            msg = "Unexpected data at end of OBJECT record"
            fprintf(self.logfile, "*** ERROR %s\n" % msg)
            hex_char_dump(data, pos, data_len - pos, base=0, fout=self.logfile)
            raise XLRDError(msg)
        elif ft == 0x0C: # Scrollbar
            # Five 16-bit values starting 4 bytes into the payload.
            values = unpack('<5H', data[pos+8:pos+18])
            for value, tag in zip(values, ('value', 'min', 'max', 'inc', 'page')):
                setattr(o, 'scrollbar_' + tag, value)
        elif ft == 0x0D: # "Notes structure" [used for cell comments]
            # not documented in Excel 97 dev kit
            if OBJ_MSO_DEBUG: fprintf(self.logfile, "*** OBJ record has ft==0x0D 'notes' structure\n")
        elif ft == 0x13: # list box data
            if o.autofilter: # non standard exit. NOT documented
                break
        else:
            # Any other sub-record type is skipped.
            pass
        pos += cb + 4
    else:
        # didn't break out of while loop
        pass
    if OBJ_MSO_DEBUG:
        o.dump(self.logfile, header="=== MSOBj ===", footer= " ")
    return o

1962 

def handle_note(self, data, txos):
    """
    Parse a NOTE record and store the resulting Note in ``self.cell_note_map``.

    :param data: Raw bytes of the record body.
    :param txos:
      Mapping from object id to a parsed text object; only consulted for
      BIFF 8.0+ records, where it supplies the note's text and rich-text
      run list.
    """
    if OBJ_MSO_DEBUG:
        fprintf(self.logfile, '... NOTE record ...\n')
        hex_char_dump(data, 0, len(data), base=0, fout=self.logfile)
    o = Note()
    data_len = len(data)
    if self.biff_version < 80:
        # Pre-BIFF8: the text is inline; expected_bytes is its total length
        # and may exceed what this record holds.
        o.rowx, o.colx, expected_bytes = unpack('<HHH', data[:6])
        nb = len(data) - 6
        assert nb <= expected_bytes
        pieces = [data[6:]]
        expected_bytes -= nb
        # Keep reading follow-on NOTE records until all text has arrived.
        while expected_bytes > 0:
            rc2, data2_len, data2 = self.book.get_record_parts()
            assert rc2 == XL_NOTE
            dummy_rowx, nb = unpack('<H2xH', data2[:6])
            # Follow-on records carry 0xFFFF in the row field.
            assert dummy_rowx == 0xFFFF
            assert nb == data2_len - 6
            pieces.append(data2[6:])
            expected_bytes -= nb
        assert expected_bytes == 0
        enc = self.book.encoding or self.book.derive_encoding()
        o.text = unicode(b''.join(pieces), enc)
        # These records carry no formatting, author or visibility data,
        # so fixed values are filled in.
        o.rich_text_runlist = [(0, 0)]
        o.show = 0
        o.row_hidden = 0
        o.col_hidden = 0
        o.author = UNICODE_LITERAL('')
        o._object_id = None
        self.cell_note_map[o.rowx, o.colx] = o
        return
    # Excel 8.0+
    o.rowx, o.colx, option_flags, o._object_id = unpack('<4H', data[:8])
    o.show = (option_flags >> 1) & 1
    o.row_hidden = (option_flags >> 7) & 1
    o.col_hidden = (option_flags >> 8) & 1
    # XL97 dev kit book says NULL [sic] bytes padding between string count and string data
    # to ensure that string is word-aligned. Appears to be nonsense.
    o.author, endpos = unpack_unicode_update_pos(data, 8, lenlen=2)
    # There is a random/undefined byte after the author string (not counted in the
    # string length).
    # Issue 4 on github: Google Spreadsheet doesn't write the undefined byte.
    assert (data_len - endpos) in (0, 1)
    if OBJ_MSO_DEBUG:
        o.dump(self.logfile, header="=== Note ===", footer= " ")
    # The note is stored only if a text object with the same id exists.
    txo = txos.get(o._object_id)
    if txo:
        o.text = txo.text
        o.rich_text_runlist = txo.rich_text_runlist
        self.cell_note_map[o.rowx, o.colx] = o

2013 

def handle_txo(self, data):
    """
    Parse a TXO (text object) record and the CONTINUE records that follow it.

    The text and the formatting runs are not in the TXO record itself; they
    are read from the book's record stream via ``get_record_parts``.

    :param data: Raw bytes of the TXO record body.
    :return:
      An MSTxo with ``text``, ``rich_text_runlist``, ``rot``, ``ifntEmpty``,
      ``fmla`` and the unpacked option flags, or ``None`` for pre-BIFF8 files.
    :raises AssertionError:
      if a following record is not a CONTINUE record or has an
      unexpected size.
    """
    if self.biff_version < 80:
        return
    o = MSTxo()
    fmt = '<HH6sHHH'
    fmtsize = calcsize(fmt)
    option_flags, o.rot, controlInfo, cchText, cbRuns, o.ifntEmpty = unpack(fmt, data[:fmtsize])
    o.fmla = data[fmtsize:]
    # Each entry is (shift, mask, attribute); the shift is the position of
    # the mask's lowest set bit. The two alignment fields are 3 bits wide,
    # so their shifts are 1 and 4 (not the highest bit of each mask).
    upkbits(o, option_flags, (
        ( 1, 0x000E, 'horz_align'),
        ( 4, 0x0070, 'vert_align'),
        ( 9, 0x0200, 'lock_text'),
        (14, 0x4000, 'just_last'),
        (15, 0x8000, 'secret_edit'),
    ))
    totchars = 0
    o.text = UNICODE_LITERAL('')
    # The text may be split over several CONTINUE records, each starting
    # with its own one-byte encoding flag.
    while totchars < cchText:
        rc2, data2_len, data2 = self.book.get_record_parts()
        assert rc2 == XL_CONTINUE
        if OBJ_MSO_DEBUG:
            hex_char_dump(data2, 0, data2_len, base=0, fout=self.logfile)
        nb = BYTES_ORD(data2[0]) # 0 means latin1, 1 means utf_16_le
        nchars = data2_len - 1
        if nb:
            assert nchars % 2 == 0
            nchars //= 2
        utext, endpos = unpack_unicode_update_pos(data2, 0, known_len=nchars)
        assert endpos == data2_len
        o.text += utext
        totchars += nchars
    o.rich_text_runlist = []
    totruns = 0
    while totruns < cbRuns: # counts of BYTES, not runs
        rc3, data3_len, data3 = self.book.get_record_parts()
        # print totruns, cbRuns, rc3, data3_len, repr(data3)
        assert rc3 == XL_CONTINUE
        assert data3_len % 8 == 0
        # Each run is 8 bytes: two 16-bit values followed by 4 ignored bytes.
        for pos in xrange(0, data3_len, 8):
            run = unpack('<HH4x', data3[pos:pos+8])
            o.rich_text_runlist.append(run)
            totruns += 8
    # remove trailing entries that point to the end of the string
    while o.rich_text_runlist and o.rich_text_runlist[-1][0] == cchText:
        del o.rich_text_runlist[-1]
    if OBJ_MSO_DEBUG:
        o.dump(self.logfile, header="=== MSTxo ===", footer= " ")
        print(o.rich_text_runlist, file=self.logfile)
    return o

2063 

def handle_feat11(self, data):
    """
    Dump the fixed-size header fields of a FEAT11 (Table feature) record.

    Debug aid only: returns immediately unless ``OBJ_MSO_DEBUG`` is set,
    and stores nothing on the sheet. All output goes to ``self.logfile``.

    :param data: Raw bytes of the record, starting with the record type.
    :raises AssertionError:
      if the reserved fields, feature type, record type or header flag are
      not the expected values, or the two range references differ.
    """
    if not OBJ_MSO_DEBUG:
        return
    # rt: Record type; this matches the BIFF rt in the first two bytes of the record; =0872h
    # grbitFrt: FRT cell reference flag (see table below for details)
    # Ref0: Range reference to a worksheet cell region if grbitFrt=1 (bitFrtRef). Otherwise blank.
    # isf: Shared feature type index =5 for Table
    # fHdr: =0 since this is for feat not feat header
    # reserved0: Reserved for future use =0 for Table
    # cref: Count of ref ranges this feature is on
    # cbFeatData: Count of byte for the current feature data.
    # reserved1: =0 currently not used
    # Ref1: Repeat of Ref0. UNDOCUMENTED
    rt, grbitFrt, Ref0, isf, fHdr, reserved0, cref, cbFeatData, reserved1, Ref1 = unpack('<HH8sHBiHiH8s', data[0:35])
    assert reserved0 == 0
    assert reserved1 == 0
    assert isf == 5
    assert rt == 0x872
    assert fHdr == 0
    assert Ref1 == Ref0
    # fprintf applies the format and writes to the log file; a bare print()
    # here would emit the file object and the raw template to stdout.
    fprintf(
        self.logfile,
        "FEAT11: grbitFrt=%d Ref0=%r cref=%d cbFeatData=%d\n",
        grbitFrt, Ref0, cref, cbFeatData,
    )
    # lt: Table data source type:
    #   =0 for Excel Worksheet Table =1 for read-write SharePoint linked List
    #   =2 for XML mapper Table =3 for Query Table
    # idList: The ID of the Table (unique per worksheet)
    # crwHeader: How many header/title rows the Table has at the top
    # crwTotals: How many total rows the Table has at the bottom
    # idFieldNext: Next id to try when assigning a unique id to a new field
    # cbFSData: The size of the Fixed Data portion of the Table data structure.
    # rupBuild: the rupBuild that generated the record
    # unusedShort: UNUSED short that can be used later. The value is reserved during round-tripping.
    # listFlags: Collection of bit flags: (see listFlags' bit setting table below for detail.)
    # lPosStmCache: Table data stream position of cached data
    # cbStmCache: Count of bytes of cached data
    # cchStmCache: Count of characters of uncompressed cached data in the stream
    # lem: Table edit mode (see List (Table) Editing Mode (lem) setting table below for details.)
    # rgbHashParam: Hash value for SharePoint Table
    # cchName: Count of characters in the Table name string rgbName
    (lt, idList, crwHeader, crwTotals, idFieldNext, cbFSData,
        rupBuild, unusedShort, listFlags, lPosStmCache, cbStmCache,
        cchStmCache, lem, rgbHashParam, cchName) = unpack('<iiiiiiHHiiiii16sH', data[35:35+66])
    print("lt=%d idList=%d crwHeader=%d crwTotals=%d idFieldNext=%d cbFSData=%d\n"
        "rupBuild=%d unusedShort=%d listFlags=%04X lPosStmCache=%d cbStmCache=%d\n"
        "cchStmCache=%d lem=%d rgbHashParam=%r cchName=%d" % (
            lt, idList, crwHeader, crwTotals, idFieldNext, cbFSData,
            rupBuild, unusedShort, listFlags, lPosStmCache, cbStmCache,
            cchStmCache, lem, rgbHashParam, cchName), file=self.logfile)

2111 

def __repr__(self):
    """Return ``Sheet NN:<name>`` with the sheet number right-aligned to width 2."""
    return "Sheet {number:>2}:<{name}>".format(number=self.number, name=self.name)

2114 

2115 

class MSODrawing(BaseObject):
    """
    Attribute holder for drawing-record debug output.

    ``handle_msodrawingetc`` sets the ``anchor_*`` attributes on an instance
    when it meets a client-anchor sub-record, then dumps it to the log.
    """
    pass

2118 

2119 

class MSObj(BaseObject):
    """
    Attribute holder for the contents of an OBJ record.

    ``handle_obj`` sets ``type``, ``id``, the unpacked option flags and,
    when present, the ``scrollbar_*`` values.
    """
    pass

2122 

2123 

class MSTxo(BaseObject):
    """
    Attribute holder for a text object.

    ``handle_txo`` sets ``text``, ``rich_text_runlist``, ``rot``,
    ``ifntEmpty``, ``fmla`` and the unpacked option flags.
    """
    pass

2126 

2127 

class Note(BaseObject):
    """
    Represents a user "comment" or "note".
    Note objects are accessible through :attr:`Sheet.cell_note_map`.

    The class attributes below are defaults; the parser overwrites them on
    each instance it creates.

    .. versionadded:: 0.7.2
    """

    #: Author of the note. Unicode string; left empty for notes read from
    #: files older than BIFF 8.0.
    author = UNICODE_LITERAL('')

    #: 1 if the containing column is hidden, otherwise 0.
    #: Always 0 for notes read from files older than BIFF 8.0.
    col_hidden = 0

    #: Column index
    colx = 0

    #: List of ``(offset_in_string, font_index)`` tuples.
    #: Unlike :attr:`Sheet.rich_text_runlist_map`, the first offset should
    #: always be 0.
    rich_text_runlist = None

    #: 1 if the containing row is hidden, otherwise 0.
    #: Always 0 for notes read from files older than BIFF 8.0.
    row_hidden = 0

    #: Row index
    rowx = 0

    #: 1 if the note is always shown, otherwise 0.
    #: Always 0 for notes read from files older than BIFF 8.0.
    show = 0

    #: Text of the note. Unicode string.
    text = UNICODE_LITERAL('')

2161 

2162 

class Hyperlink(BaseObject):
    """
    Contains the attributes of a hyperlink.
    Hyperlink objects are accessible through :attr:`Sheet.hyperlink_list`
    and :attr:`Sheet.hyperlink_map`.

    Every attribute defaults to ``None`` and is filled in only when the
    corresponding piece is present in the record.

    .. versionadded:: 0.7.2
    """

    #: Index of first row
    frowx = None

    #: Index of last row
    lrowx = None

    #: Index of first column
    fcolx = None

    #: Index of last column
    lcolx = None

    #: Type of hyperlink. Unicode string, one of 'url', 'unc',
    #: 'local file', 'workbook', 'unknown'
    type = None

    #: The URL or file-path, depending on the type. Unicode string, except
    #: in the rare case of a local but non-existent file with non-ASCII
    #: characters in the name, in which case only the "8.3" filename is
    #: available, as a :class:`bytes` (3.x) or :class:`str` (2.x) string,
    #: *with unknown encoding.*
    url_or_path = None

    #: Description.
    #: This is displayed in the cell,
    #: and should be identical to the cell value. Unicode string, or ``None``.
    #: It seems impossible NOT to have a description created by the Excel UI.
    desc = None

    #: Target frame. Unicode string.
    #:
    #: .. note::
    #:   No cases of this have been seen in the wild.
    #:   It seems impossible to create one in the Excel UI.
    target = None

    #: The piece after the "#" in
    #: "http://docs.python.org/library#struct_module", or the ``Sheet1!A1:Z99``
    #: part when type is "workbook".
    textmark = None

    #: The text of the "quick tip" displayed when the cursor
    #: hovers over the hyperlink. Set from a separate quick-tip record
    #: that follows the hyperlink.
    quicktip = None

2216 

2217# === helpers === 

2218 

def unpack_RK(rk_str):
    """
    Decode a 4-byte RK value into a float.

    The two lowest bits of the first byte are flags:

    - bit 1 set: the remaining 30 bits are a signed integer;
      bit 1 clear: they are the most significant 30 bits of an IEEE 754
      64-bit floating point number (the low 34 bits are zero).
    - bit 0 set: the decoded value must be divided by 100.

    :param rk_str: The 4 raw bytes, little-endian.
    :return: The decoded number as a ``float``.
    """
    raw = bytearray(rk_str[:4])
    flags = raw[0]
    if flags & 2:
        # There's a SIGNED 30-bit integer in there!
        i, = unpack('<i', rk_str)
        i >>= 2  # div by 4 to drop the 2 flag bits
        value = float(i)
    else:
        # Clear the flag bits, then use the 4 bytes as the high half of a
        # double whose low half is all zero.
        raw[0] &= 0xFC
        value, = unpack('<d', b'\0\0\0\0' + bytes(raw))
    if flags & 1:
        return value / 100.0
    return value

2234 

2235##### =============== Cell ======================================== ##### 

2236 

# Maps a number-format type code (the ``type`` of a Format object) to the
# cell type to report for a numeric cell that uses that format.
# Only date formats produce XL_CELL_DATE; everything else is a plain number.
cellty_from_fmtty = {
    FNU: XL_CELL_NUMBER,
    FUN: XL_CELL_NUMBER,
    FGE: XL_CELL_NUMBER,
    FDT: XL_CELL_DATE,
    FTX: XL_CELL_NUMBER, # Yes, a number can be formatted as text.
}

# Short human-readable name for each cell type code; used by Cell.__repr__.
ctype_text = {
    XL_CELL_EMPTY: 'empty',
    XL_CELL_TEXT: 'text',
    XL_CELL_NUMBER: 'number',
    XL_CELL_DATE: 'xldate',
    XL_CELL_BOOLEAN: 'bool',
    XL_CELL_ERROR: 'error',
    XL_CELL_BLANK: 'blank',
}

2254 

2255 

class Cell(BaseObject):
    """
    Contains the data for one cell.

    .. warning::
      You don't call this class yourself. You access :class:`Cell` objects
      via methods of the :class:`Sheet` object(s) that you found in the
      :class:`~xlrd.book.Book` object that was returned when you called
      :func:`~xlrd.open_workbook`

    Cell objects have three attributes: ``ctype`` is an int, ``value``
    (which depends on ``ctype``) and ``xf_index``.
    If ``formatting_info`` is not enabled when the workbook is opened,
    ``xf_index`` will be ``None``.

    The following table describes the types of cells and how their values
    are represented in Python.

    .. raw:: html

        <table border="1" cellpadding="7">
        <tr>
        <th>Type symbol</th>
        <th>Type number</th>
        <th>Python value</th>
        </tr>
        <tr>
        <td>XL_CELL_EMPTY</td>
        <td align="center">0</td>
        <td>empty string ''</td>
        </tr>
        <tr>
        <td>XL_CELL_TEXT</td>
        <td align="center">1</td>
        <td>a Unicode string</td>
        </tr>
        <tr>
        <td>XL_CELL_NUMBER</td>
        <td align="center">2</td>
        <td>float</td>
        </tr>
        <tr>
        <td>XL_CELL_DATE</td>
        <td align="center">3</td>
        <td>float</td>
        </tr>
        <tr>
        <td>XL_CELL_BOOLEAN</td>
        <td align="center">4</td>
        <td>int; 1 means TRUE, 0 means FALSE</td>
        </tr>
        <tr>
        <td>XL_CELL_ERROR</td>
        <td align="center">5</td>
        <td>int representing internal Excel codes; for a text representation,
        refer to the supplied dictionary error_text_from_code</td>
        </tr>
        <tr>
        <td>XL_CELL_BLANK</td>
        <td align="center">6</td>
        <td>empty string ''. Note: this type will appear only when
        open_workbook(..., formatting_info=True) is used.</td>
        </tr>
        </table>
    """

    __slots__ = ['ctype', 'value', 'xf_index']

    def __init__(self, ctype, value, xf_index=None):
        self.ctype = ctype
        self.value = value
        self.xf_index = xf_index

    def __repr__(self):
        # "<type name>:<value repr>", plus the XF index when one is known.
        text = "%s:%r" % (ctype_text[self.ctype], self.value)
        if self.xf_index is not None:
            text += " (XF:%r)" % (self.xf_index,)
        return text

2334 

# A ready-made Cell of type XL_CELL_EMPTY with an empty-string value and
# no XF index.
empty_cell = Cell(XL_CELL_EMPTY, UNICODE_LITERAL(''))

2336 

2337##### =============== Colinfo and Rowinfo ============================== ##### 

2338 

2339 

class Colinfo(BaseObject):
    """
    Width and default formatting information that applies to one or
    more columns in a sheet. Derived from ``COLINFO`` records.

    Here is the default hierarchy for width, according to the OOo docs:

      In BIFF3, if a ``COLINFO`` record is missing for a column,
      the width specified in the record ``DEFCOLWIDTH`` is used instead.

      In BIFF4-BIFF7, the width set in this ``COLINFO`` record is used only
      if the corresponding bit for this column is cleared in the ``GCW``
      record; otherwise the column width set in the ``DEFCOLWIDTH`` record
      is used (the ``STANDARDWIDTH`` record is always ignored in this case [#f1]_).

      In BIFF8, if a ``COLINFO`` record is missing for a column,
      the width specified in the record ``STANDARDWIDTH`` is used.
      If this ``STANDARDWIDTH`` record is also missing,
      the column width of the record ``DEFCOLWIDTH`` is used instead.

    .. [#f1] The docs on the ``GCW`` record say this:

      If a bit is set, the corresponding column uses the width set in the
      ``STANDARDWIDTH`` record. If a bit is cleared, the corresponding column
      uses the width set in the ``COLINFO`` record for this column.

      If a bit is set, and the worksheet does not contain the ``STANDARDWIDTH``
      record, or if the bit is cleared, and the worksheet does not contain the
      ``COLINFO`` record, the ``DEFCOLWIDTH`` record of the worksheet will be
      used instead.

    xlrd goes with the GCW version of the story.
    Reference to the source may be useful: see
    :meth:`Sheet.computed_column_width`.

    .. versionadded:: 0.6.1
    """

    #: Width of the column in 1/256 of the width of the zero character,
    #: using default font (first ``FONT`` record in the file).
    width = 0

    #: XF index to be used for formatting empty cells.
    #: The class default is -1.
    xf_index = -1

    #: 1 = column is hidden
    hidden = 0

    #: Value of a 1-bit flag whose purpose is unknown
    #: but is often seen set to 1
    bit1_flag = 0

    #: Outline level of the column, in ``range(7)``.
    #: (0 = no outline)
    outline_level = 0

    #: 1 = column is collapsed
    collapsed = 0

2398 

# When true, the Rowinfo class below declares __slots__ for its attributes.
_USE_SLOTS = 1

2400 

2401 

class Rowinfo(BaseObject):
    """
    Height and default formatting information that applies to a row in a sheet.
    Derived from ``ROW`` records.

    Every attribute starts out as ``None``.

    .. versionadded:: 0.6.1
    """

    # The slot names must stay in step with the attributes assigned in
    # __init__ and with the tuples in __getstate__ / __setstate__.
    if _USE_SLOTS:
        __slots__ = (
            "height",
            "has_default_height",
            "outline_level",
            "outline_group_starts_ends",
            "hidden",
            "height_mismatch",
            "has_default_xf_index",
            "xf_index",
            "additional_space_above",
            "additional_space_below",
        )

    def __init__(self):
        #: Height of the row, in twips. One twip == 1/20 of a point.
        self.height = None

        #: 0 = Row has custom height; 1 = Row has default height.
        self.has_default_height = None

        #: Outline level of the row (0 to 7)
        self.outline_level = None

        #: 1 = Outline group starts or ends here (depending on where the
        #: outline buttons are located, see ``WSBOOL`` record, which is not
        #: parsed by xlrd), *and* is collapsed.
        self.outline_group_starts_ends = None

        #: 1 = Row is hidden (manually, or by a filter or outline group)
        self.hidden = None

        #: 1 = Row height and default font height do not match.
        self.height_mismatch = None

        #: 1 = the xf_index attribute is usable; 0 = ignore it.
        self.has_default_xf_index = None

        #: Index to default :class:`~xlrd.formatting.XF` record for empty cells
        #: in this row. Don't use this if ``has_default_xf_index == 0``.
        self.xf_index = None

        #: This flag is set if the upper border of at least one cell in this
        #: row or if the lower border of at least one cell in the row above is
        #: formatted with a thick line style. Thin and medium line styles are
        #: not taken into account.
        self.additional_space_above = None

        #: This flag is set if the lower border of at least one cell in this row
        #: or if the upper border of at least one cell in the row below is
        #: formatted with a medium or thick line style. Thin line styles are not
        #: taken into account.
        self.additional_space_below = None

    def __getstate__(self):
        # Returns all ten attributes as a tuple, in the order in which
        # __setstate__ unpacks them.
        # NOTE(review): presumably provided so that instances can be pickled
        # while __slots__ is in effect -- confirm.
        return (
            self.height,
            self.has_default_height,
            self.outline_level,
            self.outline_group_starts_ends,
            self.hidden,
            self.height_mismatch,
            self.has_default_xf_index,
            self.xf_index,
            self.additional_space_above,
            self.additional_space_below,
        )

    def __setstate__(self, state):
        # Restores the attributes from a tuple produced by __getstate__;
        # a tuple of any other length raises ValueError.
        (
            self.height,
            self.has_default_height,
            self.outline_level,
            self.outline_group_starts_ends,
            self.hidden,
            self.height_mismatch,
            self.has_default_xf_index,
            self.xf_index,
            self.additional_space_above,
            self.additional_space_below,
        ) = state