Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/xlrd/sheet.py: 7%
1316 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1# -*- coding: utf-8 -*-
2# Copyright (c) 2005-2013 Stephen John Machin, Lingfo Pty Ltd
3# This module is part of the xlrd package, which is released under a
4# BSD-style licence.
6from __future__ import print_function
8from array import array
9from struct import calcsize, unpack
11from .biffh import *
12from .formatting import Format, nearest_colour_index
13from .formula import (
14 FMLA_TYPE_CELL, FMLA_TYPE_SHARED, decompile_formula, dump_formula,
15 rangename2d,
16)
17from .timemachine import *
19DEBUG = 0
20OBJ_MSO_DEBUG = 0
22_WINDOW2_options = (
23 # Attribute names and initial values to use in case
24 # a WINDOW2 record is not written.
25 ("show_formulas", 0),
26 ("show_grid_lines", 1),
27 ("show_sheet_headers", 1),
28 ("panes_are_frozen", 0),
29 ("show_zero_values", 1),
30 ("automatic_grid_line_colour", 1),
31 ("columns_from_right_to_left", 0),
32 ("show_outline_symbols", 1),
33 ("remove_splits_if_pane_freeze_is_removed", 0),
34 # Multiple sheets can be selected, but only one can be active
35 # (hold down Ctrl and click multiple tabs in the file in OOo)
36 ("sheet_selected", 0),
37 # "sheet_visible" should really be called "sheet_active"
38 # and is 1 when this sheet is the sheet displayed when the file
39 # is open. More than likely only one sheet should ever be set as
40 # visible.
41 # This would correspond to the Book's sheet_active attribute, but
42 # that doesn't exist as WINDOW1 records aren't currently processed.
43 # The real thing is the visibility attribute from the BOUNDSHEET record.
44 ("sheet_visible", 0),
45 ("show_in_page_break_preview", 0),
46)
50class Sheet(BaseObject):
51 """
52 Contains the data for one worksheet.
54 In the cell access functions, ``rowx`` is a row index, counting from
55 zero, and ``colx`` is a column index, counting from zero.
56 Negative values for row/column indexes and slice positions are supported in
57 the expected fashion.
59 For information about cell types and cell values, refer to the documentation
60 of the :class:`Cell` class.
62 .. warning::
64 You don't instantiate this class yourself. You access :class:`Sheet`
65 objects via the :class:`~xlrd.book.Book` object that
66 was returned when you called :func:`xlrd.open_workbook`.
67 """
69 #: Name of sheet.
70 name = ''
72 #: A reference to the :class:`~xlrd.book.Book` object to which this sheet
73 #: belongs.
74 #:
75 #: Example usage: ``some_sheet.book.datemode``
76 book = None
78 #: Number of rows in sheet. A row index is in ``range(thesheet.nrows)``.
79 nrows = 0
81 #: Nominal number of columns in sheet. It is one more than the maximum
82 #: column index found, ignoring trailing empty cells.
83 #: See also the ``ragged_rows`` parameter to :func:`~xlrd.open_workbook`
84 #: and :meth:`~xlrd.sheet.Sheet.row_len`.
85 ncols = 0
88 #: The map from a column index to a :class:`Colinfo` object. Often there is
89 #: an entry in ``COLINFO`` records for all column indexes in ``range(257)``.
90 #:
91 #: .. note::
92 #: xlrd ignores the entry for the non-existent
93 #: 257th column.
94 #:
95 #: On the other hand, there may be no entry for unused columns.
96 #:
97 #: .. versionadded:: 0.6.1
98 #:
99 #: Populated only if ``open_workbook(..., formatting_info=True)``
100 colinfo_map = {}
102 #: The map from a row index to a :class:`Rowinfo` object.
103 #:
104 #: ..note::
105 #: It is possible to have missing entries -- at least one source of
106 #: XLS files doesn't bother writing ``ROW`` records.
107 #:
108 #: .. versionadded:: 0.6.1
109 #:
110 #: Populated only if ``open_workbook(..., formatting_info=True)``
111 rowinfo_map = {}
113 #: List of address ranges of cells containing column labels.
114 #: These are set up in Excel by Insert > Name > Labels > Columns.
115 #:
116 #: .. versionadded:: 0.6.0
117 #:
118 #: How to deconstruct the list:
119 #:
120 #: .. code-block:: python
121 #:
122 #: for crange in thesheet.col_label_ranges:
123 #: rlo, rhi, clo, chi = crange
124 #: for rx in xrange(rlo, rhi):
125 #: for cx in xrange(clo, chi):
126 #: print "Column label at (rowx=%d, colx=%d) is %r" \
127 #: (rx, cx, thesheet.cell_value(rx, cx))
128 col_label_ranges = []
130 #: List of address ranges of cells containing row labels.
131 #: For more details, see :attr:`col_label_ranges`.
132 #:
133 #: .. versionadded:: 0.6.0
134 row_label_ranges = []
136 #: List of address ranges of cells which have been merged.
137 #: These are set up in Excel by Format > Cells > Alignment, then ticking
138 #: the "Merge cells" box.
139 #:
140 #: .. note::
141 #: The upper limits are exclusive: i.e. ``[2, 3, 7, 9]`` only
142 #: spans two cells.
143 #:
144 #: .. note:: Extracted only if ``open_workbook(..., formatting_info=True)``
145 #:
146 #: .. versionadded:: 0.6.1
147 #:
148 #: How to deconstruct the list:
149 #:
150 #: .. code-block:: python
151 #:
152 #: for crange in thesheet.merged_cells:
153 #: rlo, rhi, clo, chi = crange
154 #: for rowx in xrange(rlo, rhi):
155 #: for colx in xrange(clo, chi):
156 #: # cell (rlo, clo) (the top left one) will carry the data
157 #: # and formatting info; the remainder will be recorded as
158 #: # blank cells, but a renderer will apply the formatting info
159 #: # for the top left cell (e.g. border, pattern) to all cells in
160 #: # the range.
161 merged_cells = []
163 #: Mapping of ``(rowx, colx)`` to list of ``(offset, font_index)`` tuples.
164 #: The offset defines where in the string the font begins to be used.
165 #: Offsets are expected to be in ascending order.
166 #: If the first offset is not zero, the meaning is that the cell's ``XF``'s
167 #: font should be used from offset 0.
168 #:
169 #: This is a sparse mapping. There is no entry for cells that are not
170 #: formatted with rich text.
171 #:
172 #: How to use:
173 #:
174 #: .. code-block:: python
175 #:
176 #: runlist = thesheet.rich_text_runlist_map.get((rowx, colx))
177 #: if runlist:
178 #: for offset, font_index in runlist:
179 #: # do work here.
180 #: pass
181 #:
182 #: .. versionadded:: 0.7.2
183 #:
184 #: Populated only if ``open_workbook(..., formatting_info=True)``
185 rich_text_runlist_map = {}
187 #: Default column width from ``DEFCOLWIDTH`` record, else ``None``.
188 #: From the OOo docs:
189 #:
190 #: Column width in characters, using the width of the zero character
191 #: from default font (first FONT record in the file). Excel adds some
192 #: extra space to the default width, depending on the default font and
193 #: default font size. The algorithm how to exactly calculate the resulting
194 #: column width is not known.
195 #: Example: The default width of 8 set in this record results in a column
196 #: width of 8.43 using Arial font with a size of 10 points.
197 #:
198 #: For the default hierarchy, refer to the :class:`Colinfo` class.
199 #:
200 #: .. versionadded:: 0.6.1
201 defcolwidth = None
203 #: Default column width from ``STANDARDWIDTH`` record, else ``None``.
204 #:
205 #: From the OOo docs:
206 #:
207 #: Default width of the columns in 1/256 of the width of the zero
208 #: character, using default font (first FONT record in the file).
209 #:
210 #: For the default hierarchy, refer to the :class:`Colinfo` class.
211 #:
212 #: .. versionadded:: 0.6.1
213 standardwidth = None
215 #: Default value to be used for a row if there is
216 #: no ``ROW`` record for that row.
217 #: From the *optional* ``DEFAULTROWHEIGHT`` record.
218 default_row_height = None
220 #: Default value to be used for a row if there is
221 #: no ``ROW`` record for that row.
222 #: From the *optional* ``DEFAULTROWHEIGHT`` record.
223 default_row_height_mismatch = None
225 #: Default value to be used for a row if there is
226 #: no ``ROW`` record for that row.
227 #: From the *optional* ``DEFAULTROWHEIGHT`` record.
228 default_row_hidden = None
230 #: Default value to be used for a row if there is
231 #: no ``ROW`` record for that row.
232 #: From the *optional* ``DEFAULTROWHEIGHT`` record.
233 default_additional_space_above = None
235 #: Default value to be used for a row if there is
236 #: no ``ROW`` record for that row.
237 #: From the *optional* ``DEFAULTROWHEIGHT`` record.
238 default_additional_space_below = None
240 #: Visibility of the sheet:
241 #: ::
242 #:
243 #: 0 = visible
244 #: 1 = hidden (can be unhidden by user -- Format -> Sheet -> Unhide)
245 #: 2 = "very hidden" (can be unhidden only by VBA macro).
246 visibility = 0
248 #: A 256-element tuple corresponding to the contents of the GCW record for
249 #: this sheet. If no such record, treat as all bits zero.
250 #: Applies to BIFF4-7 only. See docs of the :class:`Colinfo` class for
251 #: discussion.
252 gcw = (0, ) * 256
254 #: A list of :class:`Hyperlink` objects corresponding to ``HLINK`` records
255 #: found in the worksheet.
256 #:
257 #: .. versionadded:: 0.7.2
258 hyperlink_list = []
260 #: A sparse mapping from ``(rowx, colx)`` to an item in
261 #: :attr:`~xlrd.sheet.Sheet.hyperlink_list`.
262 #: Cells not covered by a hyperlink are not mapped.
263 #: It is possible using the Excel UI to set up a hyperlink that
264 #: covers a larger-than-1x1 rectangle of cells.
265 #: Hyperlink rectangles may overlap (Excel doesn't check).
266 #: When a multiply-covered cell is clicked on, the hyperlink that is
267 #: activated
268 #: (and the one that is mapped here) is the last in
269 #: :attr:`~xlrd.sheet.Sheet.hyperlink_list`.
270 #:
271 #: .. versionadded:: 0.7.2
272 hyperlink_map = {}
274 #: A sparse mapping from ``(rowx, colx)`` to a :class:`Note` object.
275 #: Cells not containing a note ("comment") are not mapped.
276 #:
277 #: .. versionadded:: 0.7.2
278 cell_note_map = {}
280 #: Number of columns in left pane (frozen panes; for split panes, see
281 #: comments in code)
282 vert_split_pos = 0
284 #: Number of rows in top pane (frozen panes; for split panes, see comments
285 #: in code)
286 horz_split_pos = 0
288 #: Index of first visible row in bottom frozen/split pane
289 horz_split_first_visible = 0
291 #: Index of first visible column in right frozen/split pane
292 vert_split_first_visible = 0
294 #: Frozen panes: ignore it. Split panes: explanation and diagrams in
295 #: OOo docs.
296 split_active_pane = 0
298 #: Boolean specifying if a ``PANE`` record was present, ignore unless you're
299 #: ``xlutils.copy``
300 has_pane_record = 0
302 #: A list of the horizontal page breaks in this sheet.
303 #: Breaks are tuples in the form
304 #: ``(index of row after break, start col index, end col index)``.
305 #:
306 #: Populated only if ``open_workbook(..., formatting_info=True)``
307 #:
308 #: .. versionadded:: 0.7.2
309 horizontal_page_breaks = []
311 #: A list of the vertical page breaks in this sheet.
312 #: Breaks are tuples in the form
313 #: ``(index of col after break, start row index, end row index)``.
314 #:
315 #: Populated only if ``open_workbook(..., formatting_info=True)``
316 #:
317 #: .. versionadded:: 0.7.2
318 vertical_page_breaks = []
320 def __init__(self, book, position, name, number):
321 self.book = book
322 self.biff_version = book.biff_version
323 self._position = position
324 self.logfile = book.logfile
325 self.bt = array('B', [XL_CELL_EMPTY])
326 self.bf = array('h', [-1])
327 self.name = name
328 self.number = number
329 self.verbosity = book.verbosity
330 self.formatting_info = book.formatting_info
331 self.ragged_rows = book.ragged_rows
332 if self.ragged_rows:
333 self.put_cell = self.put_cell_ragged
334 else:
335 self.put_cell = self.put_cell_unragged
336 self._xf_index_to_xl_type_map = book._xf_index_to_xl_type_map
337 self.nrows = 0 # actual, including possibly empty cells
338 self.ncols = 0
339 self._maxdatarowx = -1 # highest rowx containing a non-empty cell
340 self._maxdatacolx = -1 # highest colx containing a non-empty cell
341 self._dimnrows = 0 # as per DIMENSIONS record
342 self._dimncols = 0
343 self._cell_values = []
344 self._cell_types = []
345 self._cell_xf_indexes = []
346 self.defcolwidth = None
347 self.standardwidth = None
348 self.default_row_height = None
349 self.default_row_height_mismatch = 0
350 self.default_row_hidden = 0
351 self.default_additional_space_above = 0
352 self.default_additional_space_below = 0
353 self.colinfo_map = {}
354 self.rowinfo_map = {}
355 self.col_label_ranges = []
356 self.row_label_ranges = []
357 self.merged_cells = []
358 self.rich_text_runlist_map = {}
359 self.horizontal_page_breaks = []
360 self.vertical_page_breaks = []
361 self._xf_index_stats = [0, 0, 0, 0]
362 self.visibility = book._sheet_visibility[number] # from BOUNDSHEET record
363 for attr, defval in _WINDOW2_options:
364 setattr(self, attr, defval)
365 self.first_visible_rowx = 0
366 self.first_visible_colx = 0
367 self.gridline_colour_index = 0x40
368 self.gridline_colour_rgb = None # pre-BIFF8
369 self.hyperlink_list = []
370 self.hyperlink_map = {}
371 self.cell_note_map = {}
373 # Values calculated by xlrd to predict the mag factors that
374 # will actually be used by Excel to display your worksheet.
375 # Pass these values to xlwt when writing XLS files.
376 # Warning 1: Behaviour of OOo Calc and Gnumeric has been observed to differ from Excel's.
377 # Warning 2: A value of zero means almost exactly what it says. Your sheet will be
378 # displayed as a very tiny speck on the screen. xlwt will reject attempts to set
379 # a mag_factor that is not (10 <= mag_factor <= 400).
380 self.cooked_page_break_preview_mag_factor = 60
381 self.cooked_normal_view_mag_factor = 100
383 # Values (if any) actually stored on the XLS file
384 self.cached_page_break_preview_mag_factor = 0 # default (60%), from WINDOW2 record
385 self.cached_normal_view_mag_factor = 0 # default (100%), from WINDOW2 record
386 self.scl_mag_factor = None # from SCL record
388 self._ixfe = None # BIFF2 only
389 self._cell_attr_to_xfx = {} # BIFF2.0 only
391 if self.biff_version >= 80:
392 self.utter_max_rows = 65536
393 else:
394 self.utter_max_rows = 16384
395 self.utter_max_cols = 256
397 self._first_full_rowx = -1
399 # self._put_cell_exceptions = 0
400 # self._put_cell_row_widenings = 0
401 # self._put_cell_rows_appended = 0
402 # self._put_cell_cells_appended = 0
404 def cell(self, rowx, colx):
405 """
406 :class:`Cell` object in the given row and column.
407 """
408 if self.formatting_info:
409 xfx = self.cell_xf_index(rowx, colx)
410 else:
411 xfx = None
412 return Cell(
413 self._cell_types[rowx][colx],
414 self._cell_values[rowx][colx],
415 xfx,
416 )
418 def cell_value(self, rowx, colx):
419 "Value of the cell in the given row and column."
420 return self._cell_values[rowx][colx]
422 def cell_type(self, rowx, colx):
423 """
424 Type of the cell in the given row and column.
426 Refer to the documentation of the :class:`Cell` class.
427 """
428 return self._cell_types[rowx][colx]
430 def cell_xf_index(self, rowx, colx):
431 """
432 XF index of the cell in the given row and column.
433 This is an index into :attr:`~xlrd.book.Book.xf_list`.
435 .. versionadded:: 0.6.1
436 """
437 self.req_fmt_info()
438 xfx = self._cell_xf_indexes[rowx][colx]
439 if xfx > -1:
440 self._xf_index_stats[0] += 1
441 return xfx
442 # Check for a row xf_index
443 try:
444 xfx = self.rowinfo_map[rowx].xf_index
445 if xfx > -1:
446 self._xf_index_stats[1] += 1
447 return xfx
448 except KeyError:
449 pass
450 # Check for a column xf_index
451 try:
452 xfx = self.colinfo_map[colx].xf_index
453 if xfx == -1: xfx = 15
454 self._xf_index_stats[2] += 1
455 return xfx
456 except KeyError:
457 # If all else fails, 15 is used as hardwired global default xf_index.
458 self._xf_index_stats[3] += 1
459 return 15
461 def row_len(self, rowx):
462 """
463 Returns the effective number of cells in the given row. For use with
464 ``open_workbook(ragged_rows=True)`` which is likely to produce rows
465 with fewer than :attr:`~Sheet.ncols` cells.
467 .. versionadded:: 0.7.2
468 """
469 return len(self._cell_values[rowx])
471 def row(self, rowx):
472 """
473 Returns a sequence of the :class:`Cell` objects in the given row.
474 """
475 return [
476 self.cell(rowx, colx)
477 for colx in xrange(len(self._cell_values[rowx]))
478 ]
480 def __getitem__(self, item):
481 """
482 Takes either rowindex or (rowindex, colindex) as an index,
483 and returns either row or cell respectively.
484 """
485 try:
486 rowix, colix = item
487 except TypeError:
488 # it's not a tuple (or of right size), let's try indexing as is
489 # if this is a problem, let this error propagate back
490 return self.row(item)
491 else:
492 return self.cell(rowix, colix)
494 def get_rows(self):
495 "Returns a generator for iterating through each row."
496 return (self.row(index) for index in range(self.nrows))
498 # makes `for row in sheet` natural and intuitive
499 __iter__ = get_rows
501 def row_types(self, rowx, start_colx=0, end_colx=None):
502 """
503 Returns a slice of the types of the cells in the given row.
504 """
505 if end_colx is None:
506 return self._cell_types[rowx][start_colx:]
507 return self._cell_types[rowx][start_colx:end_colx]
509 def row_values(self, rowx, start_colx=0, end_colx=None):
510 """
511 Returns a slice of the values of the cells in the given row.
512 """
513 if end_colx is None:
514 return self._cell_values[rowx][start_colx:]
515 return self._cell_values[rowx][start_colx:end_colx]
517 def row_slice(self, rowx, start_colx=0, end_colx=None):
518 """
519 Returns a slice of the :class:`Cell` objects in the given row.
520 """
521 nc = len(self._cell_values[rowx])
522 if start_colx < 0:
523 start_colx += nc
524 if start_colx < 0:
525 start_colx = 0
526 if end_colx is None or end_colx > nc:
527 end_colx = nc
528 elif end_colx < 0:
529 end_colx += nc
530 return [
531 self.cell(rowx, colx)
532 for colx in xrange(start_colx, end_colx)
533 ]
535 def col_slice(self, colx, start_rowx=0, end_rowx=None):
536 """
537 Returns a slice of the :class:`Cell` objects in the given column.
538 """
539 nr = self.nrows
540 if start_rowx < 0:
541 start_rowx += nr
542 if start_rowx < 0:
543 start_rowx = 0
544 if end_rowx is None or end_rowx > nr:
545 end_rowx = nr
546 elif end_rowx < 0:
547 end_rowx += nr
548 return [
549 self.cell(rowx, colx)
550 for rowx in xrange(start_rowx, end_rowx)
551 ]
553 def col_values(self, colx, start_rowx=0, end_rowx=None):
554 """
555 Returns a slice of the values of the cells in the given column.
556 """
557 nr = self.nrows
558 if start_rowx < 0:
559 start_rowx += nr
560 if start_rowx < 0:
561 start_rowx = 0
562 if end_rowx is None or end_rowx > nr:
563 end_rowx = nr
564 elif end_rowx < 0:
565 end_rowx += nr
566 return [
567 self._cell_values[rowx][colx]
568 for rowx in xrange(start_rowx, end_rowx)
569 ]
571 def col_types(self, colx, start_rowx=0, end_rowx=None):
572 """
573 Returns a slice of the types of the cells in the given column.
574 """
575 nr = self.nrows
576 if start_rowx < 0:
577 start_rowx += nr
578 if start_rowx < 0:
579 start_rowx = 0
580 if end_rowx is None or end_rowx > nr:
581 end_rowx = nr
582 elif end_rowx < 0:
583 end_rowx += nr
584 return [
585 self._cell_types[rowx][colx]
586 for rowx in xrange(start_rowx, end_rowx)
587 ]
589 col = col_slice
591 # === Following methods are used in building the worksheet.
592 # === They are not part of the API.
594 def tidy_dimensions(self):
595 if self.verbosity >= 3:
596 fprintf(
597 self.logfile,
598 "tidy_dimensions: nrows=%d ncols=%d \n",
599 self.nrows, self.ncols,
600 )
601 if 1 and self.merged_cells:
602 nr = nc = 0
603 umaxrows = self.utter_max_rows
604 umaxcols = self.utter_max_cols
605 for crange in self.merged_cells:
606 rlo, rhi, clo, chi = crange
607 if not (0 <= rlo < rhi <= umaxrows) or not (0 <= clo < chi <= umaxcols):
608 fprintf(self.logfile,
609 "*** WARNING: sheet #%d (%r), MERGEDCELLS bad range %r\n",
610 self.number, self.name, crange)
611 if rhi > nr: nr = rhi
612 if chi > nc: nc = chi
613 if nc > self.ncols:
614 self.ncols = nc
615 self._first_full_rowx = -2
616 if nr > self.nrows:
617 # we put one empty cell at (nr-1,0) to make sure
618 # we have the right number of rows. The ragged rows
619 # will sort out the rest if needed.
620 self.put_cell(nr-1, 0, XL_CELL_EMPTY, UNICODE_LITERAL(''), -1)
621 if (self.verbosity >= 1 and
622 (self.nrows != self._dimnrows or self.ncols != self._dimncols)):
623 fprintf(
624 self.logfile,
625 "NOTE *** sheet %d (%r): DIMENSIONS R,C = %d,%d should be %d,%d\n",
626 self.number,
627 self.name,
628 self._dimnrows,
629 self._dimncols,
630 self.nrows,
631 self.ncols,
632 )
633 if not self.ragged_rows:
634 # fix ragged rows
635 ncols = self.ncols
636 s_cell_types = self._cell_types
637 s_cell_values = self._cell_values
638 s_cell_xf_indexes = self._cell_xf_indexes
639 s_fmt_info = self.formatting_info
640 # for rowx in xrange(self.nrows):
641 if self._first_full_rowx == -2:
642 ubound = self.nrows
643 else:
644 ubound = self._first_full_rowx
645 for rowx in xrange(ubound):
646 trow = s_cell_types[rowx]
647 rlen = len(trow)
648 nextra = ncols - rlen
649 if nextra > 0:
650 s_cell_values[rowx][rlen:] = [UNICODE_LITERAL('')] * nextra
651 trow[rlen:] = self.bt * nextra
652 if s_fmt_info:
653 s_cell_xf_indexes[rowx][rlen:] = self.bf * nextra
655 def put_cell_ragged(self, rowx, colx, ctype, value, xf_index):
656 if ctype is None:
657 # we have a number, so look up the cell type
658 ctype = self._xf_index_to_xl_type_map[xf_index]
659 assert 0 <= colx < self.utter_max_cols
660 assert 0 <= rowx < self.utter_max_rows
661 fmt_info = self.formatting_info
663 try:
664 nr = rowx + 1
665 if self.nrows < nr:
667 scta = self._cell_types.append
668 scva = self._cell_values.append
669 scxa = self._cell_xf_indexes.append
670 bt = self.bt
671 bf = self.bf
672 for _unused in xrange(self.nrows, nr):
673 scta(bt * 0)
674 scva([])
675 if fmt_info:
676 scxa(bf * 0)
677 self.nrows = nr
679 types_row = self._cell_types[rowx]
680 values_row = self._cell_values[rowx]
681 if fmt_info:
682 fmt_row = self._cell_xf_indexes[rowx]
683 ltr = len(types_row)
684 if colx >= self.ncols:
685 self.ncols = colx + 1
686 num_empty = colx - ltr
687 if not num_empty:
688 # most common case: colx == previous colx + 1
689 # self._put_cell_cells_appended += 1
690 types_row.append(ctype)
691 values_row.append(value)
692 if fmt_info:
693 fmt_row.append(xf_index)
694 return
695 if num_empty > 0:
696 num_empty += 1
697 # self._put_cell_row_widenings += 1
698 # types_row.extend(self.bt * num_empty)
699 # values_row.extend([UNICODE_LITERAL('')] * num_empty)
700 # if fmt_info:
701 # fmt_row.extend(self.bf * num_empty)
702 types_row[ltr:] = self.bt * num_empty
703 values_row[ltr:] = [UNICODE_LITERAL('')] * num_empty
704 if fmt_info:
705 fmt_row[ltr:] = self.bf * num_empty
706 types_row[colx] = ctype
707 values_row[colx] = value
708 if fmt_info:
709 fmt_row[colx] = xf_index
710 except:
711 print("put_cell", rowx, colx, file=self.logfile)
712 raise
714 def put_cell_unragged(self, rowx, colx, ctype, value, xf_index):
715 if ctype is None:
716 # we have a number, so look up the cell type
717 ctype = self._xf_index_to_xl_type_map[xf_index]
718 # assert 0 <= colx < self.utter_max_cols
719 # assert 0 <= rowx < self.utter_max_rows
720 try:
721 self._cell_types[rowx][colx] = ctype
722 self._cell_values[rowx][colx] = value
723 if self.formatting_info:
724 self._cell_xf_indexes[rowx][colx] = xf_index
725 except IndexError:
726 # print >> self.logfile, "put_cell extending", rowx, colx
727 # self.extend_cells(rowx+1, colx+1)
728 # self._put_cell_exceptions += 1
729 nr = rowx + 1
730 nc = colx + 1
731 assert 1 <= nc <= self.utter_max_cols
732 assert 1 <= nr <= self.utter_max_rows
733 if nc > self.ncols:
734 self.ncols = nc
735 # The row self._first_full_rowx and all subsequent rows
736 # are guaranteed to have length == self.ncols. Thus the
737 # "fix ragged rows" section of the tidy_dimensions method
738 # doesn't need to examine them.
739 if nr < self.nrows:
740 # cell data is not in non-descending row order *AND*
741 # self.ncols has been bumped up.
742 # This very rare case ruins this optimisation.
743 self._first_full_rowx = -2
744 elif rowx > self._first_full_rowx > -2:
745 self._first_full_rowx = rowx
746 if nr <= self.nrows:
747 # New cell is in an existing row, so extend that row (if necessary).
748 # Note that nr < self.nrows means that the cell data
749 # is not in ascending row order!!
750 trow = self._cell_types[rowx]
751 nextra = self.ncols - len(trow)
752 if nextra > 0:
753 # self._put_cell_row_widenings += 1
754 trow.extend(self.bt * nextra)
755 if self.formatting_info:
756 self._cell_xf_indexes[rowx].extend(self.bf * nextra)
757 self._cell_values[rowx].extend([UNICODE_LITERAL('')] * nextra)
758 else:
759 scta = self._cell_types.append
760 scva = self._cell_values.append
761 scxa = self._cell_xf_indexes.append
762 fmt_info = self.formatting_info
763 nc = self.ncols
764 bt = self.bt
765 bf = self.bf
766 for _unused in xrange(self.nrows, nr):
767 # self._put_cell_rows_appended += 1
768 scta(bt * nc)
769 scva([UNICODE_LITERAL('')] * nc)
770 if fmt_info:
771 scxa(bf * nc)
772 self.nrows = nr
773 # === end of code from extend_cells()
774 try:
775 self._cell_types[rowx][colx] = ctype
776 self._cell_values[rowx][colx] = value
777 if self.formatting_info:
778 self._cell_xf_indexes[rowx][colx] = xf_index
779 except:
780 print("put_cell", rowx, colx, file=self.logfile)
781 raise
782 except:
783 print("put_cell", rowx, colx, file=self.logfile)
784 raise
787 # === Methods after this line neither know nor care about how cells are stored.
789 def read(self, bk):
790 global rc_stats
791 DEBUG = 0
792 blah = DEBUG or self.verbosity >= 2
793 blah_rows = DEBUG or self.verbosity >= 4
794 blah_formulas = 0 and blah
795 r1c1 = 0
796 oldpos = bk._position
797 bk._position = self._position
798 XL_SHRFMLA_ETC_ETC = (
799 XL_SHRFMLA, XL_ARRAY, XL_TABLEOP, XL_TABLEOP2,
800 XL_ARRAY2, XL_TABLEOP_B2,
801 )
802 self_put_cell = self.put_cell
803 local_unpack = unpack
804 bk_get_record_parts = bk.get_record_parts
805 bv = self.biff_version
806 fmt_info = self.formatting_info
807 do_sst_rich_text = fmt_info and bk._rich_text_runlist_map
808 rowinfo_sharing_dict = {}
809 txos = {}
810 eof_found = 0
811 while 1:
812 # if DEBUG: print "SHEET.READ: about to read from position %d" % bk._position
813 rc, data_len, data = bk_get_record_parts()
814 # if rc in rc_stats:
815 # rc_stats[rc] += 1
816 # else:
817 # rc_stats[rc] = 1
818 # if DEBUG: print "SHEET.READ: op 0x%04x, %d bytes %r" % (rc, data_len, data)
819 if rc == XL_NUMBER:
820 # [:14] in following stmt ignores extraneous rubbish at end of record.
821 # Sample file testEON-8.xls supplied by Jan Kraus.
822 rowx, colx, xf_index, d = local_unpack('<HHHd', data[:14])
823 # if xf_index == 0:
824 # fprintf(self.logfile,
825 # "NUMBER: r=%d c=%d xfx=%d %f\n", rowx, colx, xf_index, d)
826 self_put_cell(rowx, colx, None, d, xf_index)
827 elif rc == XL_LABELSST:
828 rowx, colx, xf_index, sstindex = local_unpack('<HHHi', data)
829 # print "LABELSST", rowx, colx, sstindex, bk._sharedstrings[sstindex]
830 self_put_cell(rowx, colx, XL_CELL_TEXT, bk._sharedstrings[sstindex], xf_index)
831 if do_sst_rich_text:
832 runlist = bk._rich_text_runlist_map.get(sstindex)
833 if runlist:
834 self.rich_text_runlist_map[(rowx, colx)] = runlist
835 elif rc == XL_LABEL:
836 rowx, colx, xf_index = local_unpack('<HHH', data[0:6])
837 if bv < BIFF_FIRST_UNICODE:
838 strg = unpack_string(data, 6, bk.encoding or bk.derive_encoding(), lenlen=2)
839 else:
840 strg = unpack_unicode(data, 6, lenlen=2)
841 self_put_cell(rowx, colx, XL_CELL_TEXT, strg, xf_index)
842 elif rc == XL_RSTRING:
843 rowx, colx, xf_index = local_unpack('<HHH', data[0:6])
844 if bv < BIFF_FIRST_UNICODE:
845 strg, pos = unpack_string_update_pos(data, 6, bk.encoding or bk.derive_encoding(), lenlen=2)
846 nrt = BYTES_ORD(data[pos])
847 pos += 1
848 runlist = []
849 for _unused in xrange(nrt):
850 runlist.append(unpack('<BB', data[pos:pos+2]))
851 pos += 2
852 assert pos == len(data)
853 else:
854 strg, pos = unpack_unicode_update_pos(data, 6, lenlen=2)
855 nrt = unpack('<H', data[pos:pos+2])[0]
856 pos += 2
857 runlist = []
858 for _unused in xrange(nrt):
859 runlist.append(unpack('<HH', data[pos:pos+4]))
860 pos += 4
861 assert pos == len(data)
862 self_put_cell(rowx, colx, XL_CELL_TEXT, strg, xf_index)
863 self.rich_text_runlist_map[(rowx, colx)] = runlist
864 elif rc == XL_RK:
865 rowx, colx, xf_index = local_unpack('<HHH', data[:6])
866 d = unpack_RK(data[6:10])
867 self_put_cell(rowx, colx, None, d, xf_index)
868 elif rc == XL_MULRK:
869 mulrk_row, mulrk_first = local_unpack('<HH', data[0:4])
870 mulrk_last, = local_unpack('<H', data[-2:])
871 pos = 4
872 for colx in xrange(mulrk_first, mulrk_last+1):
873 xf_index, = local_unpack('<H', data[pos:pos+2])
874 d = unpack_RK(data[pos+2:pos+6])
875 pos += 6
876 self_put_cell(mulrk_row, colx, None, d, xf_index)
877 elif rc == XL_ROW:
878 # Version 0.6.0a3: ROW records are just not worth using (for memory allocation).
879 # Version 0.6.1: now used for formatting info.
880 if not fmt_info: continue
881 rowx, bits1, bits2 = local_unpack('<H4xH4xi', data[0:16])
882 if not(0 <= rowx < self.utter_max_rows):
883 print("*** NOTE: ROW record has row index %d; "
884 "should have 0 <= rowx < %d -- record ignored!"
885 % (rowx, self.utter_max_rows), file=self.logfile)
886 continue
887 key = (bits1, bits2)
888 r = rowinfo_sharing_dict.get(key)
889 if r is None:
890 rowinfo_sharing_dict[key] = r = Rowinfo()
891 # Using upkbits() is far too slow on a file
892 # with 30 sheets each with 10K rows :-(
893 # upkbits(r, bits1, (
894 # ( 0, 0x7FFF, 'height'),
895 # (15, 0x8000, 'has_default_height'),
896 # ))
897 # upkbits(r, bits2, (
898 # ( 0, 0x00000007, 'outline_level'),
899 # ( 4, 0x00000010, 'outline_group_starts_ends'),
900 # ( 5, 0x00000020, 'hidden'),
901 # ( 6, 0x00000040, 'height_mismatch'),
902 # ( 7, 0x00000080, 'has_default_xf_index'),
903 # (16, 0x0FFF0000, 'xf_index'),
904 # (28, 0x10000000, 'additional_space_above'),
905 # (29, 0x20000000, 'additional_space_below'),
906 # ))
907 # So:
908 r.height = bits1 & 0x7fff
909 r.has_default_height = (bits1 >> 15) & 1
910 r.outline_level = bits2 & 7
911 r.outline_group_starts_ends = (bits2 >> 4) & 1
912 r.hidden = (bits2 >> 5) & 1
913 r.height_mismatch = (bits2 >> 6) & 1
914 r.has_default_xf_index = (bits2 >> 7) & 1
915 r.xf_index = (bits2 >> 16) & 0xfff
916 r.additional_space_above = (bits2 >> 28) & 1
917 r.additional_space_below = (bits2 >> 29) & 1
918 if not r.has_default_xf_index:
919 r.xf_index = -1
920 self.rowinfo_map[rowx] = r
921 if 0 and r.xf_index > -1:
922 fprintf(self.logfile,
923 "**ROW %d %d %d\n",
924 self.number, rowx, r.xf_index)
925 if blah_rows:
926 print('ROW', rowx, bits1, bits2, file=self.logfile)
927 r.dump(self.logfile,
928 header="--- sh #%d, rowx=%d ---" % (self.number, rowx))
929 elif rc in XL_FORMULA_OPCODES: # 06, 0206, 0406
930 # DEBUG = 1
931 # if DEBUG: print "FORMULA: rc: 0x%04x data: %r" % (rc, data)
932 if bv >= 50:
933 rowx, colx, xf_index, result_str, flags = local_unpack('<HHH8sH', data[0:16])
934 elif bv >= 30:
935 rowx, colx, xf_index, result_str, flags = local_unpack('<HHH8sH', data[0:16])
936 else: # BIFF2
937 rowx, colx, cell_attr, result_str, flags = local_unpack('<HH3s8sB', data[0:16])
938 xf_index = self.fixed_BIFF2_xfindex(cell_attr, rowx, colx)
939 if blah_formulas: # testing formula dumper
940 #### XXXX FIXME
941 fprintf(self.logfile, "FORMULA: rowx=%d colx=%d\n", rowx, colx)
942 fmlalen = local_unpack("<H", data[20:22])[0]
943 decompile_formula(bk, data[22:], fmlalen, FMLA_TYPE_CELL,
944 browx=rowx, bcolx=colx, blah=1, r1c1=r1c1)
945 if result_str[6:8] == b"\xFF\xFF":
946 first_byte = BYTES_ORD(result_str[0])
947 if first_byte == 0:
948 # need to read next record (STRING)
949 gotstring = 0
950 # if flags & 8:
951 if 1: # "flags & 8" applies only to SHRFMLA
952 # actually there's an optional SHRFMLA or ARRAY etc record to skip over
953 rc2, data2_len, data2 = bk.get_record_parts()
954 if rc2 == XL_STRING or rc2 == XL_STRING_B2:
955 gotstring = 1
956 elif rc2 == XL_ARRAY:
957 row1x, rownx, col1x, colnx, array_flags, tokslen = \
958 local_unpack("<HHBBBxxxxxH", data2[:14])
959 if blah_formulas:
960 fprintf(self.logfile, "ARRAY: %d %d %d %d %d\n",
961 row1x, rownx, col1x, colnx, array_flags)
962 # dump_formula(bk, data2[14:], tokslen, bv, reldelta=0, blah=1)
963 elif rc2 == XL_SHRFMLA:
964 row1x, rownx, col1x, colnx, nfmlas, tokslen = \
965 local_unpack("<HHBBxBH", data2[:10])
966 if blah_formulas:
967 fprintf(self.logfile, "SHRFMLA (sub): %d %d %d %d %d\n",
968 row1x, rownx, col1x, colnx, nfmlas)
969 decompile_formula(bk, data2[10:], tokslen, FMLA_TYPE_SHARED,
970 blah=1, browx=rowx, bcolx=colx, r1c1=r1c1)
971 elif rc2 not in XL_SHRFMLA_ETC_ETC:
972 raise XLRDError(
973 "Expected SHRFMLA, ARRAY, TABLEOP* or STRING record; found 0x%04x" % rc2)
974 # if DEBUG: print "gotstring:", gotstring
975 # now for the STRING record
976 if not gotstring:
977 rc2, _unused_len, data2 = bk.get_record_parts()
978 if rc2 not in (XL_STRING, XL_STRING_B2):
979 raise XLRDError("Expected STRING record; found 0x%04x" % rc2)
980 # if DEBUG: print "STRING: data=%r BIFF=%d cp=%d" % (data2, self.biff_version, bk.encoding)
981 strg = self.string_record_contents(data2)
982 self.put_cell(rowx, colx, XL_CELL_TEXT, strg, xf_index)
983 # if DEBUG: print "FORMULA strg %r" % strg
984 elif first_byte == 1:
985 # boolean formula result
986 value = BYTES_ORD(result_str[2])
987 self_put_cell(rowx, colx, XL_CELL_BOOLEAN, value, xf_index)
988 elif first_byte == 2:
989 # Error in cell
990 value = BYTES_ORD(result_str[2])
991 self_put_cell(rowx, colx, XL_CELL_ERROR, value, xf_index)
992 elif first_byte == 3:
993 # empty ... i.e. empty (zero-length) string, NOT an empty cell.
994 self_put_cell(rowx, colx, XL_CELL_TEXT, "", xf_index)
995 else:
996 raise XLRDError("unexpected special case (0x%02x) in FORMULA" % first_byte)
997 else:
998 # it is a number
999 d = local_unpack('<d', result_str)[0]
1000 self_put_cell(rowx, colx, None, d, xf_index)
1001 elif rc == XL_BOOLERR:
1002 rowx, colx, xf_index, value, is_err = local_unpack('<HHHBB', data[:8])
1003 # Note OOo Calc 2.0 writes 9-byte BOOLERR records.
1004 # OOo docs say 8. Excel writes 8.
1005 cellty = (XL_CELL_BOOLEAN, XL_CELL_ERROR)[is_err]
1006 # if DEBUG: print "XL_BOOLERR", rowx, colx, xf_index, value, is_err
1007 self_put_cell(rowx, colx, cellty, value, xf_index)
1008 elif rc == XL_COLINFO:
1009 if not fmt_info: continue
1010 c = Colinfo()
1011 first_colx, last_colx, c.width, c.xf_index, flags \
1012 = local_unpack("<HHHHH", data[:10])
1013 #### Colinfo.width is denominated in 256ths of a character,
1014 #### *not* in characters.
1015 if not(0 <= first_colx <= last_colx <= 256):
1016 # Note: 256 instead of 255 is a common mistake.
1017 # We silently ignore the non-existing 257th column in that case.
1018 print("*** NOTE: COLINFO record has first col index %d, last %d; "
1019 "should have 0 <= first <= last <= 255 -- record ignored!"
1020 % (first_colx, last_colx), file=self.logfile)
1021 del c
1022 continue
1023 upkbits(c, flags, (
1024 ( 0, 0x0001, 'hidden'),
1025 ( 1, 0x0002, 'bit1_flag'),
1026 # *ALL* colinfos created by Excel in "default" cases are 0x0002!!
1027 # Maybe it's "locked" by analogy with XFProtection data.
1028 ( 8, 0x0700, 'outline_level'),
1029 (12, 0x1000, 'collapsed'),
1030 ))
1031 for colx in xrange(first_colx, last_colx+1):
1032 if colx > 255: break # Excel does 0 to 256 inclusive
1033 self.colinfo_map[colx] = c
1034 if 0:
1035 fprintf(self.logfile,
1036 "**COL %d %d %d\n",
1037 self.number, colx, c.xf_index)
1038 if blah:
1039 fprintf(
1040 self.logfile,
1041 "COLINFO sheet #%d cols %d-%d: wid=%d xf_index=%d flags=0x%04x\n",
1042 self.number, first_colx, last_colx, c.width, c.xf_index, flags,
1043 )
1044 c.dump(self.logfile, header='===')
1045 elif rc == XL_DEFCOLWIDTH:
1046 self.defcolwidth, = local_unpack("<H", data[:2])
1047 if 0: print('DEFCOLWIDTH', self.defcolwidth, file=self.logfile)
1048 elif rc == XL_STANDARDWIDTH:
1049 if data_len != 2:
1050 print('*** ERROR *** STANDARDWIDTH', data_len, repr(data), file=self.logfile)
1051 self.standardwidth, = local_unpack("<H", data[:2])
1052 if 0: print('STANDARDWIDTH', self.standardwidth, file=self.logfile)
1053 elif rc == XL_GCW:
1054 if not fmt_info: continue # useless w/o COLINFO
1055 assert data_len == 34
1056 assert data[0:2] == b"\x20\x00"
1057 iguff = unpack("<8i", data[2:34])
1058 gcw = []
1059 for bits in iguff:
1060 for j in xrange(32):
1061 gcw.append(bits & 1)
1062 bits >>= 1
1063 self.gcw = tuple(gcw)
1064 if 0:
1065 showgcw = "".join(map(lambda x: "F "[x], gcw)).rstrip().replace(' ', '.')
1066 print("GCW:", showgcw, file=self.logfile)
1067 elif rc == XL_BLANK:
1068 if not fmt_info: continue
1069 rowx, colx, xf_index = local_unpack('<HHH', data[:6])
1070 # if 0: print >> self.logfile, "BLANK", rowx, colx, xf_index
1071 self_put_cell(rowx, colx, XL_CELL_BLANK, '', xf_index)
1072 elif rc == XL_MULBLANK: # 00BE
1073 if not fmt_info: continue
1074 nitems = data_len >> 1
1075 result = local_unpack("<%dH" % nitems, data)
1076 rowx, mul_first = result[:2]
1077 mul_last = result[-1]
1078 # print >> self.logfile, "MULBLANK", rowx, mul_first, mul_last, data_len, nitems, mul_last + 4 - mul_first
1079 assert nitems == mul_last + 4 - mul_first
1080 pos = 2
1081 for colx in xrange(mul_first, mul_last + 1):
1082 self_put_cell(rowx, colx, XL_CELL_BLANK, '', result[pos])
1083 pos += 1
1084 elif rc == XL_DIMENSION or rc == XL_DIMENSION2:
1085 if data_len == 0:
1086 # Four zero bytes after some other record. See github issue 64.
1087 continue
1088 # if data_len == 10:
1089 # Was crashing on BIFF 4.0 file w/o the two trailing unused bytes.
1090 # Reported by Ralph Heimburger.
1091 if bv < 80:
1092 dim_tuple = local_unpack('<HxxH', data[2:8])
1093 else:
1094 dim_tuple = local_unpack('<ixxH', data[4:12])
1095 self.nrows, self.ncols = 0, 0
1096 self._dimnrows, self._dimncols = dim_tuple
1097 if bv in (21, 30, 40) and self.book.xf_list and not self.book._xf_epilogue_done:
1098 self.book.xf_epilogue()
1099 if blah:
1100 fprintf(
1101 self.logfile,
1102 "sheet %d(%r) DIMENSIONS: ncols=%d nrows=%d\n",
1103 self.number, self.name, self._dimncols, self._dimnrows
1104 )
1105 elif rc == XL_HLINK:
1106 self.handle_hlink(data)
1107 elif rc == XL_QUICKTIP:
1108 self.handle_quicktip(data)
1109 elif rc == XL_EOF:
1110 DEBUG = 0
1111 if DEBUG: print("SHEET.READ: EOF", file=self.logfile)
1112 eof_found = 1
1113 break
1114 elif rc == XL_OBJ:
1115 # handle SHEET-level objects; note there's a separate Book.handle_obj
1116 saved_obj = self.handle_obj(data)
1117 if saved_obj: saved_obj_id = saved_obj.id
1118 else: saved_obj_id = None
1119 elif rc == XL_MSO_DRAWING:
1120 self.handle_msodrawingetc(rc, data_len, data)
1121 elif rc == XL_TXO:
1122 txo = self.handle_txo(data)
1123 if txo and saved_obj_id:
1124 txos[saved_obj_id] = txo
1125 saved_obj_id = None
1126 elif rc == XL_NOTE:
1127 self.handle_note(data, txos)
1128 elif rc == XL_FEAT11:
1129 self.handle_feat11(data)
1130 elif rc in bofcodes: ##### EMBEDDED BOF #####
1131 version, boftype = local_unpack('<HH', data[0:4])
1132 if boftype != 0x20: # embedded chart
1133 print("*** Unexpected embedded BOF (0x%04x) at offset %d: version=0x%04x type=0x%04x"
1134 % (rc, bk._position - data_len - 4, version, boftype), file=self.logfile)
1135 while 1:
1136 code, data_len, data = bk.get_record_parts()
1137 if code == XL_EOF:
1138 break
1139 if DEBUG: print("---> found EOF", file=self.logfile)
1140 elif rc == XL_COUNTRY:
1141 bk.handle_country(data)
1142 elif rc == XL_LABELRANGES:
1143 pos = 0
1144 pos = unpack_cell_range_address_list_update_pos(
1145 self.row_label_ranges, data, pos, bv, addr_size=8,
1146 )
1147 pos = unpack_cell_range_address_list_update_pos(
1148 self.col_label_ranges, data, pos, bv, addr_size=8,
1149 )
1150 assert pos == data_len
1151 elif rc == XL_ARRAY:
1152 row1x, rownx, col1x, colnx, array_flags, tokslen = \
1153 local_unpack("<HHBBBxxxxxH", data[:14])
1154 if blah_formulas:
1155 print("ARRAY:", row1x, rownx, col1x, colnx, array_flags, file=self.logfile)
1156 # dump_formula(bk, data[14:], tokslen, bv, reldelta=0, blah=1)
1157 elif rc == XL_SHRFMLA:
1158 row1x, rownx, col1x, colnx, nfmlas, tokslen = \
1159 local_unpack("<HHBBxBH", data[:10])
1160 if blah_formulas:
1161 print("SHRFMLA (main):", row1x, rownx, col1x, colnx, nfmlas, file=self.logfile)
1162 decompile_formula(bk, data[10:], tokslen, FMLA_TYPE_SHARED,
1163 blah=1, browx=rowx, bcolx=colx, r1c1=r1c1)
1164 elif rc == XL_CONDFMT:
1165 if not fmt_info: continue
1166 assert bv >= 80
1167 num_CFs, needs_recalc, browx1, browx2, bcolx1, bcolx2 = \
1168 unpack("<6H", data[0:12])
1169 if self.verbosity >= 1:
1170 fprintf(
1171 self.logfile,
1172 "\n*** WARNING: Ignoring CONDFMT (conditional formatting) record\n"
1173 "*** in Sheet %d (%r).\n"
1174 "*** %d CF record(s); needs_recalc_or_redraw = %d\n"
1175 "*** Bounding box is %s\n",
1176 self.number, self.name, num_CFs, needs_recalc,
1177 rangename2d(browx1, browx2+1, bcolx1, bcolx2+1),
1178 )
1179 olist = [] # updated by the function
1180 pos = unpack_cell_range_address_list_update_pos(
1181 olist, data, 12, bv, addr_size=8)
1182 # print >> self.logfile, repr(result), len(result)
1183 if self.verbosity >= 1:
1184 fprintf(
1185 self.logfile,
1186 "*** %d individual range(s):\n"
1187 "*** %s\n",
1188 len(olist),
1189 ", ".join(rangename2d(*coords) for coords in olist),
1190 )
1191 elif rc == XL_CF:
1192 if not fmt_info: continue
1193 cf_type, cmp_op, sz1, sz2, flags = unpack("<BBHHi", data[0:10])
1194 font_block = (flags >> 26) & 1
1195 bord_block = (flags >> 28) & 1
1196 patt_block = (flags >> 29) & 1
1197 if self.verbosity >= 1:
1198 fprintf(
1199 self.logfile,
1200 "\n*** WARNING: Ignoring CF (conditional formatting) sub-record.\n"
1201 "*** cf_type=%d, cmp_op=%d, sz1=%d, sz2=%d, flags=0x%08x\n"
1202 "*** optional data blocks: font=%d, border=%d, pattern=%d\n",
1203 cf_type, cmp_op, sz1, sz2, flags,
1204 font_block, bord_block, patt_block,
1205 )
1206 # hex_char_dump(data, 0, data_len, fout=self.logfile)
1207 pos = 12
1208 if font_block:
1209 (font_height, font_options, weight, escapement, underline,
1210 font_colour_index, two_bits, font_esc, font_underl) = unpack("<64x i i H H B 3x i 4x i i i 18x", data[pos:pos+118])
1211 font_style = (two_bits > 1) & 1
1212 posture = (font_options > 1) & 1
1213 font_canc = (two_bits > 7) & 1
1214 cancellation = (font_options > 7) & 1
1215 if self.verbosity >= 1:
1216 fprintf(
1217 self.logfile,
1218 "*** Font info: height=%d, weight=%d, escapement=%d,\n"
1219 "*** underline=%d, colour_index=%d, esc=%d, underl=%d,\n"
1220 "*** style=%d, posture=%d, canc=%d, cancellation=%d\n",
1221 font_height, weight, escapement, underline,
1222 font_colour_index, font_esc, font_underl,
1223 font_style, posture, font_canc, cancellation,
1224 )
1225 pos += 118
1226 if bord_block:
1227 pos += 8
1228 if patt_block:
1229 pos += 4
1230 fmla1 = data[pos:pos+sz1]
1231 pos += sz1
1232 if blah and sz1:
1233 fprintf(self.logfile, "*** formula 1:\n")
1234 dump_formula(bk, fmla1, sz1, bv, reldelta=0, blah=1)
1235 fmla2 = data[pos:pos+sz2]
1236 pos += sz2
1237 assert pos == data_len
1238 if blah and sz2:
1239 fprintf(self.logfile, "*** formula 2:\n")
1240 dump_formula(bk, fmla2, sz2, bv, reldelta=0, blah=1)
1241 elif rc == XL_DEFAULTROWHEIGHT:
1242 if data_len == 4:
1243 bits, self.default_row_height = unpack("<HH", data[:4])
1244 elif data_len == 2:
1245 self.default_row_height, = unpack("<H", data)
1246 bits = 0
1247 fprintf(self.logfile,
1248 "*** WARNING: DEFAULTROWHEIGHT record len is 2, "
1249 "should be 4; assuming BIFF2 format\n")
1250 else:
1251 bits = 0
1252 fprintf(self.logfile,
1253 "*** WARNING: DEFAULTROWHEIGHT record len is %d, "
1254 "should be 4; ignoring this record\n",
1255 data_len)
1256 self.default_row_height_mismatch = bits & 1
1257 self.default_row_hidden = (bits >> 1) & 1
1258 self.default_additional_space_above = (bits >> 2) & 1
1259 self.default_additional_space_below = (bits >> 3) & 1
1260 elif rc == XL_MERGEDCELLS:
1261 if not fmt_info: continue
1262 pos = unpack_cell_range_address_list_update_pos(
1263 self.merged_cells, data, 0, bv, addr_size=8)
1264 if blah:
1265 fprintf(self.logfile,
1266 "MERGEDCELLS: %d ranges\n", (pos - 2) // 8)
1267 assert pos == data_len, \
1268 "MERGEDCELLS: pos=%d data_len=%d" % (pos, data_len)
1269 elif rc == XL_WINDOW2:
1270 if bv >= 80 and data_len >= 14:
1271 (
1272 options,
1273 self.first_visible_rowx, self.first_visible_colx,
1274 self.gridline_colour_index,
1275 self.cached_page_break_preview_mag_factor,
1276 self.cached_normal_view_mag_factor
1277 ) = unpack("<HHHHxxHH", data[:14])
1278 else:
1279 assert bv >= 30 # BIFF3-7
1280 (
1281 options,
1282 self.first_visible_rowx, self.first_visible_colx,
1283 ) = unpack("<HHH", data[:6])
1284 self.gridline_colour_rgb = unpack("<BBB", data[6:9])
1285 self.gridline_colour_index = nearest_colour_index(
1286 self.book.colour_map, self.gridline_colour_rgb, debug=0)
1287 # options -- Bit, Mask, Contents:
1288 # 0 0001H 0 = Show formula results 1 = Show formulas
1289 # 1 0002H 0 = Do not show grid lines 1 = Show grid lines
1290 # 2 0004H 0 = Do not show sheet headers 1 = Show sheet headers
1291 # 3 0008H 0 = Panes are not frozen 1 = Panes are frozen (freeze)
1292 # 4 0010H 0 = Show zero values as empty cells 1 = Show zero values
1293 # 5 0020H 0 = Manual grid line colour 1 = Automatic grid line colour
1294 # 6 0040H 0 = Columns from left to right 1 = Columns from right to left
1295 # 7 0080H 0 = Do not show outline symbols 1 = Show outline symbols
1296 # 8 0100H 0 = Keep splits if pane freeze is removed 1 = Remove splits if pane freeze is removed
1297 # 9 0200H 0 = Sheet not selected 1 = Sheet selected (BIFF5-BIFF8)
1298 # 10 0400H 0 = Sheet not visible 1 = Sheet visible (BIFF5-BIFF8)
1299 # 11 0800H 0 = Show in normal view 1 = Show in page break preview (BIFF8)
1300 # The freeze flag specifies, if a following PANE record (6.71) describes unfrozen or frozen panes.
1301 for attr, _unused_defval in _WINDOW2_options:
1302 setattr(self, attr, options & 1)
1303 options >>= 1
1304 elif rc == XL_SCL:
1305 num, den = unpack("<HH", data)
1306 result = 0
1307 if den:
1308 result = (num * 100) // den
1309 if not(10 <= result <= 400):
1310 if DEBUG or self.verbosity >= 0:
1311 print(
1312 "WARNING *** SCL rcd sheet %d: should have 0.1 <= num/den <= 4; got %d/%d"
1313 % (self.number, num, den),
1314 file=self.logfile,
1315 )
1316 result = 100
1317 self.scl_mag_factor = result
1318 elif rc == XL_PANE:
1319 (
1320 self.vert_split_pos,
1321 self.horz_split_pos,
1322 self.horz_split_first_visible,
1323 self.vert_split_first_visible,
1324 self.split_active_pane,
1325 ) = unpack("<HHHHB", data[:9])
1326 self.has_pane_record = 1
1327 elif rc == XL_HORIZONTALPAGEBREAKS:
1328 if not fmt_info: continue
1329 num_breaks, = local_unpack("<H", data[:2])
1330 assert num_breaks * (2 + 4 * (bv >= 80)) + 2 == data_len
1331 pos = 2
1332 if bv < 80:
1333 while pos < data_len:
1334 self.horizontal_page_breaks.append((local_unpack("<H", data[pos:pos+2])[0], 0, 255))
1335 pos += 2
1336 else:
1337 while pos < data_len:
1338 self.horizontal_page_breaks.append(local_unpack("<HHH", data[pos:pos+6]))
1339 pos += 6
1340 elif rc == XL_VERTICALPAGEBREAKS:
1341 if not fmt_info: continue
1342 num_breaks, = local_unpack("<H", data[:2])
1343 assert num_breaks * (2 + 4 * (bv >= 80)) + 2 == data_len
1344 pos = 2
1345 if bv < 80:
1346 while pos < data_len:
1347 self.vertical_page_breaks.append((local_unpack("<H", data[pos:pos+2])[0], 0, 65535))
1348 pos += 2
1349 else:
1350 while pos < data_len:
1351 self.vertical_page_breaks.append(local_unpack("<HHH", data[pos:pos+6]))
1352 pos += 6
1353 #### all of the following are for BIFF <= 4W
1354 elif bv <= 45:
1355 if rc == XL_FORMAT or rc == XL_FORMAT2:
1356 bk.handle_format(data, rc)
1357 elif rc == XL_FONT or rc == XL_FONT_B3B4:
1358 bk.handle_font(data)
1359 elif rc == XL_STYLE:
1360 if not self.book._xf_epilogue_done:
1361 self.book.xf_epilogue()
1362 bk.handle_style(data)
1363 elif rc == XL_PALETTE:
1364 bk.handle_palette(data)
1365 elif rc == XL_BUILTINFMTCOUNT:
1366 bk.handle_builtinfmtcount(data)
1367 elif rc == XL_XF4 or rc == XL_XF3 or rc == XL_XF2: #### N.B. not XL_XF
1368 bk.handle_xf(data)
1369 elif rc == XL_DATEMODE:
1370 bk.handle_datemode(data)
1371 elif rc == XL_CODEPAGE:
1372 bk.handle_codepage(data)
1373 elif rc == XL_FILEPASS:
1374 bk.handle_filepass(data)
1375 elif rc == XL_WRITEACCESS:
1376 bk.handle_writeaccess(data)
1377 elif rc == XL_IXFE:
1378 self._ixfe = local_unpack('<H', data)[0]
1379 elif rc == XL_NUMBER_B2:
1380 rowx, colx, cell_attr, d = local_unpack('<HH3sd', data)
1381 self_put_cell(rowx, colx, None, d, self.fixed_BIFF2_xfindex(cell_attr, rowx, colx))
1382 elif rc == XL_INTEGER:
1383 rowx, colx, cell_attr, d = local_unpack('<HH3sH', data)
1384 self_put_cell(rowx, colx, None, float(d), self.fixed_BIFF2_xfindex(cell_attr, rowx, colx))
1385 elif rc == XL_LABEL_B2:
1386 rowx, colx, cell_attr = local_unpack('<HH3s', data[0:7])
1387 strg = unpack_string(data, 7, bk.encoding or bk.derive_encoding(), lenlen=1)
1388 self_put_cell(rowx, colx, XL_CELL_TEXT, strg, self.fixed_BIFF2_xfindex(cell_attr, rowx, colx))
1389 elif rc == XL_BOOLERR_B2:
1390 rowx, colx, cell_attr, value, is_err = local_unpack('<HH3sBB', data)
1391 cellty = (XL_CELL_BOOLEAN, XL_CELL_ERROR)[is_err]
1392 # if DEBUG: print "XL_BOOLERR_B2", rowx, colx, cell_attr, value, is_err
1393 self_put_cell(rowx, colx, cellty, value, self.fixed_BIFF2_xfindex(cell_attr, rowx, colx))
1394 elif rc == XL_BLANK_B2:
1395 if not fmt_info: continue
1396 rowx, colx, cell_attr = local_unpack('<HH3s', data[:7])
1397 self_put_cell(rowx, colx, XL_CELL_BLANK, '', self.fixed_BIFF2_xfindex(cell_attr, rowx, colx))
1398 elif rc == XL_EFONT:
1399 bk.handle_efont(data)
1400 elif rc == XL_ROW_B2:
1401 if not fmt_info: continue
1402 rowx, bits1, bits2 = local_unpack('<H4xH2xB', data[0:11])
1403 if not(0 <= rowx < self.utter_max_rows):
1404 print("*** NOTE: ROW_B2 record has row index %d; "
1405 "should have 0 <= rowx < %d -- record ignored!"
1406 % (rowx, self.utter_max_rows), file=self.logfile)
1407 continue
1408 if not (bits2 & 1): # has_default_xf_index is false
1409 xf_index = -1
1410 elif data_len == 18:
1411 # Seems the XF index in the cell_attr is dodgy
1412 xfx = local_unpack('<H', data[16:18])[0]
1413 xf_index = self.fixed_BIFF2_xfindex(cell_attr=None, rowx=rowx, colx=-1, true_xfx=xfx)
1414 else:
1415 cell_attr = data[13:16]
1416 xf_index = self.fixed_BIFF2_xfindex(cell_attr, rowx, colx=-1)
1417 key = (bits1, bits2, xf_index)
1418 r = rowinfo_sharing_dict.get(key)
1419 if r is None:
1420 rowinfo_sharing_dict[key] = r = Rowinfo()
1421 r.height = bits1 & 0x7fff
1422 r.has_default_height = (bits1 >> 15) & 1
1423 r.has_default_xf_index = bits2 & 1
1424 r.xf_index = xf_index
1425 # r.outline_level = 0 # set in __init__
1426 # r.outline_group_starts_ends = 0 # set in __init__
1427 # r.hidden = 0 # set in __init__
1428 # r.height_mismatch = 0 # set in __init__
1429 # r.additional_space_above = 0 # set in __init__
1430 # r.additional_space_below = 0 # set in __init__
1431 self.rowinfo_map[rowx] = r
1432 if 0 and r.xf_index > -1:
1433 fprintf(self.logfile,
1434 "**ROW %d %d %d\n",
1435 self.number, rowx, r.xf_index)
1436 if blah_rows:
1437 print('ROW_B2', rowx, bits1, file=self.logfile)
1438 r.dump(self.logfile,
1439 header="--- sh #%d, rowx=%d ---" % (self.number, rowx))
1440 elif rc == XL_COLWIDTH: # BIFF2 only
1441 if not fmt_info: continue
1442 first_colx, last_colx, width\
1443 = local_unpack("<BBH", data[:4])
1444 if not(first_colx <= last_colx):
1445 print("*** NOTE: COLWIDTH record has first col index %d, last %d; "
1446 "should have first <= last -- record ignored!"
1447 % (first_colx, last_colx), file=self.logfile)
1448 continue
1449 for colx in xrange(first_colx, last_colx+1):
1450 if colx in self.colinfo_map:
1451 c = self.colinfo_map[colx]
1452 else:
1453 c = Colinfo()
1454 self.colinfo_map[colx] = c
1455 c.width = width
1456 if blah:
1457 fprintf(
1458 self.logfile,
1459 "COLWIDTH sheet #%d cols %d-%d: wid=%d\n",
1460 self.number, first_colx, last_colx, width,
1461 )
1462 elif rc == XL_COLUMNDEFAULT: # BIFF2 only
1463 if not fmt_info: continue
1464 first_colx, last_colx = local_unpack("<HH", data[:4])
1465 #### Warning OOo docs wrong; first_colx <= colx < last_colx
1466 if blah:
1467 fprintf(
1468 self.logfile,
1469 "COLUMNDEFAULT sheet #%d cols in range(%d, %d)\n",
1470 self.number, first_colx, last_colx,
1471 )
1472 if not(0 <= first_colx < last_colx <= 256):
1473 print("*** NOTE: COLUMNDEFAULT record has first col index %d, last %d; "
1474 "should have 0 <= first < last <= 256"
1475 % (first_colx, last_colx), file=self.logfile)
1476 last_colx = min(last_colx, 256)
1477 for colx in xrange(first_colx, last_colx):
1478 offset = 4 + 3 * (colx - first_colx)
1479 cell_attr = data[offset:offset+3]
1480 xf_index = self.fixed_BIFF2_xfindex(cell_attr, rowx=-1, colx=colx)
1481 if colx in self.colinfo_map:
1482 c = self.colinfo_map[colx]
1483 else:
1484 c = Colinfo()
1485 self.colinfo_map[colx] = c
1486 c.xf_index = xf_index
1487 elif rc == XL_WINDOW2_B2: # BIFF 2 only
1488 attr_names = ("show_formulas", "show_grid_lines", "show_sheet_headers",
1489 "panes_are_frozen", "show_zero_values")
1490 for attr, char in zip(attr_names, data[0:5]):
1491 setattr(self, attr, int(char != b'\0'))
1492 (
1493 self.first_visible_rowx, self.first_visible_colx,
1494 self.automatic_grid_line_colour,
1495 ) = unpack("<HHB", data[5:10])
1496 self.gridline_colour_rgb = unpack("<BBB", data[10:13])
1497 self.gridline_colour_index = nearest_colour_index(
1498 self.book.colour_map, self.gridline_colour_rgb, debug=0)
1499 else:
1500 # if DEBUG: print "SHEET.READ: Unhandled record type %02x %d bytes %r" % (rc, data_len, data)
1501 pass
1502 if not eof_found:
1503 raise XLRDError("Sheet %d (%r) missing EOF record"
1504 % (self.number, self.name))
1505 self.tidy_dimensions()
1506 self.update_cooked_mag_factors()
1507 bk._position = oldpos
1508 return 1
1510 def string_record_contents(self, data):
1511 bv = self.biff_version
1512 bk = self.book
1513 lenlen = (bv >= 30) + 1
1514 nchars_expected = unpack("<" + "BH"[lenlen - 1], data[:lenlen])[0]
1515 offset = lenlen
1516 if bv < 80:
1517 enc = bk.encoding or bk.derive_encoding()
1518 nchars_found = 0
1519 result = UNICODE_LITERAL("")
1520 while 1:
1521 if bv >= 80:
1522 flag = BYTES_ORD(data[offset]) & 1
1523 enc = ("latin_1", "utf_16_le")[flag]
1524 offset += 1
1525 chunk = unicode(data[offset:], enc)
1526 result += chunk
1527 nchars_found += len(chunk)
1528 if nchars_found == nchars_expected:
1529 return result
1530 if nchars_found > nchars_expected:
1531 msg = ("STRING/CONTINUE: expected %d chars, found %d"
1532 % (nchars_expected, nchars_found))
1533 raise XLRDError(msg)
1534 rc, _unused_len, data = bk.get_record_parts()
1535 if rc != XL_CONTINUE:
1536 raise XLRDError(
1537 "Expected CONTINUE record; found record-type 0x%04X" % rc)
1538 offset = 0
1540 def update_cooked_mag_factors(self):
1541 # Cached values are used ONLY for the non-active view mode.
1542 # When the user switches to the non-active view mode,
1543 # if the cached value for that mode is not valid,
1544 # Excel pops up a window which says:
1545 # "The number must be between 10 and 400. Try again by entering a number in this range."
1546 # When the user hits OK, it drops into the non-active view mode
1547 # but uses the magn from the active mode.
1548 # NOTE: definition of "valid" depends on mode ... see below
1549 blah = DEBUG or self.verbosity > 0
1550 if self.show_in_page_break_preview:
1551 if self.scl_mag_factor is None: # no SCL record
1552 self.cooked_page_break_preview_mag_factor = 100 # Yes, 100, not 60, NOT a typo
1553 else:
1554 self.cooked_page_break_preview_mag_factor = self.scl_mag_factor
1555 zoom = self.cached_normal_view_mag_factor
1556 if not (10 <= zoom <=400):
1557 if blah:
1558 print(
1559 "WARNING *** WINDOW2 rcd sheet %d: Bad cached_normal_view_mag_factor: %d"
1560 % (self.number, self.cached_normal_view_mag_factor),
1561 file=self.logfile,
1562 )
1563 zoom = self.cooked_page_break_preview_mag_factor
1564 self.cooked_normal_view_mag_factor = zoom
1565 else:
1566 # normal view mode
1567 if self.scl_mag_factor is None: # no SCL record
1568 self.cooked_normal_view_mag_factor = 100
1569 else:
1570 self.cooked_normal_view_mag_factor = self.scl_mag_factor
1571 zoom = self.cached_page_break_preview_mag_factor
1572 if not zoom:
1573 # VALID, defaults to 60
1574 zoom = 60
1575 elif not (10 <= zoom <= 400):
1576 if blah:
1577 print(
1578 "WARNING *** WINDOW2 rcd sheet %r: Bad cached_page_break_preview_mag_factor: %r"
1579 % (self.number, self.cached_page_break_preview_mag_factor),
1580 file=self.logfile,
1581 )
1582 zoom = self.cooked_normal_view_mag_factor
1583 self.cooked_page_break_preview_mag_factor = zoom
1585 def fixed_BIFF2_xfindex(self, cell_attr, rowx, colx, true_xfx=None):
1586 DEBUG = 0
1587 blah = DEBUG or self.verbosity >= 2
1588 if self.biff_version == 21:
1589 if self.book.xf_list:
1590 if true_xfx is not None:
1591 xfx = true_xfx
1592 else:
1593 xfx = BYTES_ORD(cell_attr[0]) & 0x3F
1594 if xfx == 0x3F:
1595 if self._ixfe is None:
1596 raise XLRDError("BIFF2 cell record has XF index 63 but no preceding IXFE record.")
1597 xfx = self._ixfe
1598 # OOo docs are capable of interpretation that each
1599 # cell record is preceded immediately by its own IXFE record.
1600 # Empirical evidence is that (sensibly) an IXFE record applies to all
1601 # following cell records until another IXFE comes along.
1602 return xfx
1603 # Have either Excel 2.0, or broken 2.1 w/o XF records -- same effect.
1604 self.biff_version = self.book.biff_version = 20
1605 #### check that XF slot in cell_attr is zero
1606 xfx_slot = BYTES_ORD(cell_attr[0]) & 0x3F
1607 assert xfx_slot == 0
1608 xfx = self._cell_attr_to_xfx.get(cell_attr)
1609 if xfx is not None:
1610 return xfx
1611 if blah:
1612 fprintf(self.logfile, "New cell_attr %r at (%r, %r)\n", cell_attr, rowx, colx)
1613 if not self.book.xf_list:
1614 for xfx in xrange(16):
1615 self.insert_new_BIFF20_xf(cell_attr=b"\x40\x00\x00", style=xfx < 15)
1616 xfx = self.insert_new_BIFF20_xf(cell_attr=cell_attr)
1617 return xfx
1619 def insert_new_BIFF20_xf(self, cell_attr, style=0):
1620 DEBUG = 0
1621 blah = DEBUG or self.verbosity >= 2
1622 book = self.book
1623 xfx = len(book.xf_list)
1624 xf = self.fake_XF_from_BIFF20_cell_attr(cell_attr, style)
1625 xf.xf_index = xfx
1626 book.xf_list.append(xf)
1627 if blah:
1628 xf.dump(self.logfile, header="=== Faked XF %d ===" % xfx, footer="======")
1629 if xf.format_key not in book.format_map:
1630 if xf.format_key:
1631 msg = "ERROR *** XF[%d] unknown format key (%d, 0x%04x)\n"
1632 fprintf(self.logfile, msg,
1633 xf.xf_index, xf.format_key, xf.format_key)
1634 fmt = Format(xf.format_key, FUN, UNICODE_LITERAL("General"))
1635 book.format_map[xf.format_key] = fmt
1636 book.format_list.append(fmt)
1637 cellty_from_fmtty = {
1638 FNU: XL_CELL_NUMBER,
1639 FUN: XL_CELL_NUMBER,
1640 FGE: XL_CELL_NUMBER,
1641 FDT: XL_CELL_DATE,
1642 FTX: XL_CELL_NUMBER, # Yes, a number can be formatted as text.
1643 }
1644 fmt = book.format_map[xf.format_key]
1645 cellty = cellty_from_fmtty[fmt.type]
1646 self._xf_index_to_xl_type_map[xf.xf_index] = cellty
1647 self._cell_attr_to_xfx[cell_attr] = xfx
1648 return xfx
1650 def fake_XF_from_BIFF20_cell_attr(self, cell_attr, style=0):
1651 from .formatting import XF, XFAlignment, XFBorder, XFBackground, XFProtection
1652 xf = XF()
1653 xf.alignment = XFAlignment()
1654 xf.alignment.indent_level = 0
1655 xf.alignment.shrink_to_fit = 0
1656 xf.alignment.text_direction = 0
1657 xf.border = XFBorder()
1658 xf.border.diag_up = 0
1659 xf.border.diag_down = 0
1660 xf.border.diag_colour_index = 0
1661 xf.border.diag_line_style = 0 # no line
1662 xf.background = XFBackground()
1663 xf.protection = XFProtection()
1664 (prot_bits, font_and_format, halign_etc) = unpack('<BBB', cell_attr)
1665 xf.format_key = font_and_format & 0x3F
1666 xf.font_index = (font_and_format & 0xC0) >> 6
1667 upkbits(xf.protection, prot_bits, (
1668 (6, 0x40, 'cell_locked'),
1669 (7, 0x80, 'formula_hidden'),
1670 ))
1671 xf.alignment.hor_align = halign_etc & 0x07
1672 for mask, side in ((0x08, 'left'), (0x10, 'right'), (0x20, 'top'), (0x40, 'bottom')):
1673 if halign_etc & mask:
1674 colour_index, line_style = 8, 1 # black, thin
1675 else:
1676 colour_index, line_style = 0, 0 # none, none
1677 setattr(xf.border, side + '_colour_index', colour_index)
1678 setattr(xf.border, side + '_line_style', line_style)
1679 bg = xf.background
1680 if halign_etc & 0x80:
1681 bg.fill_pattern = 17
1682 else:
1683 bg.fill_pattern = 0
1684 bg.background_colour_index = 9 # white
1685 bg.pattern_colour_index = 8 # black
1686 xf.parent_style_index = (0x0FFF, 0)[style]
1687 xf.alignment.vert_align = 2 # bottom
1688 xf.alignment.rotation = 0
1689 attr_stems = [
1690 'format',
1691 'font',
1692 'alignment',
1693 'border',
1694 'background',
1695 'protection',
1696 ]
1697 for attr_stem in attr_stems:
1698 attr = "_" + attr_stem + "_flag"
1699 setattr(xf, attr, 1)
1700 return xf
1702 def req_fmt_info(self):
1703 if not self.formatting_info:
1704 raise XLRDError("Feature requires open_workbook(..., formatting_info=True)")
1706 def computed_column_width(self, colx):
1707 """
1708 Determine column display width.
1710 :param colx:
1711 Index of the queried column, range 0 to 255.
1712 Note that it is possible to find out the width that will be used to
1713 display columns with no cell information e.g. column IV (colx=255).
1715 :return:
1716 The column width that will be used for displaying
1717 the given column by Excel, in units of 1/256th of the width of a
1718 standard character (the digit zero in the first font).
1720 .. versionadded:: 0.6.1
1721 """
1722 self.req_fmt_info()
1723 if self.biff_version >= 80:
1724 colinfo = self.colinfo_map.get(colx, None)
1725 if colinfo is not None:
1726 return colinfo.width
1727 if self.standardwidth is not None:
1728 return self.standardwidth
1729 elif self.biff_version >= 40:
1730 if self.gcw[colx]:
1731 if self.standardwidth is not None:
1732 return self.standardwidth
1733 else:
1734 colinfo = self.colinfo_map.get(colx, None)
1735 if colinfo is not None:
1736 return colinfo.width
1737 elif self.biff_version == 30:
1738 colinfo = self.colinfo_map.get(colx, None)
1739 if colinfo is not None:
1740 return colinfo.width
1741 # All roads lead to Rome and the DEFCOLWIDTH ...
1742 if self.defcolwidth is not None:
1743 return self.defcolwidth * 256
1744 return 8 * 256 # 8 is what Excel puts in a DEFCOLWIDTH record
1746 def handle_hlink(self, data):
1747 # DEBUG = 1
1748 if DEBUG: print("\n=== hyperlink ===", file=self.logfile)
1749 record_size = len(data)
1750 h = Hyperlink()
1751 h.frowx, h.lrowx, h.fcolx, h.lcolx, guid0, dummy, options = unpack('<HHHH16s4si', data[:32])
1752 assert guid0 == b"\xD0\xC9\xEA\x79\xF9\xBA\xCE\x11\x8C\x82\x00\xAA\x00\x4B\xA9\x0B"
1753 assert dummy == b"\x02\x00\x00\x00"
1754 if DEBUG: print("options: %08X" % options, file=self.logfile)
1755 offset = 32
1757 def get_nul_terminated_unicode(buf, ofs):
1758 nb = unpack('<L', buf[ofs:ofs+4])[0] * 2
1759 ofs += 4
1760 uc = unicode(buf[ofs:ofs+nb], 'UTF-16le')[:-1]
1761 ofs += nb
1762 return uc, ofs
1764 if options & 0x14: # has a description
1765 h.desc, offset = get_nul_terminated_unicode(data, offset)
1767 if options & 0x80: # has a target
1768 h.target, offset = get_nul_terminated_unicode(data, offset)
1770 if (options & 1) and not (options & 0x100): # HasMoniker and not MonikerSavedAsString
1771 # an OLEMoniker structure
1772 clsid, = unpack('<16s', data[offset:offset + 16])
1773 if DEBUG: fprintf(self.logfile, "clsid=%r\n", clsid)
1774 offset += 16
1775 if clsid == b"\xE0\xC9\xEA\x79\xF9\xBA\xCE\x11\x8C\x82\x00\xAA\x00\x4B\xA9\x0B":
1776 # E0H C9H EAH 79H F9H BAH CEH 11H 8CH 82H 00H AAH 00H 4BH A9H 0BH
1777 # URL Moniker
1778 h.type = UNICODE_LITERAL('url')
1779 nbytes = unpack('<L', data[offset:offset + 4])[0]
1780 offset += 4
1781 h.url_or_path = unicode(data[offset:offset + nbytes], 'UTF-16le')
1782 if DEBUG: fprintf(self.logfile, "initial url=%r len=%d\n", h.url_or_path, len(h.url_or_path))
1783 endpos = h.url_or_path.find('\x00')
1784 if DEBUG: print("endpos=%d" % endpos, file=self.logfile)
1785 h.url_or_path = h.url_or_path[:endpos]
1786 true_nbytes = 2 * (endpos + 1)
1787 offset += true_nbytes
1788 extra_nbytes = nbytes - true_nbytes
1789 extra_data = data[offset:offset + extra_nbytes]
1790 offset += extra_nbytes
1791 if DEBUG:
1792 fprintf(
1793 self.logfile,
1794 "url=%r\nextra=%r\nnbytes=%d true_nbytes=%d extra_nbytes=%d\n",
1795 h.url_or_path, extra_data, nbytes, true_nbytes, extra_nbytes,
1796 )
1797 assert extra_nbytes in (24, 0)
1798 elif clsid == b"\x03\x03\x00\x00\x00\x00\x00\x00\xC0\x00\x00\x00\x00\x00\x00\x46":
1799 # file moniker
1800 h.type = UNICODE_LITERAL('local file')
1801 uplevels, nbytes = unpack("<Hi", data[offset:offset + 6])
1802 offset += 6
1803 shortpath = b"..\\" * uplevels + data[offset:offset + nbytes - 1] #### BYTES, not unicode
1804 if DEBUG: fprintf(self.logfile, "uplevels=%d shortpath=%r\n", uplevels, shortpath)
1805 offset += nbytes
1806 offset += 24 # OOo: "unknown byte sequence"
1807 # above is version 0xDEAD + 20 reserved zero bytes
1808 sz = unpack('<i', data[offset:offset + 4])[0]
1809 if DEBUG: print("sz=%d" % sz, file=self.logfile)
1810 offset += 4
1811 if sz:
1812 xl = unpack('<i', data[offset:offset + 4])[0]
1813 offset += 4
1814 offset += 2 # "unknown byte sequence" MS: 0x0003
1815 extended_path = unicode(data[offset:offset + xl], 'UTF-16le') # not zero-terminated
1816 offset += xl
1817 h.url_or_path = extended_path
1818 else:
1819 h.url_or_path = shortpath
1820 #### MS KLUDGE WARNING ####
1821 # The "shortpath" is bytes encoded in the **UNKNOWN** creator's "ANSI" encoding.
1822 else:
1823 fprintf(self.logfile, "*** unknown clsid %r\n", clsid)
1824 elif options & 0x163 == 0x103: # UNC
1825 h.type = UNICODE_LITERAL('unc')
1826 h.url_or_path, offset = get_nul_terminated_unicode(data, offset)
1827 elif options & 0x16B == 8:
1828 h.type = UNICODE_LITERAL('workbook')
1829 else:
1830 h.type = UNICODE_LITERAL('unknown')
1832 if options & 0x8: # has textmark
1833 h.textmark, offset = get_nul_terminated_unicode(data, offset)
1835 if DEBUG:
1836 h.dump(header="... object dump ...")
1837 print("offset=%d record_size=%d" % (offset, record_size))
1839 extra_nbytes = record_size - offset
1840 if extra_nbytes > 0:
1841 fprintf(
1842 self.logfile,
1843 "*** WARNING: hyperlink at R%dC%d has %d extra data bytes: %s\n",
1844 h.frowx + 1,
1845 h.fcolx + 1,
1846 extra_nbytes,
1847 REPR(data[-extra_nbytes:]),
1848 )
1849 # Seen: b"\x00\x00" also b"A\x00", b"V\x00"
1850 elif extra_nbytes < 0:
1851 raise XLRDError("Bug or corrupt file, send copy of input file for debugging")
1853 self.hyperlink_list.append(h)
1854 for rowx in xrange(h.frowx, h.lrowx+1):
1855 for colx in xrange(h.fcolx, h.lcolx+1):
1856 self.hyperlink_map[rowx, colx] = h
1858 def handle_quicktip(self, data):
1859 rcx, frowx, lrowx, fcolx, lcolx = unpack('<5H', data[:10])
1860 assert rcx == XL_QUICKTIP
1861 assert self.hyperlink_list
1862 h = self.hyperlink_list[-1]
1863 assert (frowx, lrowx, fcolx, lcolx) == (h.frowx, h.lrowx, h.fcolx, h.lcolx)
1864 assert data[-2:] == b'\x00\x00'
1865 h.quicktip = unicode(data[10:-2], 'utf_16_le')
1867 def handle_msodrawingetc(self, recid, data_len, data):
1868 if not OBJ_MSO_DEBUG:
1869 return
1870 DEBUG = 1
1871 if self.biff_version < 80:
1872 return
1873 o = MSODrawing()
1874 pos = 0
1875 while pos < data_len:
1876 tmp, fbt, cb = unpack('<HHI', data[pos:pos+8])
1877 ver = tmp & 0xF
1878 inst = (tmp >> 4) & 0xFFF
1879 if ver == 0xF:
1880 ndb = 0 # container
1881 else:
1882 ndb = cb
1883 if DEBUG:
1884 hex_char_dump(data, pos, ndb + 8, base=0, fout=self.logfile)
1885 fprintf(self.logfile,
1886 "fbt:0x%04X inst:%d ver:0x%X cb:%d (0x%04X)\n",
1887 fbt, inst, ver, cb, cb)
1888 if fbt == 0xF010: # Client Anchor
1889 assert ndb == 18
1890 (o.anchor_unk,
1891 o.anchor_colx_lo, o.anchor_rowx_lo,
1892 o.anchor_colx_hi, o.anchor_rowx_hi) = unpack('<Hiiii', data[pos+8:pos+8+ndb])
1893 elif fbt == 0xF011: # Client Data
1894 # must be followed by an OBJ record
1895 assert cb == 0
1896 assert pos + 8 == data_len
1897 else:
1898 pass
1899 pos += ndb + 8
1900 else:
1901 # didn't break out of while loop
1902 assert pos == data_len
1903 if DEBUG:
1904 o.dump(self.logfile, header="=== MSODrawing ===", footer= " ")
1907 def handle_obj(self, data):
1908 if self.biff_version < 80:
1909 return None
1910 o = MSObj()
1911 data_len = len(data)
1912 pos = 0
1913 if OBJ_MSO_DEBUG:
1914 fprintf(self.logfile, "... OBJ record len=%d...\n", data_len)
1915 while pos < data_len:
1916 ft, cb = unpack('<HH', data[pos:pos+4])
1917 if OBJ_MSO_DEBUG:
1918 fprintf(self.logfile, "pos=%d ft=0x%04X cb=%d\n", pos, ft, cb)
1919 hex_char_dump(data, pos, cb + 4, base=0, fout=self.logfile)
1920 if pos == 0 and not (ft == 0x15 and cb == 18):
1921 if self.verbosity:
1922 fprintf(self.logfile, "*** WARNING Ignoring antique or corrupt OBJECT record\n")
1923 return None
1924 if ft == 0x15: # ftCmo ... s/b first
1925 assert pos == 0
1926 o.type, o.id, option_flags = unpack('<HHH', data[pos+4:pos+10])
1927 upkbits(o, option_flags, (
1928 ( 0, 0x0001, 'locked'),
1929 ( 4, 0x0010, 'printable'),
1930 ( 8, 0x0100, 'autofilter'), # not documented in Excel 97 dev kit
1931 ( 9, 0x0200, 'scrollbar_flag'), # not documented in Excel 97 dev kit
1932 (13, 0x2000, 'autofill'),
1933 (14, 0x4000, 'autoline'),
1934 ))
1935 elif ft == 0x00:
1936 if data[pos:data_len] == b'\0' * (data_len - pos):
1937 # ignore "optional reserved" data at end of record
1938 break
1939 msg = "Unexpected data at end of OBJECT record"
1940 fprintf(self.logfile, "*** ERROR %s\n" % msg)
1941 hex_char_dump(data, pos, data_len - pos, base=0, fout=self.logfile)
1942 raise XLRDError(msg)
1943 elif ft == 0x0C: # Scrollbar
1944 values = unpack('<5H', data[pos+8:pos+18])
1945 for value, tag in zip(values, ('value', 'min', 'max', 'inc', 'page')):
1946 setattr(o, 'scrollbar_' + tag, value)
1947 elif ft == 0x0D: # "Notes structure" [used for cell comments]
1948 # not documented in Excel 97 dev kit
1949 if OBJ_MSO_DEBUG: fprintf(self.logfile, "*** OBJ record has ft==0x0D 'notes' structure\n")
1950 elif ft == 0x13: # list box data
1951 if o.autofilter: # non standard exit. NOT documented
1952 break
1953 else:
1954 pass
1955 pos += cb + 4
1956 else:
1957 # didn't break out of while loop
1958 pass
1959 if OBJ_MSO_DEBUG:
1960 o.dump(self.logfile, header="=== MSOBj ===", footer= " ")
1961 return o
1963 def handle_note(self, data, txos):
1964 if OBJ_MSO_DEBUG:
1965 fprintf(self.logfile, '... NOTE record ...\n')
1966 hex_char_dump(data, 0, len(data), base=0, fout=self.logfile)
1967 o = Note()
1968 data_len = len(data)
1969 if self.biff_version < 80:
1970 o.rowx, o.colx, expected_bytes = unpack('<HHH', data[:6])
1971 nb = len(data) - 6
1972 assert nb <= expected_bytes
1973 pieces = [data[6:]]
1974 expected_bytes -= nb
1975 while expected_bytes > 0:
1976 rc2, data2_len, data2 = self.book.get_record_parts()
1977 assert rc2 == XL_NOTE
1978 dummy_rowx, nb = unpack('<H2xH', data2[:6])
1979 assert dummy_rowx == 0xFFFF
1980 assert nb == data2_len - 6
1981 pieces.append(data2[6:])
1982 expected_bytes -= nb
1983 assert expected_bytes == 0
1984 enc = self.book.encoding or self.book.derive_encoding()
1985 o.text = unicode(b''.join(pieces), enc)
1986 o.rich_text_runlist = [(0, 0)]
1987 o.show = 0
1988 o.row_hidden = 0
1989 o.col_hidden = 0
1990 o.author = UNICODE_LITERAL('')
1991 o._object_id = None
1992 self.cell_note_map[o.rowx, o.colx] = o
1993 return
1994 # Excel 8.0+
1995 o.rowx, o.colx, option_flags, o._object_id = unpack('<4H', data[:8])
1996 o.show = (option_flags >> 1) & 1
1997 o.row_hidden = (option_flags >> 7) & 1
1998 o.col_hidden = (option_flags >> 8) & 1
1999 # XL97 dev kit book says NULL [sic] bytes padding between string count and string data
2000 # to ensure that string is word-aligned. Appears to be nonsense.
2001 o.author, endpos = unpack_unicode_update_pos(data, 8, lenlen=2)
2002 # There is a random/undefined byte after the author string (not counted in the
2003 # string length).
2004 # Issue 4 on github: Google Spreadsheet doesn't write the undefined byte.
2005 assert (data_len - endpos) in (0, 1)
2006 if OBJ_MSO_DEBUG:
2007 o.dump(self.logfile, header="=== Note ===", footer= " ")
2008 txo = txos.get(o._object_id)
2009 if txo:
2010 o.text = txo.text
2011 o.rich_text_runlist = txo.rich_text_runlist
2012 self.cell_note_map[o.rowx, o.colx] = o
2014 def handle_txo(self, data):
2015 if self.biff_version < 80:
2016 return
2017 o = MSTxo()
2018 fmt = '<HH6sHHH'
2019 fmtsize = calcsize(fmt)
2020 option_flags, o.rot, controlInfo, cchText, cbRuns, o.ifntEmpty = unpack(fmt, data[:fmtsize])
2021 o.fmla = data[fmtsize:]
2022 upkbits(o, option_flags, (
2023 ( 3, 0x000E, 'horz_align'),
2024 ( 6, 0x0070, 'vert_align'),
2025 ( 9, 0x0200, 'lock_text'),
2026 (14, 0x4000, 'just_last'),
2027 (15, 0x8000, 'secret_edit'),
2028 ))
2029 totchars = 0
2030 o.text = UNICODE_LITERAL('')
2031 while totchars < cchText:
2032 rc2, data2_len, data2 = self.book.get_record_parts()
2033 assert rc2 == XL_CONTINUE
2034 if OBJ_MSO_DEBUG:
2035 hex_char_dump(data2, 0, data2_len, base=0, fout=self.logfile)
2036 nb = BYTES_ORD(data2[0]) # 0 means latin1, 1 means utf_16_le
2037 nchars = data2_len - 1
2038 if nb:
2039 assert nchars % 2 == 0
2040 nchars //= 2
2041 utext, endpos = unpack_unicode_update_pos(data2, 0, known_len=nchars)
2042 assert endpos == data2_len
2043 o.text += utext
2044 totchars += nchars
2045 o.rich_text_runlist = []
2046 totruns = 0
2047 while totruns < cbRuns: # counts of BYTES, not runs
2048 rc3, data3_len, data3 = self.book.get_record_parts()
2049 # print totruns, cbRuns, rc3, data3_len, repr(data3)
2050 assert rc3 == XL_CONTINUE
2051 assert data3_len % 8 == 0
2052 for pos in xrange(0, data3_len, 8):
2053 run = unpack('<HH4x', data3[pos:pos+8])
2054 o.rich_text_runlist.append(run)
2055 totruns += 8
2056 # remove trailing entries that point to the end of the string
2057 while o.rich_text_runlist and o.rich_text_runlist[-1][0] == cchText:
2058 del o.rich_text_runlist[-1]
2059 if OBJ_MSO_DEBUG:
2060 o.dump(self.logfile, header="=== MSTxo ===", footer= " ")
2061 print(o.rich_text_runlist, file=self.logfile)
2062 return o
2064 def handle_feat11(self, data):
2065 if not OBJ_MSO_DEBUG:
2066 return
2067 # rt: Record type; this matches the BIFF rt in the first two bytes of the record; =0872h
2068 # grbitFrt: FRT cell reference flag (see table below for details)
2069 # Ref0: Range reference to a worksheet cell region if grbitFrt=1 (bitFrtRef). Otherwise blank.
2070 # isf: Shared feature type index =5 for Table
2071 # fHdr: =0 since this is for feat not feat header
2072 # reserved0: Reserved for future use =0 for Table
2073 # cref: Count of ref ranges this feature is on
2074 # cbFeatData: Count of byte for the current feature data.
2075 # reserved1: =0 currently not used
2076 # Ref1: Repeat of Ref0. UNDOCUMENTED
2077 rt, grbitFrt, Ref0, isf, fHdr, reserved0, cref, cbFeatData, reserved1, Ref1 = unpack('<HH8sHBiHiH8s', data[0:35])
2078 assert reserved0 == 0
2079 assert reserved1 == 0
2080 assert isf == 5
2081 assert rt == 0x872
2082 assert fHdr == 0
2083 assert Ref1 == Ref0
2084 print(self.logfile, "FEAT11: grbitFrt=%d Ref0=%r cref=%d cbFeatData=%d\n", grbitFrt, Ref0, cref, cbFeatData)
2085 # lt: Table data source type:
2086 # =0 for Excel Worksheet Table =1 for read-write SharePoint linked List
2087 # =2 for XML mapper Table =3 for Query Table
2088 # idList: The ID of the Table (unique per worksheet)
2089 # crwHeader: How many header/title rows the Table has at the top
2090 # crwTotals: How many total rows the Table has at the bottom
2091 # idFieldNext: Next id to try when assigning a unique id to a new field
2092 # cbFSData: The size of the Fixed Data portion of the Table data structure.
2093 # rupBuild: the rupBuild that generated the record
2094 # unusedShort: UNUSED short that can be used later. The value is reserved during round-tripping.
2095 # listFlags: Collection of bit flags: (see listFlags' bit setting table below for detail.)
2096 # lPosStmCache: Table data stream position of cached data
2097 # cbStmCache: Count of bytes of cached data
2098 # cchStmCache: Count of characters of uncompressed cached data in the stream
2099 # lem: Table edit mode (see List (Table) Editing Mode (lem) setting table below for details.)
2100 # rgbHashParam: Hash value for SharePoint Table
2101 # cchName: Count of characters in the Table name string rgbName
2102 (lt, idList, crwHeader, crwTotals, idFieldNext, cbFSData,
2103 rupBuild, unusedShort, listFlags, lPosStmCache, cbStmCache,
2104 cchStmCache, lem, rgbHashParam, cchName) = unpack('<iiiiiiHHiiiii16sH', data[35:35+66])
2105 print("lt=%d idList=%d crwHeader=%d crwTotals=%d idFieldNext=%d cbFSData=%d\n"
2106 "rupBuild=%d unusedShort=%d listFlags=%04X lPosStmCache=%d cbStmCache=%d\n"
2107 "cchStmCache=%d lem=%d rgbHashParam=%r cchName=%d" % (
2108 lt, idList, crwHeader, crwTotals, idFieldNext, cbFSData,
2109 rupBuild, unusedShort,listFlags, lPosStmCache, cbStmCache,
2110 cchStmCache, lem, rgbHashParam, cchName), file=self.logfile)
2112 def __repr__(self):
2113 return "Sheet {:>2}:<{}>".format(self.number, self.name)
2116class MSODrawing(BaseObject):
2117 pass
2120class MSObj(BaseObject):
2121 pass
2124class MSTxo(BaseObject):
2125 pass
2128class Note(BaseObject):
2129 """
2130 Represents a user "comment" or "note".
2131 Note objects are accessible through :attr:`Sheet.cell_note_map`.
2133 .. versionadded:: 0.7.2
2134 """
2136 #: Author of note
2137 author = UNICODE_LITERAL('')
2139 #: ``True`` if the containing column is hidden
2140 col_hidden = 0
2142 #: Column index
2143 colx = 0
2145 #: List of ``(offset_in_string, font_index)`` tuples.
2146 #: Unlike :attr:`Sheet.rich_text_runlist_map`, the first offset should
2147 #: always be 0.
2148 rich_text_runlist = None
2150 #: True if the containing row is hidden
2151 row_hidden = 0
2153 #: Row index
2154 rowx = 0
2156 #: True if note is always shown
2157 show = 0
2159 #: Text of the note
2160 text = UNICODE_LITERAL('')
2163class Hyperlink(BaseObject):
2164 """
2165 Contains the attributes of a hyperlink.
2166 Hyperlink objects are accessible through :attr:`Sheet.hyperlink_list`
2167 and :attr:`Sheet.hyperlink_map`.
2169 .. versionadded:: 0.7.2
2170 """
2172 #: Index of first row
2173 frowx = None
2175 #: Index of last row
2176 lrowx = None
2178 #: Index of first column
2179 fcolx = None
2181 #: Index of last column
2182 lcolx = None
2184 #: Type of hyperlink. Unicode string, one of 'url', 'unc',
2185 #: 'local file', 'workbook', 'unknown'
2186 type = None
2188 #: The URL or file-path, depending in the type. Unicode string, except
2189 #: in the rare case of a local but non-existent file with non-ASCII
2190 #: characters in the name, in which case only the "8.3" filename is
2191 #: available, as a :class:`bytes` (3.x) or :class:`str` (2.x) string,
2192 #: *with unknown encoding.*
2193 url_or_path = None
2195 #: Description.
2196 #: This is displayed in the cell,
2197 #: and should be identical to the cell value. Unicode string, or ``None``.
2198 #: It seems impossible NOT to have a description created by the Excel UI.
2199 desc = None
2201 #: Target frame. Unicode string.
2202 #:
2203 #: .. note::
2204 #: No cases of this have been seen in the wild.
2205 #: It seems impossible to create one in the Excel UI.
2206 target = None
2208 #: The piece after the "#" in
2209 #: "http://docs.python.org/library#struct_module", or the ``Sheet1!A1:Z99``
2210 #: part when type is "workbook".
2211 textmark = None
2213 #: The text of the "quick tip" displayed when the cursor
2214 #: hovers over the hyperlink.
2215 quicktip = None
2217# === helpers ===
2219def unpack_RK(rk_str):
2220 flags = BYTES_ORD(rk_str[0])
2221 if flags & 2:
2222 # There's a SIGNED 30-bit integer in there!
2223 i, = unpack('<i', rk_str)
2224 i >>= 2 # div by 4 to drop the 2 flag bits
2225 if flags & 1:
2226 return i / 100.0
2227 return float(i)
2228 else:
2229 # It's the most significant 30 bits of an IEEE 754 64-bit FP number
2230 d, = unpack('<d', b'\0\0\0\0' + BYTES_LITERAL(chr(flags & 252)) + rk_str[1:4])
2231 if flags & 1:
2232 return d / 100.0
2233 return d
2235##### =============== Cell ======================================== #####
2237cellty_from_fmtty = {
2238 FNU: XL_CELL_NUMBER,
2239 FUN: XL_CELL_NUMBER,
2240 FGE: XL_CELL_NUMBER,
2241 FDT: XL_CELL_DATE,
2242 FTX: XL_CELL_NUMBER, # Yes, a number can be formatted as text.
2243}
2245ctype_text = {
2246 XL_CELL_EMPTY: 'empty',
2247 XL_CELL_TEXT: 'text',
2248 XL_CELL_NUMBER: 'number',
2249 XL_CELL_DATE: 'xldate',
2250 XL_CELL_BOOLEAN: 'bool',
2251 XL_CELL_ERROR: 'error',
2252 XL_CELL_BLANK: 'blank',
2253}
2256class Cell(BaseObject):
2257 """
2258 Contains the data for one cell.
2260 .. warning::
2261 You don't call this class yourself. You access :class:`Cell` objects
2262 via methods of the :class:`Sheet` object(s) that you found in the
2263 :class:`~xlrd.book.Book` object that was returned when you called
2264 :func:`~xlrd.open_workbook`
2266 Cell objects have three attributes: ``ctype`` is an int, ``value``
2267 (which depends on ``ctype``) and ``xf_index``.
2268 If ``formatting_info`` is not enabled when the workbook is opened,
2269 ``xf_index`` will be ``None``.
2271 The following table describes the types of cells and how their values
2272 are represented in Python.
2274 .. raw:: html
2276 <table border="1" cellpadding="7">
2277 <tr>
2278 <th>Type symbol</th>
2279 <th>Type number</th>
2280 <th>Python value</th>
2281 </tr>
2282 <tr>
2283 <td>XL_CELL_EMPTY</td>
2284 <td align="center">0</td>
2285 <td>empty string ''</td>
2286 </tr>
2287 <tr>
2288 <td>XL_CELL_TEXT</td>
2289 <td align="center">1</td>
2290 <td>a Unicode string</td>
2291 </tr>
2292 <tr>
2293 <td>XL_CELL_NUMBER</td>
2294 <td align="center">2</td>
2295 <td>float</td>
2296 </tr>
2297 <tr>
2298 <td>XL_CELL_DATE</td>
2299 <td align="center">3</td>
2300 <td>float</td>
2301 </tr>
2302 <tr>
2303 <td>XL_CELL_BOOLEAN</td>
2304 <td align="center">4</td>
2305 <td>int; 1 means TRUE, 0 means FALSE</td>
2306 </tr>
2307 <tr>
2308 <td>XL_CELL_ERROR</td>
2309 <td align="center">5</td>
2310 <td>int representing internal Excel codes; for a text representation,
2311 refer to the supplied dictionary error_text_from_code</td>
2312 </tr>
2313 <tr>
2314 <td>XL_CELL_BLANK</td>
2315 <td align="center">6</td>
2316 <td>empty string ''. Note: this type will appear only when
2317 open_workbook(..., formatting_info=True) is used.</td>
2318 </tr>
2319 </table>
2320 """
2322 __slots__ = ['ctype', 'value', 'xf_index']
2324 def __init__(self, ctype, value, xf_index=None):
2325 self.ctype = ctype
2326 self.value = value
2327 self.xf_index = xf_index
2329 def __repr__(self):
2330 if self.xf_index is None:
2331 return "%s:%r" % (ctype_text[self.ctype], self.value)
2332 else:
2333 return "%s:%r (XF:%r)" % (ctype_text[self.ctype], self.value, self.xf_index)
2335empty_cell = Cell(XL_CELL_EMPTY, UNICODE_LITERAL(''))
2337##### =============== Colinfo and Rowinfo ============================== #####
2340class Colinfo(BaseObject):
2341 """
2342 Width and default formatting information that applies to one or
2343 more columns in a sheet. Derived from ``COLINFO`` records.
2345 Here is the default hierarchy for width, according to the OOo docs:
2347 In BIFF3, if a ``COLINFO`` record is missing for a column,
2348 the width specified in the record ``DEFCOLWIDTH`` is used instead.
2350 In BIFF4-BIFF7, the width set in this ``COLINFO`` record is only used,
2351 if the corresponding bit for this column is cleared in the ``GCW``
2352 record, otherwise the column width set in the ``DEFCOLWIDTH`` record
2353 is used (the ``STANDARDWIDTH`` record is always ignored in this case [#f1]_).
2355 In BIFF8, if a ``COLINFO`` record is missing for a column,
2356 the width specified in the record ``STANDARDWIDTH`` is used.
2357 If this ``STANDARDWIDTH`` record is also missing,
2358 the column width of the record ``DEFCOLWIDTH`` is used instead.
2360 .. [#f1] The docs on the ``GCW`` record say this:
2362 If a bit is set, the corresponding column uses the width set in the
2363 ``STANDARDWIDTH`` record. If a bit is cleared, the corresponding column
2364 uses the width set in the ``COLINFO`` record for this column.
2366 If a bit is set, and the worksheet does not contain the ``STANDARDWIDTH``
2367 record, or if the bit is cleared, and the worksheet does not contain the
2368 ``COLINFO`` record, the ``DEFCOLWIDTH`` record of the worksheet will be
2369 used instead.
2371 xlrd goes with the GCW version of the story.
2372 Reference to the source may be useful: see
2373 :meth:`Sheet.computed_column_width`.
2375 .. versionadded:: 0.6.1
2376 """
2378 #: Width of the column in 1/256 of the width of the zero character,
2379 #: using default font (first ``FONT`` record in the file).
2380 width = 0
2382 #: XF index to be used for formatting empty cells.
2383 xf_index = -1
2385 #: 1 = column is hidden
2386 hidden = 0
2388 #: Value of a 1-bit flag whose purpose is unknown
2389 #: but is often seen set to 1
2390 bit1_flag = 0
2392 #: Outline level of the column, in ``range(7)``.
2393 #: (0 = no outline)
2394 outline_level = 0
2396 #: 1 = column is collapsed
2397 collapsed = 0
2399_USE_SLOTS = 1
2402class Rowinfo(BaseObject):
2403 """
2404 Height and default formatting information that applies to a row in a sheet.
2405 Derived from ``ROW`` records.
2407 .. versionadded:: 0.6.1
2408 """
2410 if _USE_SLOTS: 2410 ↛ 2424line 2410 didn't jump to line 2424, because the condition on line 2410 was never false
2411 __slots__ = (
2412 "height",
2413 "has_default_height",
2414 "outline_level",
2415 "outline_group_starts_ends",
2416 "hidden",
2417 "height_mismatch",
2418 "has_default_xf_index",
2419 "xf_index",
2420 "additional_space_above",
2421 "additional_space_below",
2422 )
2424 def __init__(self):
2425 #: Height of the row, in twips. One twip == 1/20 of a point.
2426 self.height = None
2428 #: 0 = Row has custom height; 1 = Row has default height.
2429 self.has_default_height = None
2431 #: Outline level of the row (0 to 7)
2432 self.outline_level = None
2434 #: 1 = Outline group starts or ends here (depending on where the
2435 #: outline buttons are located, see ``WSBOOL`` record, which is not
2436 #: parsed by xlrd), *and* is collapsed.
2437 self.outline_group_starts_ends = None
2439 #: 1 = Row is hidden (manually, or by a filter or outline group)
2440 self.hidden = None
2442 #: 1 = Row height and default font height do not match.
2443 self.height_mismatch = None
2445 #: 1 = the xf_index attribute is usable; 0 = ignore it.
2446 self.has_default_xf_index = None
2448 #: Index to default :class:`~xlrd.formatting.XF` record for empty cells
2449 #: in this row. Don't use this if ``has_default_xf_index == 0``.
2450 self.xf_index = None
2452 #: This flag is set if the upper border of at least one cell in this
2453 #: row or if the lower border of at least one cell in the row above is
2454 #: formatted with a thick line style. Thin and medium line styles are
2455 #: not taken into account.
2456 self.additional_space_above = None
2458 #: This flag is set if the lower border of at least one cell in this row
2459 #: or if the upper border of at least one cell in the row below is
2460 #: formatted with a medium or thick line style. Thin line styles are not
2461 #: taken into account.
2462 self.additional_space_below = None
2464 def __getstate__(self):
2465 return (
2466 self.height,
2467 self.has_default_height,
2468 self.outline_level,
2469 self.outline_group_starts_ends,
2470 self.hidden,
2471 self.height_mismatch,
2472 self.has_default_xf_index,
2473 self.xf_index,
2474 self.additional_space_above,
2475 self.additional_space_below,
2476 )
2478 def __setstate__(self, state):
2479 (
2480 self.height,
2481 self.has_default_height,
2482 self.outline_level,
2483 self.outline_group_starts_ends,
2484 self.hidden,
2485 self.height_mismatch,
2486 self.has_default_xf_index,
2487 self.xf_index,
2488 self.additional_space_above,
2489 self.additional_space_below,
2490 ) = state