Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/xlwt/UnicodeUtils.py: 8%

1# -*- coding: windows-1252 -*-

3'''

4From BIFF8 on, strings are always stored using UTF-16LE text encoding. The

5character array is a sequence of 16-bit values. Additionally it is

6possible to use a compressed format, which omits the high bytes of all

7characters, if they are all zero.

9The following tables describe the standard format of the entire string, but

10in many records the strings differ from this format. This will be mentioned

11separately. It is possible (but not required) to store Rich-Text formatting

12information and Asian phonetic information inside a Unicode string. This

13results in four different ways to store a string. The character array

14is not zero-terminated.

16The string consists of the character count (as usual an 8-bit value or

17a 16-bit value), option flags, the character array and optional formatting

18information. If the string is empty, sometimes the option flags field will

19not occur. This is mentioned at the respective place.

21Offset Size Contents

220 1 or 2 Length of the string (character count, ln)

231 or 2 1 Option flags:

24 Bit Mask Contents

25 0 01H Character compression (ccompr):

26 0 = Compressed (8-bit characters)

27 1 = Uncompressed (16-bit characters)

28 2 04H Asian phonetic settings (phonetic):

29 0 = Does not contain Asian phonetic settings

30 1 = Contains Asian phonetic settings

31 3 08H Rich-Text settings (richtext):

32 0 = Does not contain Rich-Text settings

33 1 = Contains Rich-Text settings

34[2 or 3] 2 (optional, only if richtext=1) Number of Rich-Text formatting runs (rt)

35[var.] 4 (optional, only if phonetic=1) Size of Asian phonetic settings block (in bytes, sz)

36var. ln or

37 2·ln Character array (8-bit characters or 16-bit characters, dependent on ccompr)

38[var.] 4·rt (optional, only if richtext=1) List of rt formatting runs

39[var.] sz (optional, only if phonetic=1) Asian Phonetic Settings Block

40'''

42from .compat import unicode, unicode_type

43from struct import pack

def upack2(s, encoding='ascii'):
    """Pack *s* as a BIFF8 unicode string with a two-byte length field.

    Byte strings are decoded with *encoding* first; text is used as is.
    The result is the little-endian 16-bit item count, the one-byte
    option flags and then the character array.

    Flags are 0 for the compressed form (one byte per character, used
    when every character fits in latin1) and 1 for UTF-16LE.

    Raises Exception when the text holds more than 32767 characters.
    """
    text = s if isinstance(s, unicode_type) else unicode(s, encoding)

    # The limit applies to the number of content characters, not to the
    # number of bytes in the packed result.
    char_count = len(text)
    if char_count > 32767:
        raise Exception('String longer than 32767 characters')

    try:
        payload = text.encode('latin1')
    except UnicodeEncodeError:
        # Something lies above U+00FF: store 16-bit characters instead.
        payload = text.encode('utf_16_le')
        # Count 16-bit units (MS C wchars) rather than len(text): a
        # character such as u"\U0001D400" has length 1 on a wide-unicode
        # build and 2 on a narrow one, but must be counted as 2 here.
        header = pack('<HB', len(payload) // 2, 1)
    else:
        # All characters are within U+0000..U+00FF inclusive, so the
        # "compressed format" applies and the count is the text length.
        header = pack('<HB', char_count, 0)
    return header + payload

def upack2rt(rt, encoding='ascii'):
    """Pack rich text as a BIFF8 unicode string plus its formatting runs.

    *rt* is an iterable of ``(piece, fontx)`` pairs. Byte-string pieces
    are decoded with *encoding*. Every pair whose ``fontx`` is not None
    yields one four-byte formatting run holding the piece's starting
    offset and ``fontx`` (presumably a font index -- confirm against the
    caller in Rows.py).

    Returns a ``(data, runs)`` tuple. ``data`` is the 16-bit item count,
    the option flags (rich-text bit 08H always set, bit 01H set when the
    text is stored as UTF-16LE), the 16-bit run count and the character
    array. ``runs`` is the concatenated formatting runs.

    Raises Exception when the text exceeds 32767 16-bit units.
    """
    pieces = []
    runs = []
    position = 0
    for piece, font_index in rt:
        # Decode anything that is not already text.
        if not isinstance(piece, unicode_type):
            piece = unicode(piece, encoding)
        pieces.append(piece)
        if font_index is not None:
            # Code in Rows.py ensures that only the first piece can
            # come without a font.
            runs.append(pack('<HH', position, font_index))
        # Offsets are measured in MS C wchars: one per character up to
        # u'\uFFFF', two for anything beyond.
        position += len(piece.encode('utf_16_le')) // 2

    text = u''.join(pieces)
    run_bytes = b''.join(runs)
    run_count = len(runs)

    if position > 32767:
        raise Exception('String longer than 32767 characters')

    try:
        payload = text.encode('latin1')
    except UnicodeEncodeError:
        # Uncompressed: count 16-bit units, as upack2() does.
        payload = text.encode('utf_16_le')
        flags = 1 | 8
        item_count = len(payload) // 2
    else:
        # Every character is within U+0000..U+00FF inclusive, so the
        # "compressed format" applies: one byte per character.
        flags = 0 | 8
        item_count = len(payload)
    return pack('<HBH', item_count, flags, run_count) + payload, run_bytes

104

def upack1(s, encoding='ascii'):
    """Pack *s* as a BIFF8 unicode string with a one-byte length field.

    Same layout as upack2(), except that the item count is stored in a
    single byte. Byte strings are decoded with *encoding* first; text is
    used as is. The result is the 8-bit item count, the one-byte option
    flags (0 = compressed latin1, 1 = UTF-16LE) and the character array.

    Raises Exception when the text holds more than 255 characters, or
    when its UTF-16 form needs more than 255 16-bit units and so cannot
    be described by the one-byte count.
    """
    if isinstance(s, unicode_type):
        us = s
    else:
        us = unicode(s, encoding)
    # Cheap early rejection, before any encoding work is done.
    if len(us) > 255:
        raise Exception('String longer than 255 characters')
    try:
        encs = us.encode('latin1')
    except UnicodeEncodeError:
        encs = us.encode('utf_16_le')
        flag = 1
        # Number of 16-bit units; characters beyond u'\uFFFF' take two.
        n_items = len(encs) // 2
    else:
        # All characters are within U+0000..U+00FF: compressed format.
        flag = 0
        n_items = len(encs)
    # Characters outside the BMP can push the unit count past 255 even
    # though len(us) passed the check above. Reject that here with the
    # documented error instead of letting pack() fail with an opaque
    # struct.error.
    if n_items > 255:
        raise Exception('String longer than 255 characters')
    return pack('<BB', n_items, flag) + encs

122 return pack('<BB', n_items, flag) + encs