Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/phonenumbers/unicode_util.py: 90%

297 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1"""Unicode utility functions 

2 

3>>> from .import unicode_util 

4>>> from .util import u 

5>>> u1 = '1' # DIGIT ONE 

6>>> u2 = u('a') # LATIN SMALL LETTER A 

7>>> u3 = u('\uFF12') # FULLWIDTH DIGIT TWO 

8>>> u4 = u('\u0100') # LATIN CAPITAL LETTER A WITH MACRON 

9>>> unicode_util.Category.get(u1) == u('Nd') 

10True 

11>>> unicode_util.Category.get(u2) == u('Ll') 

12True 

13>>> unicode_util.Category.get(u3) == u('Nd') 

14True 

15>>> unicode_util.Category.get(u4) == u('Lu') 

16True 

17>>> unicode_util.Category.get(u2) == unicode_util.Category.LOWERCASE_LETTER 

18True 

19>>> try: 

20... beyond_bmp = u('\U00010100') # AEGEAN WORD SEPARATOR LINE 

21... except Exception: 

22... beyond_bmp = u('') 

23>>> if len(beyond_bmp) == 1: # We have a UCS4 build of Python 

24... cat_po = unicode_util.Category.get(beyond_bmp) 

25... else: # UCS2 build of Python; no non-BMP chars available 

26... cat_po = unicode_util.Category.OTHER_PUNCTUATION 

27>>> cat_po == u('Po') 

28True 

29>>> unicode_util.is_letter(u1) 

30False 

31>>> unicode_util.is_letter(u2) 

32True 

33>>> unicode_util.is_letter(u3) 

34False 

35>>> unicode_util.is_letter(u4) 

36True 

37>>> b1 = unicode_util.Block.get(u1) 

38>>> str(b1) 

39'Block[0000, 007f]' 

40>>> b1 == unicode_util.Block.BASIC_LATIN 

41True 

42>>> b1 == [0x0000, 0x0075] 

43False 

44>>> b2 = unicode_util.Block.get(u2) 

45>>> b2 == unicode_util.Block.BASIC_LATIN 

46True 

47>>> b3 = unicode_util.Block.get(u3) 

48>>> b3 != unicode_util.Block.BASIC_LATIN 

49True 

50>>> b3 == unicode_util.Block.HALFWIDTH_AND_FULLWIDTH_FORMS 

51True 

52>>> b4 = unicode_util.Block.get(u4) 

53>>> b4 == unicode_util.Block.LATIN_EXTENDED_A 

54True 

55>>> unicode_util.Block.get(u('\u0860')) == unicode_util.Block.UNKNOWN 

56True 

57>>> try: 

58... unknown_block = u('\U00013430') 

59... except Exception: 

60... unknown_block = u('') 

61>>> if len(unknown_block) == 1: # We have a UCS4 build of Python 

62... unicode_util.Block.get(u('\U00013430')) == unicode_util.Block.UNKNOWN 

63... else: # UCS2 build of Python; no unknown characters available 

64... True 

65True 

66>>> unicode_util.digit(u1) 

671 

68>>> unicode_util.digit(u2, -1) 

69-1 

70>>> unicode_util.digit(u3, -1) 

712 

72>>> str(hash(b3)) # doctest: +ELLIPSIS 

73'...' 

74""" 

75import bisect 

76import unicodedata 

77 

78from .util import UnicodeMixin, unicod, u 

79 

80 

class Category(object):
    """Unicode general category codes, plus a lookup helper.

    Single-letter constants name a major class; two-letter constants name
    the individual general categories.

    See http://www.unicode.org/reports/tr18/#Categories
    """
    # Letters
    LETTER = u("L")
    UPPERCASE_LETTER = u("Lu")
    LOWERCASE_LETTER = u("Ll")
    TITLECASE_LETTER = u("Lt")
    MODIFIER_LETTER = u("Lm")
    OTHER_LETTER = u("Lo")
    # Marks
    MARK = u("M")
    NON_SPACING_MARK = u("Mn")
    SPACING_COMBINING_MARK = u("Mc")
    ENCLOSING_MARK = u("Me")
    # Numbers
    NUMBER = u("N")
    DECIMAL_DIGIT_NUMBER = u("Nd")
    LETTER_NUMBER = u("Nl")
    OTHER_NUMBER = u("No")
    # Symbols
    SYMBOL = u("S")
    MATH_SYMBOL = u("Sm")
    CURRENCY_SYMBOL = u("Sc")
    MODIFIER_SYMBOL = u("Sk")
    OTHER_SYMBOL = u("So")
    # Punctuation
    PUNCTUATION = u("P")
    CONNECTOR_PUNCTUATION = u("Pc")
    DASH_PUNCTUATION = u("Pd")
    OPEN_PUNCTUATION = u("Ps")
    CLOSE_PUNCTUATION = u("Pe")
    INITIAL_PUNCTUATION = u("Pi")
    FINAL_PUNCTUATION = u("Pf")
    OTHER_PUNCTUATION = u("Po")
    # Separators
    SEPARATOR = u("Z")
    SPACE_SEPARATOR = u("Zs")
    LINE_SEPARATOR = u("Zl")
    PARAGRAPH_SEPARATOR = u("Zp")
    # Everything else
    OTHER = u("C")
    CONTROL = u("Cc")
    FORMAT = u("Cf")
    SURROGATE = u("Cs")
    PRIVATE_USE = u("Co")
    NOT_ASSIGNED = u("Cn")

    @classmethod
    def get(cls, uni_char):
        """Look up the general category code of uni_char.

        The argument is first coerced with unicod(); the code reported by
        unicodedata.category() is likewise passed through unicod() before
        being returned, so it compares equal to the constants above.
        """
        return unicod(unicodedata.category(unicod(uni_char)))

128 

129 

def is_letter(uni_char):
    """Return True if uni_char falls in one of the letter categories.

    The letter categories are Lu, Ll, Lt, Lm and Lo; any other general
    category yields False.
    """
    letter_categories = (
        Category.UPPERCASE_LETTER,
        Category.LOWERCASE_LETTER,
        Category.TITLECASE_LETTER,
        Category.MODIFIER_LETTER,
        Category.OTHER_LETTER,
    )
    return Category.get(uni_char) in letter_categories

138 

139 

class _BlockRange(UnicodeMixin):
    """Span of code points, from start to end, covered by one Unicode block."""

    def __init__(self, start, end, regdict=None):
        """Store the bounds and, if regdict is given, register self under start."""
        self.start, self.end = start, end
        if regdict is not None:
            regdict[start] = self

    def __eq__(self, other):
        """Two ranges are equal when both bounds match; other types are not handled."""
        if isinstance(other, _BlockRange):
            return self.start == other.start and self.end == other.end
        return NotImplemented

    def __ne__(self, other):
        """Logical negation of equality."""
        return not (self == other)

    def __hash__(self):
        # Hash on the same pair of fields that __eq__ compares.
        return hash((self.start, self.end))

    def __unicode__(self):
        """Render as 'Block[ssss, eeee]' with both bounds in lowercase hex."""
        template = unicod("Block[%04x, %04x]")
        return template % (self.start, self.end)

161 

162 

class Block(object):
    """Table of Unicode blocks and a lookup from character to block.

    Every named block below registers itself in _RANGES as it is created;
    UNKNOWN is deliberately left out of the registry and is only ever
    returned as the fallback from get().
    """

    # Registry filled in by the _BlockRange constructor calls below:
    # first code point of the block => _BlockRange object.
    _RANGES = {}
    # Sorted list of the registry's keys; computed on the first get() call.
    _RANGE_KEYS = None

    # Taken from http://www.unicode.org/Public/UNIDATA/Blocks.txt
    BASIC_LATIN = _BlockRange(0x0000, 0x007F, _RANGES)
    LATIN_1_SUPPLEMENT = _BlockRange(0x0080, 0x00FF, _RANGES)
    LATIN_EXTENDED_A = _BlockRange(0x0100, 0x017F, _RANGES)
    LATIN_EXTENDED_B = _BlockRange(0x0180, 0x024F, _RANGES)
    IPA_EXTENSIONS = _BlockRange(0x0250, 0x02AF, _RANGES)
    SPACING_MODIFIER_LETTERS = _BlockRange(0x02B0, 0x02FF, _RANGES)
    COMBINING_DIACRITICAL_MARKS = _BlockRange(0x0300, 0x036F, _RANGES)
    GREEK_AND_COPTIC = _BlockRange(0x0370, 0x03FF, _RANGES)
    CYRILLIC = _BlockRange(0x0400, 0x04FF, _RANGES)
    CYRILLIC_SUPPLEMENT = _BlockRange(0x0500, 0x052F, _RANGES)
    ARMENIAN = _BlockRange(0x0530, 0x058F, _RANGES)
    HEBREW = _BlockRange(0x0590, 0x05FF, _RANGES)
    ARABIC = _BlockRange(0x0600, 0x06FF, _RANGES)
    SYRIAC = _BlockRange(0x0700, 0x074F, _RANGES)
    ARABIC_SUPPLEMENT = _BlockRange(0x0750, 0x077F, _RANGES)
    THAANA = _BlockRange(0x0780, 0x07BF, _RANGES)
    NKO = _BlockRange(0x07C0, 0x07FF, _RANGES)
    SAMARITAN = _BlockRange(0x0800, 0x083F, _RANGES)
    MANDAIC = _BlockRange(0x0840, 0x085F, _RANGES)
    DEVANAGARI = _BlockRange(0x0900, 0x097F, _RANGES)
    BENGALI = _BlockRange(0x0980, 0x09FF, _RANGES)
    GURMUKHI = _BlockRange(0x0A00, 0x0A7F, _RANGES)
    GUJARATI = _BlockRange(0x0A80, 0x0AFF, _RANGES)
    ORIYA = _BlockRange(0x0B00, 0x0B7F, _RANGES)
    TAMIL = _BlockRange(0x0B80, 0x0BFF, _RANGES)
    TELUGU = _BlockRange(0x0C00, 0x0C7F, _RANGES)
    KANNADA = _BlockRange(0x0C80, 0x0CFF, _RANGES)
    MALAYALAM = _BlockRange(0x0D00, 0x0D7F, _RANGES)
    SINHALA = _BlockRange(0x0D80, 0x0DFF, _RANGES)
    THAI = _BlockRange(0x0E00, 0x0E7F, _RANGES)
    LAO = _BlockRange(0x0E80, 0x0EFF, _RANGES)
    TIBETAN = _BlockRange(0x0F00, 0x0FFF, _RANGES)
    MYANMAR = _BlockRange(0x1000, 0x109F, _RANGES)
    GEORGIAN = _BlockRange(0x10A0, 0x10FF, _RANGES)
    HANGUL_JAMO = _BlockRange(0x1100, 0x11FF, _RANGES)
    ETHIOPIC = _BlockRange(0x1200, 0x137F, _RANGES)
    ETHIOPIC_SUPPLEMENT = _BlockRange(0x1380, 0x139F, _RANGES)
    CHEROKEE = _BlockRange(0x13A0, 0x13FF, _RANGES)
    UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = _BlockRange(0x1400, 0x167F, _RANGES)
    OGHAM = _BlockRange(0x1680, 0x169F, _RANGES)
    RUNIC = _BlockRange(0x16A0, 0x16FF, _RANGES)
    TAGALOG = _BlockRange(0x1700, 0x171F, _RANGES)
    HANUNOO = _BlockRange(0x1720, 0x173F, _RANGES)
    BUHID = _BlockRange(0x1740, 0x175F, _RANGES)
    TAGBANWA = _BlockRange(0x1760, 0x177F, _RANGES)
    KHMER = _BlockRange(0x1780, 0x17FF, _RANGES)
    MONGOLIAN = _BlockRange(0x1800, 0x18AF, _RANGES)
    UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = _BlockRange(0x18B0, 0x18FF, _RANGES)
    LIMBU = _BlockRange(0x1900, 0x194F, _RANGES)
    TAI_LE = _BlockRange(0x1950, 0x197F, _RANGES)
    NEW_TAI_LUE = _BlockRange(0x1980, 0x19DF, _RANGES)
    KHMER_SYMBOLS = _BlockRange(0x19E0, 0x19FF, _RANGES)
    BUGINESE = _BlockRange(0x1A00, 0x1A1F, _RANGES)
    TAI_THAM = _BlockRange(0x1A20, 0x1AAF, _RANGES)
    BALINESE = _BlockRange(0x1B00, 0x1B7F, _RANGES)
    SUNDANESE = _BlockRange(0x1B80, 0x1BBF, _RANGES)
    BATAK = _BlockRange(0x1BC0, 0x1BFF, _RANGES)
    LEPCHA = _BlockRange(0x1C00, 0x1C4F, _RANGES)
    OL_CHIKI = _BlockRange(0x1C50, 0x1C7F, _RANGES)
    VEDIC_EXTENSIONS = _BlockRange(0x1CD0, 0x1CFF, _RANGES)
    PHONETIC_EXTENSIONS = _BlockRange(0x1D00, 0x1D7F, _RANGES)
    PHONETIC_EXTENSIONS_SUPPLEMENT = _BlockRange(0x1D80, 0x1DBF, _RANGES)
    COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = _BlockRange(0x1DC0, 0x1DFF, _RANGES)
    LATIN_EXTENDED_ADDITIONAL = _BlockRange(0x1E00, 0x1EFF, _RANGES)
    GREEK_EXTENDED = _BlockRange(0x1F00, 0x1FFF, _RANGES)
    GENERAL_PUNCTUATION = _BlockRange(0x2000, 0x206F, _RANGES)
    SUPERSCRIPTS_AND_SUBSCRIPTS = _BlockRange(0x2070, 0x209F, _RANGES)
    CURRENCY_SYMBOLS = _BlockRange(0x20A0, 0x20CF, _RANGES)
    COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS = _BlockRange(0x20D0, 0x20FF, _RANGES)
    LETTERLIKE_SYMBOLS = _BlockRange(0x2100, 0x214F, _RANGES)
    NUMBER_FORMS = _BlockRange(0x2150, 0x218F, _RANGES)
    ARROWS = _BlockRange(0x2190, 0x21FF, _RANGES)
    MATHEMATICAL_OPERATORS = _BlockRange(0x2200, 0x22FF, _RANGES)
    MISCELLANEOUS_TECHNICAL = _BlockRange(0x2300, 0x23FF, _RANGES)
    CONTROL_PICTURES = _BlockRange(0x2400, 0x243F, _RANGES)
    OPTICAL_CHARACTER_RECOGNITION = _BlockRange(0x2440, 0x245F, _RANGES)
    ENCLOSED_ALPHANUMERICS = _BlockRange(0x2460, 0x24FF, _RANGES)
    BOX_DRAWING = _BlockRange(0x2500, 0x257F, _RANGES)
    BLOCK_ELEMENTS = _BlockRange(0x2580, 0x259F, _RANGES)
    GEOMETRIC_SHAPES = _BlockRange(0x25A0, 0x25FF, _RANGES)
    MISCELLANEOUS_SYMBOLS = _BlockRange(0x2600, 0x26FF, _RANGES)
    DINGBATS = _BlockRange(0x2700, 0x27BF, _RANGES)
    MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = _BlockRange(0x27C0, 0x27EF, _RANGES)
    SUPPLEMENTAL_ARROWS_A = _BlockRange(0x27F0, 0x27FF, _RANGES)
    BRAILLE_PATTERNS = _BlockRange(0x2800, 0x28FF, _RANGES)
    SUPPLEMENTAL_ARROWS_B = _BlockRange(0x2900, 0x297F, _RANGES)
    MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = _BlockRange(0x2980, 0x29FF, _RANGES)
    SUPPLEMENTAL_MATHEMATICAL_OPERATORS = _BlockRange(0x2A00, 0x2AFF, _RANGES)
    MISCELLANEOUS_SYMBOLS_AND_ARROWS = _BlockRange(0x2B00, 0x2BFF, _RANGES)
    GLAGOLITIC = _BlockRange(0x2C00, 0x2C5F, _RANGES)
    LATIN_EXTENDED_C = _BlockRange(0x2C60, 0x2C7F, _RANGES)
    COPTIC = _BlockRange(0x2C80, 0x2CFF, _RANGES)
    GEORGIAN_SUPPLEMENT = _BlockRange(0x2D00, 0x2D2F, _RANGES)
    TIFINAGH = _BlockRange(0x2D30, 0x2D7F, _RANGES)
    ETHIOPIC_EXTENDED = _BlockRange(0x2D80, 0x2DDF, _RANGES)
    CYRILLIC_EXTENDED_A = _BlockRange(0x2DE0, 0x2DFF, _RANGES)
    SUPPLEMENTAL_PUNCTUATION = _BlockRange(0x2E00, 0x2E7F, _RANGES)
    CJK_RADICALS_SUPPLEMENT = _BlockRange(0x2E80, 0x2EFF, _RANGES)
    KANGXI_RADICALS = _BlockRange(0x2F00, 0x2FDF, _RANGES)
    IDEOGRAPHIC_DESCRIPTION_CHARACTERS = _BlockRange(0x2FF0, 0x2FFF, _RANGES)
    CJK_SYMBOLS_AND_PUNCTUATION = _BlockRange(0x3000, 0x303F, _RANGES)
    HIRAGANA = _BlockRange(0x3040, 0x309F, _RANGES)
    KATAKANA = _BlockRange(0x30A0, 0x30FF, _RANGES)
    BOPOMOFO = _BlockRange(0x3100, 0x312F, _RANGES)
    HANGUL_COMPATIBILITY_JAMO = _BlockRange(0x3130, 0x318F, _RANGES)
    KANBUN = _BlockRange(0x3190, 0x319F, _RANGES)
    BOPOMOFO_EXTENDED = _BlockRange(0x31A0, 0x31BF, _RANGES)
    CJK_STROKES = _BlockRange(0x31C0, 0x31EF, _RANGES)
    KATAKANA_PHONETIC_EXTENSIONS = _BlockRange(0x31F0, 0x31FF, _RANGES)
    ENCLOSED_CJK_LETTERS_AND_MONTHS = _BlockRange(0x3200, 0x32FF, _RANGES)
    CJK_COMPATIBILITY = _BlockRange(0x3300, 0x33FF, _RANGES)
    CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = _BlockRange(0x3400, 0x4DBF, _RANGES)
    YIJING_HEXAGRAM_SYMBOLS = _BlockRange(0x4DC0, 0x4DFF, _RANGES)
    CJK_UNIFIED_IDEOGRAPHS = _BlockRange(0x4E00, 0x9FFF, _RANGES)
    YI_SYLLABLES = _BlockRange(0xA000, 0xA48F, _RANGES)
    YI_RADICALS = _BlockRange(0xA490, 0xA4CF, _RANGES)
    LISU = _BlockRange(0xA4D0, 0xA4FF, _RANGES)
    VAI = _BlockRange(0xA500, 0xA63F, _RANGES)
    CYRILLIC_EXTENDED_B = _BlockRange(0xA640, 0xA69F, _RANGES)
    BAMUM = _BlockRange(0xA6A0, 0xA6FF, _RANGES)
    MODIFIER_TONE_LETTERS = _BlockRange(0xA700, 0xA71F, _RANGES)
    LATIN_EXTENDED_D = _BlockRange(0xA720, 0xA7FF, _RANGES)
    SYLOTI_NAGRI = _BlockRange(0xA800, 0xA82F, _RANGES)
    COMMON_INDIC_NUMBER_FORMS = _BlockRange(0xA830, 0xA83F, _RANGES)
    PHAGS_PA = _BlockRange(0xA840, 0xA87F, _RANGES)
    SAURASHTRA = _BlockRange(0xA880, 0xA8DF, _RANGES)
    DEVANAGARI_EXTENDED = _BlockRange(0xA8E0, 0xA8FF, _RANGES)
    KAYAH_LI = _BlockRange(0xA900, 0xA92F, _RANGES)
    REJANG = _BlockRange(0xA930, 0xA95F, _RANGES)
    HANGUL_JAMO_EXTENDED_A = _BlockRange(0xA960, 0xA97F, _RANGES)
    JAVANESE = _BlockRange(0xA980, 0xA9DF, _RANGES)
    CHAM = _BlockRange(0xAA00, 0xAA5F, _RANGES)
    MYANMAR_EXTENDED_A = _BlockRange(0xAA60, 0xAA7F, _RANGES)
    TAI_VIET = _BlockRange(0xAA80, 0xAADF, _RANGES)
    ETHIOPIC_EXTENDED_A = _BlockRange(0xAB00, 0xAB2F, _RANGES)
    MEETEI_MAYEK = _BlockRange(0xABC0, 0xABFF, _RANGES)
    HANGUL_SYLLABLES = _BlockRange(0xAC00, 0xD7AF, _RANGES)
    HANGUL_JAMO_EXTENDED_B = _BlockRange(0xD7B0, 0xD7FF, _RANGES)
    HIGH_SURROGATES = _BlockRange(0xD800, 0xDB7F, _RANGES)
    HIGH_PRIVATE_USE_SURROGATES = _BlockRange(0xDB80, 0xDBFF, _RANGES)
    LOW_SURROGATES = _BlockRange(0xDC00, 0xDFFF, _RANGES)
    PRIVATE_USE_AREA = _BlockRange(0xE000, 0xF8FF, _RANGES)
    CJK_COMPATIBILITY_IDEOGRAPHS = _BlockRange(0xF900, 0xFAFF, _RANGES)
    ALPHABETIC_PRESENTATION_FORMS = _BlockRange(0xFB00, 0xFB4F, _RANGES)
    ARABIC_PRESENTATION_FORMS_A = _BlockRange(0xFB50, 0xFDFF, _RANGES)
    VARIATION_SELECTORS = _BlockRange(0xFE00, 0xFE0F, _RANGES)
    VERTICAL_FORMS = _BlockRange(0xFE10, 0xFE1F, _RANGES)
    COMBINING_HALF_MARKS = _BlockRange(0xFE20, 0xFE2F, _RANGES)
    CJK_COMPATIBILITY_FORMS = _BlockRange(0xFE30, 0xFE4F, _RANGES)
    SMALL_FORM_VARIANTS = _BlockRange(0xFE50, 0xFE6F, _RANGES)
    ARABIC_PRESENTATION_FORMS_B = _BlockRange(0xFE70, 0xFEFF, _RANGES)
    HALFWIDTH_AND_FULLWIDTH_FORMS = _BlockRange(0xFF00, 0xFFEF, _RANGES)
    SPECIALS = _BlockRange(0xFFF0, 0xFFFF, _RANGES)
    # Blocks starting above U+FFFF.
    LINEAR_B_SYLLABARY = _BlockRange(0x10000, 0x1007F, _RANGES)
    LINEAR_B_IDEOGRAMS = _BlockRange(0x10080, 0x100FF, _RANGES)
    AEGEAN_NUMBERS = _BlockRange(0x10100, 0x1013F, _RANGES)
    ANCIENT_GREEK_NUMBERS = _BlockRange(0x10140, 0x1018F, _RANGES)
    ANCIENT_SYMBOLS = _BlockRange(0x10190, 0x101CF, _RANGES)
    PHAISTOS_DISC = _BlockRange(0x101D0, 0x101FF, _RANGES)
    LYCIAN = _BlockRange(0x10280, 0x1029F, _RANGES)
    CARIAN = _BlockRange(0x102A0, 0x102DF, _RANGES)
    OLD_ITALIC = _BlockRange(0x10300, 0x1032F, _RANGES)
    GOTHIC = _BlockRange(0x10330, 0x1034F, _RANGES)
    UGARITIC = _BlockRange(0x10380, 0x1039F, _RANGES)
    OLD_PERSIAN = _BlockRange(0x103A0, 0x103DF, _RANGES)
    DESERET = _BlockRange(0x10400, 0x1044F, _RANGES)
    SHAVIAN = _BlockRange(0x10450, 0x1047F, _RANGES)
    OSMANYA = _BlockRange(0x10480, 0x104AF, _RANGES)
    CYPRIOT_SYLLABARY = _BlockRange(0x10800, 0x1083F, _RANGES)
    IMPERIAL_ARAMAIC = _BlockRange(0x10840, 0x1085F, _RANGES)
    PHOENICIAN = _BlockRange(0x10900, 0x1091F, _RANGES)
    LYDIAN = _BlockRange(0x10920, 0x1093F, _RANGES)
    KHAROSHTHI = _BlockRange(0x10A00, 0x10A5F, _RANGES)
    OLD_SOUTH_ARABIAN = _BlockRange(0x10A60, 0x10A7F, _RANGES)
    AVESTAN = _BlockRange(0x10B00, 0x10B3F, _RANGES)
    INSCRIPTIONAL_PARTHIAN = _BlockRange(0x10B40, 0x10B5F, _RANGES)
    INSCRIPTIONAL_PAHLAVI = _BlockRange(0x10B60, 0x10B7F, _RANGES)
    OLD_TURKIC = _BlockRange(0x10C00, 0x10C4F, _RANGES)
    RUMI_NUMERAL_SYMBOLS = _BlockRange(0x10E60, 0x10E7F, _RANGES)
    BRAHMI = _BlockRange(0x11000, 0x1107F, _RANGES)
    KAITHI = _BlockRange(0x11080, 0x110CF, _RANGES)
    CUNEIFORM = _BlockRange(0x12000, 0x123FF, _RANGES)
    CUNEIFORM_NUMBERS_AND_PUNCTUATION = _BlockRange(0x12400, 0x1247F, _RANGES)
    EGYPTIAN_HIEROGLYPHS = _BlockRange(0x13000, 0x1342F, _RANGES)
    BAMUM_SUPPLEMENT = _BlockRange(0x16800, 0x16A3F, _RANGES)
    KANA_SUPPLEMENT = _BlockRange(0x1B000, 0x1B0FF, _RANGES)
    BYZANTINE_MUSICAL_SYMBOLS = _BlockRange(0x1D000, 0x1D0FF, _RANGES)
    MUSICAL_SYMBOLS = _BlockRange(0x1D100, 0x1D1FF, _RANGES)
    ANCIENT_GREEK_MUSICAL_NOTATION = _BlockRange(0x1D200, 0x1D24F, _RANGES)
    TAI_XUAN_JING_SYMBOLS = _BlockRange(0x1D300, 0x1D35F, _RANGES)
    COUNTING_ROD_NUMERALS = _BlockRange(0x1D360, 0x1D37F, _RANGES)
    MATHEMATICAL_ALPHANUMERIC_SYMBOLS = _BlockRange(0x1D400, 0x1D7FF, _RANGES)
    MAHJONG_TILES = _BlockRange(0x1F000, 0x1F02F, _RANGES)
    DOMINO_TILES = _BlockRange(0x1F030, 0x1F09F, _RANGES)
    PLAYING_CARDS = _BlockRange(0x1F0A0, 0x1F0FF, _RANGES)
    ENCLOSED_ALPHANUMERIC_SUPPLEMENT = _BlockRange(0x1F100, 0x1F1FF, _RANGES)
    ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = _BlockRange(0x1F200, 0x1F2FF, _RANGES)
    MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = _BlockRange(0x1F300, 0x1F5FF, _RANGES)
    EMOTICONS = _BlockRange(0x1F600, 0x1F64F, _RANGES)
    TRANSPORT_AND_MAP_SYMBOLS = _BlockRange(0x1F680, 0x1F6FF, _RANGES)
    ALCHEMICAL_SYMBOLS = _BlockRange(0x1F700, 0x1F77F, _RANGES)
    CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = _BlockRange(0x20000, 0x2A6DF, _RANGES)
    CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = _BlockRange(0x2A700, 0x2B73F, _RANGES)
    CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = _BlockRange(0x2B740, 0x2B81F, _RANGES)
    CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = _BlockRange(0x2F800, 0x2FA1F, _RANGES)
    TAGS = _BlockRange(0xE0000, 0xE007F, _RANGES)
    VARIATION_SELECTORS_SUPPLEMENT = _BlockRange(0xE0100, 0xE01EF, _RANGES)
    SUPPLEMENTARY_PRIVATE_USE_AREA_A = _BlockRange(0xF0000, 0xFFFFF, _RANGES)
    SUPPLEMENTARY_PRIVATE_USE_AREA_B = _BlockRange(0x100000, 0x10FFFF, _RANGES)
    # Fallback for code points outside every block above; not registered.
    UNKNOWN = _BlockRange(-1, -1)

    @classmethod
    def get(cls, uni_char):
        """Find the block whose range contains uni_char.

        The character is coerced with unicod() and converted to its code
        point with ord(). Returns the matching _BlockRange, or
        Block.UNKNOWN when no registered range contains the code point.
        """
        code_point = ord(unicod(uni_char))
        if Block._RANGE_KEYS is None:
            # First lookup: build the sorted list of block start points.
            Block._RANGE_KEYS = sorted(Block._RANGES.keys())
        keys = Block._RANGE_KEYS
        idx = bisect.bisect_left(keys, code_point)
        # The containing block, if any, starts either just before the
        # insertion point or exactly at it (code point == block start).
        for pos in (idx - 1, idx):
            if 0 <= pos < len(Block._RANGES):
                candidate = Block._RANGES[keys[pos]]
                if candidate.start <= code_point <= candidate.end:
                    return candidate
        return Block.UNKNOWN

399 

400 

def digit(uni_char, default_value=None):
    """Return the integer digit value of the Unicode character uni_char.

    When the character has no digit value, default_value is returned if it
    was supplied (i.e. is not None); otherwise the ValueError raised by
    unicodedata.digit() propagates to the caller.
    """
    char = unicod(uni_char)  # Force to Unicode.
    if default_value is None:
        # No usable default: let unicodedata raise for non-digit characters.
        return unicodedata.digit(char)
    return unicodedata.digit(char, default_value)

410 

411 

if __name__ == '__main__':  # pragma no cover
    # Run as a script: execute the doctests embedded in this module.
    from doctest import testmod
    testmod()