Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/django/utils/text.py: 24%

210 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1import html.entities 

2import re 

3import unicodedata 

4from gzip import GzipFile 

5from gzip import compress as gzip_compress 

6from io import BytesIO 

7 

8from django.core.exceptions import SuspiciousFileOperation 

9from django.utils.functional import SimpleLazyObject, keep_lazy_text, lazy 

10from django.utils.regex_helper import _lazy_re_compile 

11from django.utils.translation import gettext as _ 

12from django.utils.translation import gettext_lazy, pgettext 

13 

14 

@keep_lazy_text
def capfirst(x):
    """
    Capitalize the first letter of a string.

    Falsy input (``None``, ``""``, ``0``, ...) is returned unchanged. Any other
    value that is not a ``str`` is converted with ``str()`` first. Only the
    first character is upper-cased; the remainder is left as is.
    """
    if not x:
        return x
    if not isinstance(x, str):
        x = str(x)
    # Slice instead of indexing: a truthy object whose str() is empty then
    # yields "" rather than raising IndexError.
    return x[:1].upper() + x[1:]

23 

24 

# Set up regular expressions
# A "<...>" span (not captured) or a run of characters that are neither
# whitespace nor angle brackets (captured as group 1).
re_words = _lazy_re_compile(r"<[^>]+?>|([^<>\s]+)", re.S)
# A "<...>" span (not captured) or any single character (group 1).
re_chars = _lazy_re_compile(r"<[^>]+?>|(.)", re.S)
# Groups: (1) the "/" of a closing tag, (2) the tag name, (3) the trailing
# "/" of a self-closing tag.
re_tag = _lazy_re_compile(r"<(/)?(\S+?)(?:(\s*/)|\s.*?)?>", re.S)
re_newlines = _lazy_re_compile(r"\r\n|\r")  # Used in normalize_newlines
# An uppercase letter preceded by a lowercase one, or an uppercase letter
# that is not followed by another uppercase letter or the end of the string.
re_camel_case = _lazy_re_compile(r"(((?<=[a-z])[A-Z])|([A-Z](?![A-Z]|$)))")

31 

32 

@keep_lazy_text
def wrap(text, width):
    """
    A word-wrap function that preserves existing line breaks. Expects that
    existing line breaks are posix newlines.

    Preserve all white space except added line breaks consume the space on
    which they break the line.

    Don't wrap long words, thus the output text may have lines longer than
    ``width``.

    ``text`` is the string to wrap and ``width`` the maximum line length in
    characters. Return the wrapped text as a single string.
    """

    def _generator():
        for line in text.splitlines(True):  # True keeps trailing linebreaks
            # The trailing linebreak kept by splitlines() is included in
            # len(line), so the limit is always exactly ``width``.
            while len(line) > width:
                # Prefer the last space that still fits within the limit.
                space = line[: width + 1].rfind(" ") + 1
                if space == 0:
                    # No space fits: break after the first (overlong) word.
                    space = line.find(" ") + 1
                    if space == 0:
                        # No space at all: emit the line unwrapped.
                        yield line
                        line = ""
                        break
                # The space the line is broken on is replaced by the newline.
                yield "%s\n" % line[: space - 1]
                line = line[space:]
            if line:
                yield line

    return "".join(_generator())

64 

65 

class Truncator(SimpleLazyObject):
    """
    An object used to truncate text, either by characters or words.

    The wrapped value is ``str(text)``, evaluated lazily on first use.
    """

    def __init__(self, text):
        super().__init__(lambda: str(text))

    def add_truncation_text(self, text, truncate=None):
        """
        Return ``text`` combined with the truncation marker ``truncate``.

        When ``truncate`` is None, a translatable default consisting of the
        text followed by an ellipsis is used. A marker that contains the
        ``%(truncated_text)s`` placeholder is interpolated with ``text``;
        any other marker is appended, unless ``text`` already ends with it.
        """
        if truncate is None:
            truncate = pgettext(
                "String to return when truncating text", "%(truncated_text)s…"
            )
        if "%(truncated_text)s" in truncate:
            return truncate % {"truncated_text": text}
        # The truncation text didn't contain the %(truncated_text)s string
        # replacement argument so just append it to the text.
        if text.endswith(truncate):
            # But don't append the truncation text if the current text already
            # ends in this.
            return text
        return "%s%s" % (text, truncate)

    def chars(self, num, truncate=None, html=False):
        """
        Return the text truncated to be no longer than the specified number
        of characters.

        `truncate` specifies what should be used to notify that the string has
        been truncated, defaulting to a translatable string of an ellipsis.

        `num` is converted with ``int()``. If `html` is true, the text is
        handled by ``_truncate_html()`` instead of ``_text_chars()``.
        """
        self._setup()
        length = int(num)
        text = unicodedata.normalize("NFC", self._wrapped)

        # Calculate the length to truncate to (max length - end_text length)
        truncate_len = length
        for char in self.add_truncation_text("", truncate):
            if not unicodedata.combining(char):
                truncate_len -= 1
                if truncate_len == 0:
                    break
        if html:
            return self._truncate_html(length, truncate, text, truncate_len, False)
        return self._text_chars(length, truncate, text, truncate_len)

    def _text_chars(self, length, truncate, text, truncate_len):
        """
        Truncate a string after a certain number of chars.

        Combining characters are not counted. If more than ``length``
        characters are found, return the text cut where the count first
        exceeded ``truncate_len``, with the truncation text added; otherwise
        return ``text`` unchanged.
        """
        s_len = 0
        end_index = None
        for i, char in enumerate(text):
            if unicodedata.combining(char):
                # Don't consider combining characters
                # as adding to the string length
                continue
            s_len += 1
            if end_index is None and s_len > truncate_len:
                end_index = i
            if s_len > length:
                # Return the truncated string
                return self.add_truncation_text(text[: end_index or 0], truncate)

        # Return the original string since no truncation was necessary
        return text

    def words(self, num, truncate=None, html=False):
        """
        Truncate a string after a certain number of words. `truncate` specifies
        what should be used to notify that the string has been truncated,
        defaulting to ellipsis.

        `num` is converted with ``int()``. If `html` is true, the text is
        handled by ``_truncate_html()`` instead of ``_text_words()``.
        """
        self._setup()
        length = int(num)
        if html:
            return self._truncate_html(length, truncate, self._wrapped, length, True)
        return self._text_words(length, truncate)

    def _text_words(self, length, truncate):
        """
        Truncate a string after a certain number of words.

        Strip newlines in the string.
        """
        words = self._wrapped.split()
        if len(words) > length:
            # Clamp at 0 so a negative ``length`` is not interpreted as a
            # from-the-end slice index (words[:-1] would keep all but the
            # last word); it now behaves like a ``length`` of 0.
            words = words[: max(length, 0)]
            return self.add_truncation_text(" ".join(words), truncate)
        return " ".join(words)

    def _truncate_html(self, length, truncate, text, truncate_len, words):
        """
        Truncate HTML to a certain number of chars (not counting tags and
        comments), or, if words is True, then to a certain number of words.
        Close opened tags if they were correctly closed in the given HTML.

        Preserve newlines in the HTML.
        """
        if words and length <= 0:
            return ""

        # Tags that never get a closing tag and so are never tracked as open.
        html4_singlets = (
            "br",
            "col",
            "link",
            "base",
            "img",
            "param",
            "area",
            "hr",
            "input",
        )

        # Count non-HTML chars/words and keep note of open tags
        pos = 0
        end_text_pos = 0
        current_len = 0
        open_tags = []

        regex = re_words if words else re_chars

        while current_len <= length:
            m = regex.search(text, pos)
            if not m:
                # Checked through whole string
                break
            pos = m.end(0)
            if m[1]:
                # It's an actual non-HTML word or char
                current_len += 1
                if current_len == truncate_len:
                    end_text_pos = pos
                continue
            # Check for tag
            tag = re_tag.match(m[0])
            if not tag or current_len >= truncate_len:
                # Don't worry about non tags or tags after our truncate point
                continue
            closing_tag, tagname, self_closing = tag.groups()
            # Element names are always case-insensitive
            tagname = tagname.lower()
            if self_closing or tagname in html4_singlets:
                pass
            elif closing_tag:
                # Check for match in open tags list
                try:
                    i = open_tags.index(tagname)
                except ValueError:
                    pass
                else:
                    # SGML: An end tag closes, back to the matching start tag,
                    # all unclosed intervening start tags with omitted end tags
                    open_tags = open_tags[i + 1 :]
            else:
                # Add it to the start of the open tags list
                open_tags.insert(0, tagname)

        if current_len <= length:
            return text
        out = text[:end_text_pos]
        truncate_text = self.add_truncation_text("", truncate)
        if truncate_text:
            out += truncate_text
        # Close any tags still open
        for tag in open_tags:
            out += "</%s>" % tag
        # Return string
        return out

233 

234 

@keep_lazy_text
def get_valid_filename(name):
    """
    Return the given string converted to a string that can be used for a clean
    filename. Remove leading and trailing spaces; convert other spaces to
    underscores; and remove anything that is not an alphanumeric, dash,
    underscore, or dot.
    >>> get_valid_filename("john's portrait in 2004.jpg")
    'johns_portrait_in_2004.jpg'

    Raise SuspiciousFileOperation if the result would be empty, "." or "..".
    """
    s = str(name).strip().replace(" ", "_")
    s = re.sub(r"(?u)[^-\w.]", "", s)
    if s in {"", ".", ".."}:
        raise SuspiciousFileOperation("Could not derive file name from '%s'" % name)
    return s

250 

251 

@keep_lazy_text
def get_text_list(list_, last_word=gettext_lazy("or")):
    """
    Join the items of ``list_`` into one string: all but the last item are
    joined with a translatable ", " separator, and ``last_word`` is placed
    before the final item. Every item is converted with ``str()``.

    >>> get_text_list(['a', 'b', 'c', 'd'])
    'a, b, c or d'
    >>> get_text_list(['a', 'b', 'c'], 'and')
    'a, b and c'
    >>> get_text_list(['a', 'b'], 'and')
    'a and b'
    >>> get_text_list(['a'])
    'a'
    >>> get_text_list([])
    ''
    """
    if not list_:
        return ""
    if len(list_) == 1:
        return str(list_[0])
    return "%s %s %s" % (
        # Translators: This string is used as a separator between list elements
        _(", ").join(str(i) for i in list_[:-1]),
        str(last_word),
        str(list_[-1]),
    )

276 

277 

@keep_lazy_text
def normalize_newlines(text):
    """
    Normalize CRLF and CR newlines to just LF.

    ``text`` is converted with ``str()`` before substitution.
    """
    return re_newlines.sub("\n", str(text))

282 

283 

@keep_lazy_text
def phone2numeric(phone):
    """
    Convert a phone number with letters into its numeric equivalent.

    The input is lower-cased first; letters a-z are replaced by their keypad
    digit and every other character is kept as is.
    """
    char2number = {
        "a": "2",
        "b": "2",
        "c": "2",
        "d": "3",
        "e": "3",
        "f": "3",
        "g": "4",
        "h": "4",
        "i": "4",
        "j": "5",
        "k": "5",
        "l": "5",
        "m": "6",
        "n": "6",
        "o": "6",
        "p": "7",
        "q": "7",
        "r": "7",
        "s": "7",
        "t": "8",
        "u": "8",
        "v": "8",
        "w": "9",
        "x": "9",
        "y": "9",
        "z": "9",
    }
    return "".join(char2number.get(c, c) for c in phone.lower())

316 

317 

def compress_string(s):
    """
    Return ``s`` gzip-compressed at compression level 6.

    The header timestamp is fixed at 0, so the output does not depend on the
    time of the call.
    """
    return gzip_compress(s, compresslevel=6, mtime=0)

320 

321 

class StreamingBuffer(BytesIO):
    """
    A ``BytesIO`` whose ``read()`` drains the buffer: it returns everything
    currently stored and then empties the buffer.
    """

    def read(self):
        """Return all buffered bytes and reset the buffer to empty."""
        ret = self.getvalue()
        # Rewind and cut at position 0 so later writes start from scratch.
        self.seek(0)
        self.truncate()
        return ret

328 

329 

# Like compress_string, but for iterators of strings.
def compress_sequence(sequence):
    """
    Generator yielding the gzip-compressed form of ``sequence`` in chunks.

    Each item is written to a ``GzipFile`` (compression level 6, timestamp 0)
    backed by a ``StreamingBuffer``; whatever output is available after each
    write is yielded. The first chunk is what the buffer holds right after the
    file is opened, the last is what it holds after the file is closed.
    """
    buf = StreamingBuffer()
    with GzipFile(mode="wb", compresslevel=6, fileobj=buf, mtime=0) as zfile:
        # Output headers...
        yield buf.read()
        for item in sequence:
            zfile.write(item)
            data = buf.read()
            # Skip writes that produced no output yet.
            if data:
                yield data
    # Leaving the ``with`` block closes the file; emit what that flushed.
    yield buf.read()

342 

343 

# Expression to match some_token and some_token="with spaces" (and similarly
# for single-quoted strings).
smart_split_re = _lazy_re_compile(
    r"""
    ((?:
        [^\s'"]*
        (?:
            (?:"(?:[^"\\]|\\.)*" | '(?:[^'\\]|\\.)*')
            [^\s'"]*
        )+
    ) | \S+)
""",
    re.VERBOSE,
)

358 

359 

def smart_split(text):
    r"""
    Generator that splits a string by spaces, leaving quoted phrases together.
    Supports both single and double quotes, and supports escaping quotes with
    backslashes. In the output, strings will keep their initial and trailing
    quote marks and escaped quotes will remain escaped (the results can then
    be further processed with unescape_string_literal()).

    >>> list(smart_split(r'This is "a person\'s" test.'))
    ['This', 'is', '"a person\\\'s"', 'test.']
    >>> list(smart_split(r"Another 'person\'s' test."))
    ['Another', "'person\\'s'", 'test.']
    >>> list(smart_split(r'A "\"funky\" style" test.'))
    ['A', '"\\"funky\\" style"', 'test.']
    """
    # ``text`` is converted with str() before matching.
    for bit in smart_split_re.finditer(str(text)):
        yield bit[0]

377 

378 

def _replace_entity(match):
    """
    Regex substitution callback that resolves one HTML entity.

    ``match[1]`` is the entity body without the surrounding "&" and ";".
    A body starting with "#" is a numeric reference (hexadecimal when followed
    by "x"/"X", decimal otherwise); anything else is looked up as a named
    entity. Return the corresponding character, or the whole matched text
    unchanged when the reference cannot be resolved.
    """
    text = match[1]
    if text[0] == "#":
        text = text[1:]
        try:
            if text[0] in "xX":
                c = int(text[1:], 16)
            else:
                c = int(text)
            return chr(c)
        except (ValueError, OverflowError):
            # ValueError: not a number, or outside the Unicode range.
            # OverflowError: chr() is given a number too large for a C int.
            return match[0]
    else:
        try:
            return chr(html.entities.name2codepoint[text])
        except KeyError:
            return match[0]

396 

397 

# "&...;" where the captured body is an optional "#", an optional "x"/"X",
# then either hexadecimal digits or 1 to 8 word characters.
_entity_re = _lazy_re_compile(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));")

399 

400 

@keep_lazy_text
def unescape_string_literal(s):
    r"""
    Convert quoted string literals to unquoted strings with escaped quotes and
    backslashes unquoted::

        >>> unescape_string_literal('"abc"')
        'abc'
        >>> unescape_string_literal("'abc'")
        'abc'
        >>> unescape_string_literal('"a \"bc\""')
        'a "bc"'
        >>> unescape_string_literal("'\'ab\' c'")
        "'ab' c"

    Raise ValueError if ``s`` is empty, does not start with a single or double
    quote, or does not end with the same quote character it starts with.
    """
    # Check for emptiness first so that "" raises ValueError, not IndexError.
    if not s or s[0] not in "\"'" or s[-1] != s[0]:
        raise ValueError("Not a string literal: %r" % s)
    quote = s[0]
    return s[1:-1].replace(r"\%s" % quote, quote).replace(r"\\", "\\")

420 

421 

@keep_lazy_text
def slugify(value, allow_unicode=False):
    """
    Convert to ASCII if 'allow_unicode' is False. Convert spaces or repeated
    dashes to single dashes. Remove characters that aren't alphanumerics,
    underscores, or hyphens. Convert to lowercase. Also strip leading and
    trailing whitespace, dashes, and underscores.

    ``value`` is converted with ``str()`` first.
    """
    value = str(value)
    if allow_unicode:
        value = unicodedata.normalize("NFKC", value)
    else:
        # Decompose, then drop everything that cannot be encoded as ASCII.
        value = (
            unicodedata.normalize("NFKD", value)
            .encode("ascii", "ignore")
            .decode("ascii")
        )
    value = re.sub(r"[^\w\s-]", "", value.lower())
    return re.sub(r"[-\s]+", "-", value).strip("-_")

441 

442 

def camel_case_to_spaces(value):
    """
    Split CamelCase and convert to lowercase. Strip surrounding whitespace.
    """
    # A space is inserted before every match of re_camel_case.
    return re_camel_case.sub(r" \1", value).strip().lower()

448 

449 

def _format_lazy(format_string, *args, **kwargs):
    """
    Apply str.format() on 'format_string' where format_string, args,
    and/or kwargs might be lazy.

    Return the result of ``format_string.format(*args, **kwargs)``.
    """
    return format_string.format(*args, **kwargs)

456 

457 

# _format_lazy wrapped with lazy(), declaring str as its result type.
format_lazy = lazy(_format_lazy, str)