Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/django/utils/text.py: 24%

1import html.entities

2import re

3import unicodedata

4from gzip import GzipFile

5from gzip import compress as gzip_compress

6from io import BytesIO

8from django.core.exceptions import SuspiciousFileOperation

9from django.utils.functional import SimpleLazyObject, keep_lazy_text, lazy

10from django.utils.regex_helper import _lazy_re_compile

11from django.utils.translation import gettext as _

12from django.utils.translation import gettext_lazy, pgettext

15@keep_lazy_text

def capfirst(x):
    """
    Capitalize the first letter of a string.

    Falsy input is returned unchanged (so ``None`` stays ``None`` and ``""``
    stays ``""``). Any other non-string value is converted with ``str()``
    first. Only the first character is upper-cased; the rest of the string is
    left as is.
    """
    if not x:
        return x
    if not isinstance(x, str):
        x = str(x)
    # Slice rather than index: a truthy object whose str() is empty would
    # otherwise raise IndexError on x[0].
    return x[:1].upper() + x[1:]

# Set up regular expressions

# Either an HTML-like tag (matched but not captured) or, in group 1, a run of
# characters containing no whitespace and no angle brackets (a "word").
re_words = _lazy_re_compile(r"<[^>]+?>|([^<>\s]+)", re.S)
# Either an HTML-like tag (matched but not captured) or, in group 1, any
# single character (re.S lets "." match newlines as well).
re_chars = _lazy_re_compile(r"<[^>]+?>|(.)", re.S)
# Takes a tag apart: group 1 is the "/" of a closing tag, group 2 the tag
# name, group 3 the trailing "/" of a self-closing tag.
re_tag = _lazy_re_compile(r"<(/)?(\S+?)(?:(\s*/)|\s.*?)?>", re.S)
re_newlines = _lazy_re_compile(r"\r\n|\r")  # Used in normalize_newlines
# An uppercase ASCII letter that either follows a lowercase ASCII letter or is
# not followed by another uppercase letter or the end of the string.
re_camel_case = _lazy_re_compile(r"(((?<=[a-z])[A-Z])|([A-Z](?![A-Z]|$)))")

33@keep_lazy_text

def wrap(text, width):
    """
    A word-wrap function that preserves existing line breaks. Expects that
    existing line breaks are posix newlines.

    Preserve all white space except added line breaks consume the space on
    which they break the line.

    Don't wrap long words, thus the output text may have lines longer than
    ``width``.

    A line's trailing newline counts towards its length, so a line whose
    visible text is exactly ``width`` characters long is still treated as too
    long when it ends with a newline.
    """

    def _generator():
        for line in text.splitlines(True):  # True keeps trailing linebreaks
            # The limit is simply ``width``: the former
            # ``min((line.endswith("\n") and width + 1 or width), width)``
            # always evaluated to ``width``.
            while len(line) > width:
                # Prefer the last space that keeps the chunk within the limit.
                space = line[: width + 1].rfind(" ") + 1
                if space == 0:
                    # No such space: break at the first space in the line
                    # instead, leaving an over-long word on its own line.
                    space = line.find(" ") + 1
                    if space == 0:
                        # No space at all: emit the remainder unwrapped.
                        yield line
                        line = ""
                        break
                yield "%s\n" % line[: space - 1]
                line = line[space:]
            if line:
                yield line

    return "".join(_generator())

class Truncator(SimpleLazyObject):
    """
    Lazily wrap ``str(text)`` and truncate it by characters or by words,
    either as plain text or as HTML.
    """

    def __init__(self, text):
        super().__init__(lambda: str(text))

    def add_truncation_text(self, text, truncate=None):
        """
        Combine ``text`` with the truncation marker ``truncate``.

        Without an explicit ``truncate`` a translatable ellipsis template is
        used. A marker containing ``%(truncated_text)s`` is used as a template
        around ``text``; any other marker is appended, unless ``text`` already
        ends with it.
        """
        if truncate is None:
            truncate = pgettext(
                "String to return when truncating text", "%(truncated_text)s…"
            )
        if "%(truncated_text)s" in truncate:
            return truncate % {"truncated_text": text}
        # No placeholder in the marker, so it is a plain suffix. Skip it when
        # the text already ends that way.
        already_marked = text.endswith(truncate)
        return text if already_marked else "%s%s" % (text, truncate)

    def chars(self, num, truncate=None, html=False):
        """
        Return the text cut down to at most ``num`` characters.

        ``truncate`` is the marker signalling that text was cut; it defaults
        to a translatable ellipsis. With ``html`` true, the text is truncated
        as HTML.
        """
        self._setup()
        limit = int(num)
        normalized = unicodedata.normalize("NFC", self._wrapped)

        # Reserve room for the marker: one slot per non-combining character,
        # stopping once the budget reaches zero.
        budget = limit
        for marker_char in self.add_truncation_text("", truncate):
            if unicodedata.combining(marker_char):
                continue
            budget -= 1
            if budget == 0:
                break
        if not html:
            return self._text_chars(limit, truncate, normalized, budget)
        return self._truncate_html(limit, truncate, normalized, budget, False)

    def _text_chars(self, length, truncate, text, truncate_len):
        """Cut plain ``text`` once it holds more than ``length`` characters."""
        counted = 0
        cut_at = None
        for index, char in enumerate(text):
            # Combining characters do not add to the counted length.
            if not unicodedata.combining(char):
                counted += 1
                if cut_at is None and counted > truncate_len:
                    cut_at = index
                if counted > length:
                    # Too long: keep the text before the cut and add the
                    # marker.
                    return self.add_truncation_text(text[: cut_at or 0], truncate)

        # Short enough; hand the text back untouched.
        return text

    def words(self, num, truncate=None, html=False):
        """
        Return the text cut down after ``num`` words.

        ``truncate`` is the marker signalling that text was cut; it defaults
        to an ellipsis. With ``html`` true, the text is truncated as HTML.
        """
        self._setup()
        limit = int(num)
        if not html:
            return self._text_words(limit, truncate)
        return self._truncate_html(limit, truncate, self._wrapped, limit, True)

    def _text_words(self, length, truncate):
        """
        Cut the wrapped plain text after ``length`` words.

        Words are re-joined with single spaces, so newlines are dropped.
        """
        tokens = self._wrapped.split()
        if len(tokens) <= length:
            return " ".join(tokens)
        return self.add_truncation_text(" ".join(tokens[:length]), truncate)

    def _truncate_html(self, length, truncate, text, truncate_len, words):
        """
        Cut HTML ``text`` after ``length`` characters (tags and comments are
        not counted) or, when ``words`` is true, after ``length`` words.
        Tags still open at the cut are closed again if the given HTML closed
        them correctly.

        Newlines in the HTML are kept.
        """
        if words and length <= 0:
            return ""

        void_tags = (
            "br", "col", "link", "base", "img", "param", "area", "hr", "input",
        )

        # Walk the text, counting non-HTML chars/words and tracking open tags.
        cursor = 0
        cut_pos = 0
        seen = 0
        open_tags = []

        scanner = re_words if words else re_chars

        while seen <= length:
            match = scanner.search(text, cursor)
            if not match:
                # Reached the end of the text.
                break
            cursor = match.end(0)
            if match[1]:
                # A real (non-HTML) word or character.
                seen += 1
                if seen == truncate_len:
                    cut_pos = cursor
                continue
            tag = re_tag.match(match[0])
            if not tag or seen >= truncate_len:
                # Not a tag, or a tag beyond the truncation point.
                continue
            closing_tag, tagname, self_closing = tag.groups()
            # Element names are compared case-insensitively.
            tagname = tagname.lower()
            if self_closing or tagname in void_tags:
                continue
            if not closing_tag:
                # Most recently opened tag goes to the front of the list.
                open_tags.insert(0, tagname)
                continue
            if tagname in open_tags:
                # SGML: an end tag closes, back to the matching start tag,
                # all unclosed intervening start tags with omitted end tags.
                open_tags = open_tags[open_tags.index(tagname) + 1 :]

        if seen <= length:
            return text
        pieces = [text[:cut_pos]]
        marker = self.add_truncation_text("", truncate)
        if marker:
            pieces.append(marker)
        # Close whatever is still open, innermost first.
        pieces.extend("</%s>" % tag for tag in open_tags)
        return "".join(pieces)

233

234

235@keep_lazy_text

def get_valid_filename(name):
    """
    Turn ``name`` into a string that can be used as a clean filename.

    Surrounding whitespace is stripped, remaining spaces become underscores,
    and every character that is not alphanumeric, a dash, an underscore, or a
    dot is dropped. Raise SuspiciousFileOperation when what is left is empty,
    "." or "..".

    >>> get_valid_filename("john's portrait in 2004.jpg")
    'johns_portrait_in_2004.jpg'
    """
    underscored = str(name).strip().replace(" ", "_")
    cleaned = re.sub(r"(?u)[^-\w.]", "", underscored)
    if cleaned not in {"", ".", ".."}:
        return cleaned
    raise SuspiciousFileOperation("Could not derive file name from '%s'" % name)

250

251

@keep_lazy_text
def get_text_list(list_, last_word=gettext_lazy("or")):
    """
    Render the items of ``list_`` as one readable enumeration: all but the
    last item are joined by a translatable separator, and ``last_word`` goes
    before the final item.

    >>> get_text_list(['a', 'b', 'c', 'd'])
    'a, b, c or d'
    >>> get_text_list(['a', 'b', 'c'], 'and')
    'a, b and c'
    >>> get_text_list(['a', 'b'], 'and')
    'a and b'
    >>> get_text_list(['a'])
    'a'
    >>> get_text_list([])
    ''
    """
    if not list_:
        return ""
    if len(list_) == 1:
        return str(list_[0])
    # Translators: This string is used as a separator between list elements
    leading = _(", ").join(str(i) for i in list_[:-1])
    return "%s %s %s" % (leading, str(last_word), str(list_[-1]))

276

277

@keep_lazy_text
def normalize_newlines(text):
    """Return ``str(text)`` with every CRLF and lone CR replaced by LF."""
    as_string = str(text)
    return re_newlines.sub("\n", as_string)

282

283

284@keep_lazy_text

def phone2numeric(phone):
    """
    Replace the letters in a phone number with their keypad digits.

    Letters are matched case-insensitively (the input is lower-cased first);
    every other character is passed through unchanged.
    """
    keypad = {
        "2": "abc",
        "3": "def",
        "4": "ghi",
        "5": "jkl",
        "6": "mno",
        "7": "pqrs",
        "8": "tuv",
        "9": "wxyz",
    }
    # Invert the keypad into a letter -> digit lookup.
    char2number = {
        letter: digit for digit, letters in keypad.items() for letter in letters
    }
    return "".join([char2number.get(c, c) for c in phone.lower()])

316

317

def compress_string(s):
    """
    Return ``s`` gzip-compressed at compression level 6.

    The header modification time is pinned to 0 rather than taken from the
    clock.
    """
    options = {"compresslevel": 6, "mtime": 0}
    return gzip_compress(s, **options)

320

321

class StreamingBuffer(BytesIO):
    """A BytesIO whose ``read()`` drains it: return everything, then reset."""

    def read(self):
        """Return the whole buffer content and leave the buffer empty."""
        drained = self.getvalue()
        # Rewind, then truncate at the (now zero) position to discard the data.
        self.seek(0)
        self.truncate()
        return drained

328

329

# Like compress_string, but for iterators of strings.
def compress_sequence(sequence):
    """
    Yield gzip output incrementally while consuming ``sequence``.

    Each item is written to a GzipFile backed by a StreamingBuffer; whatever
    the buffer holds after a write is yielded straight away, and the data
    left after the GzipFile is closed is yielded last.
    """
    sink = StreamingBuffer()
    gz = GzipFile(mode="wb", compresslevel=6, fileobj=sink, mtime=0)
    with gz:
        # Whatever the buffer holds before any item is written (the headers).
        yield sink.read()
        for chunk in sequence:
            gz.write(chunk)
            pending = sink.read()
            if pending:
                yield pending
    # Leaving the ``with`` block closed the GzipFile; emit what that left.
    yield sink.read()

342

343

# Expression to match some_token and some_token="with spaces" (and similarly
# for single-quoted strings).
#
# Compiled with re.VERBOSE, so whitespace inside the pattern is ignored. The
# first alternative is an optional run of characters that are neither
# whitespace nor quotes, followed by one or more quoted strings (double- or
# single-quoted, with backslash escapes allowed inside), each of which may be
# followed by another such run. The second alternative is any run of
# non-whitespace characters.
smart_split_re = _lazy_re_compile(
    r"""
    ((?:
        [^\s'"]*
        (?:
            (?:"(?:[^"\\]|\\.)*" | '(?:[^'\\]|\\.)*')
            [^\s'"]*
        )+
    ) | \S+)
""",
    re.VERBOSE,
)

358

359

def smart_split(text):
    r"""
    Yield the space-separated pieces of ``text``, keeping quoted phrases in
    one piece. Single and double quotes both work, and a quote can be escaped
    with a backslash. Pieces keep their surrounding quote marks and their
    escaped quotes stay escaped (pass them through unescape_string_literal()
    for further processing).

    >>> list(smart_split(r'This is "a person\'s" test.'))
    ['This', 'is', '"a person\\\'s"', 'test.']
    >>> list(smart_split(r"Another 'person\'s' test."))
    ['Another', "'person\\'s'", 'test.']
    >>> list(smart_split(r'A "\"funky\" style" test.'))
    ['A', '"\\"funky\\" style"', 'test.']
    """
    yield from (token.group(0) for token in smart_split_re.finditer(str(text)))

377

378

379def _replace_entity(match):

380 text = match[1]

381 if text[0] == "#":

382 text = text[1:]

383 try:

384 if text[0] in "xX":

385 c = int(text[1:], 16)

386 else:

387 c = int(text)

388 return chr(c)

389 except ValueError:

390 return match[0]

391 else:

392 try:

393 return chr(html.entities.name2codepoint[text])

394 except KeyError:

395 return match[0]

396

397

# Matches an HTML character reference such as &amp;, &#38; or &#x26;. Group 1
# captures everything between the "&" and the ";".
_entity_re = _lazy_re_compile(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));")

399

400

401@keep_lazy_text

def unescape_string_literal(s):
    r"""
    Convert quoted string literals to unquoted strings with escaped quotes and
    backslashes unquoted::

        >>> unescape_string_literal('"abc"')
        'abc'
        >>> unescape_string_literal("'abc'")
        'abc'
        >>> unescape_string_literal('"a \"bc\""')
        'a "bc"'
        >>> unescape_string_literal("'\'ab\' c'")
        "'ab' c"

    Raise ValueError if ``s`` is empty or is not wrapped in a matching pair of
    single or double quotes.
    """
    # Test for emptiness first: indexing "" would raise IndexError instead of
    # the ValueError raised for every other malformed input.
    if not s or s[0] not in "\"'" or s[-1] != s[0]:
        raise ValueError("Not a string literal: %r" % s)
    quote = s[0]
    # Unescape the quote character first, then doubled backslashes.
    return s[1:-1].replace(r"\%s" % quote, quote).replace(r"\\", "\\")

420

421

422@keep_lazy_text

def slugify(value, allow_unicode=False):
    """
    Build a slug from ``value``.

    Unless ``allow_unicode`` is true, the text is reduced to ASCII. It is then
    lower-cased, stripped of every character that is not alphanumeric, an
    underscore, a hyphen, or whitespace, and runs of whitespace and hyphens
    are collapsed into single hyphens. Leading and trailing hyphens and
    underscores are removed.
    """
    text = str(value)
    if allow_unicode:
        text = unicodedata.normalize("NFKC", text)
    else:
        # Decompose, then drop whatever has no ASCII representation.
        decomposed = unicodedata.normalize("NFKD", text)
        text = decomposed.encode("ascii", "ignore").decode("ascii")
    kept = re.sub(r"[^\w\s-]", "", text.lower())
    collapsed = re.sub(r"[-\s]+", "-", kept)
    return collapsed.strip("-_")

441

442

def camel_case_to_spaces(value):
    """
    Put a space before each CamelCase word boundary in ``value``, then strip
    surrounding whitespace and lower-case the result.
    """
    spaced = re_camel_case.sub(r" \1", value)
    return spaced.strip().lower()

448

449

450def _format_lazy(format_string, *args, **kwargs):

451 """

452 Apply str.format() on 'format_string' where format_string, args,

453 and/or kwargs might be lazy.

454 """

455 return format_string.format(*args, **kwargs)

456

457

# _format_lazy() wrapped by lazy(), with str passed as the second argument
# (presumably the type the deferred call resolves to; confirm against
# django.utils.functional.lazy).
format_lazy = lazy(_format_lazy, str)