Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/django/utils/html.py: 22%

1"""HTML utilities suitable for global use."""

3import html

4import json

5import re

6from html.parser import HTMLParser

7from urllib.parse import parse_qsl, quote, unquote, urlencode, urlsplit, urlunsplit

9from django.utils.encoding import punycode

10from django.utils.functional import Promise, keep_lazy, keep_lazy_text

11from django.utils.http import RFC3986_GENDELIMS, RFC3986_SUBDELIMS

12from django.utils.regex_helper import _lazy_re_compile

13from django.utils.safestring import SafeData, SafeString, mark_safe

14from django.utils.text import normalize_newlines

# Configuration for urlize() function.
# Characters stripped from the end of a candidate URL before it is linked.
TRAILING_PUNCTUATION_CHARS = ".,:;!"
# (opening, closing) pairs that may wrap a candidate URL.
WRAPPING_PUNCTUATION = [("(", ")"), ("[", "]")]

# List of possible strings used for bullets in bulleted lists.
# The HTML entity spellings are kept as literal text on purpose: decoding them
# would collapse several entries into the same "\u2022" character (or into an
# empty string), leaving the list with duplicates.
DOTS = ["&middot;", "*", "\u2022", "&#149;", "&bull;", "&#8226;"]

# Splits text on runs of whitespace, angle brackets and quotes. The capturing
# group keeps the separators in the split result so the pieces can be joined
# back together unchanged.
word_split_re = _lazy_re_compile(r"""([\s<>"']+)""")

# Matches the start of an explicit http:// or https:// URL (optionally with a
# bracketed host).
simple_url_re = _lazy_re_compile(r"^https?://\[?\w", re.I)

# Matches scheme-less candidates: "www." prefixes, or words without "@" that
# end in one of the listed TLDs, optionally followed by a path.
simple_url_2_re = _lazy_re_compile(
    r"^www\.|^(?!http)\w[^@]+\.(com|edu|gov|int|mil|net|org)($|/.*)$",
    re.I,
)

@keep_lazy(str, SafeString)
def escape(text):
    """
    HTML-encode ``text`` (ampersands, quotes and angle brackets) and return
    the result marked as safe.

    The input is coerced to ``str`` first. Escaping is unconditional: input
    that is already escaped, even if marked as such, is escaped again, which
    may result in double-escaping. Use conditional_escape() when that is a
    concern.
    """
    as_text = str(text)
    encoded = html.escape(as_text)
    return mark_safe(encoded)

# Translation table (code point -> "\uXXXX" escape text) used by escapejs().
# Covers quote, markup and statement-delimiting characters plus the Unicode
# line/paragraph separators.
_js_escapes = {
    ord(char): "\\u%04X" % ord(char)
    for char in ("\\", "'", '"', ">", "<", "&", "=", "-", ";", "`", "\u2028", "\u2029")
}

# Escape every ASCII character with a value less than 32.
for _code_point in range(32):
    _js_escapes[_code_point] = "\\u%04X" % _code_point
del _code_point

@keep_lazy(str, SafeString)
def escapejs(value):
    """
    Return ``value`` with every character listed in ``_js_escapes`` replaced
    by its ``\\uXXXX`` escape, for use in JavaScript strings. The result is
    marked safe.
    """
    escaped = str(value).translate(_js_escapes)
    return mark_safe(escaped)

# Translation table used by json_script(): the three markup-significant
# characters are replaced by their unicode escapes.
_json_script_escapes = {ord(char): "\\u%04X" % ord(char) for char in (">", "<", "&")}

def json_script(value, element_id=None, encoder=None):
    """
    Serialize ``value`` to JSON and wrap it in a
    ``<script type="application/json">`` tag.

    The characters ``<``, ``>`` and ``&`` in the JSON text are replaced by
    their unicode escapes, so the output is safe to be placed anywhere except
    inside a tag attribute.

    ``element_id`` is optional. When it is truthy it is rendered as the
    tag's ``id`` attribute (conditionally escaped by format_html()); when it
    is omitted or empty the tag is rendered without an ``id`` attribute.

    ``encoder`` is an optional JSON encoder class passed to ``json.dumps()``
    as ``cls``. It defaults to ``DjangoJSONEncoder``.
    """
    # Imported lazily, inside the function, exactly as the original did.
    from django.core.serializers.json import DjangoJSONEncoder

    json_str = json.dumps(value, cls=encoder or DjangoJSONEncoder).translate(
        _json_script_escapes
    )
    if element_id:
        template = '<script id="{}" type="application/json">{}</script>'
        args = (element_id, mark_safe(json_str))
    else:
        template = '<script type="application/json">{}</script>'
        args = (mark_safe(json_str),)
    return format_html(template, *args)

def conditional_escape(text):
    """
    Escape ``text`` like escape() does, unless it is already pre-escaped.

    An object counts as pre-escaped when it exposes ``__html__``, the
    convention used both by Django's SafeData class and by third-party
    libraries like markupsafe; in that case the result of ``__html__()`` is
    returned as is. Lazy ``Promise`` objects are converted to ``str`` before
    the check.
    """
    value = str(text) if isinstance(text, Promise) else text
    if not hasattr(value, "__html__"):
        return escape(value)
    return value.__html__()

104

105

def format_html(format_string, *args, **kwargs):
    """
    Build a small HTML fragment with ``str.format`` semantics.

    Every positional and keyword argument goes through conditional_escape()
    before interpolation, and the formatted result is passed to mark_safe().
    Use this instead of str.format or % interpolation when assembling HTML.
    """
    escaped_kwargs = {}
    for key, raw in kwargs.items():
        escaped_kwargs[key] = conditional_escape(raw)
    # Positional arguments are escaped lazily, while the call unpacks them.
    escaped_args = (conditional_escape(raw) for raw in args)
    return mark_safe(format_string.format(*escaped_args, **escaped_kwargs))

115

116

def format_html_join(sep, format_string, args_generator):
    """
    Apply format_html() with the same format string to each group of
    arguments and join the resulting fragments with 'sep'. 'sep' itself is
    passed through conditional_escape, and the joined text is marked safe.

    'args_generator' should be an iterator that yields the sequence of 'args'
    to pass to format_html for each fragment.

    Example:

      format_html_join('\n', "<li>{} {}</li>", ((u.first_name, u.last_name)
                                                  for u in users))
    """
    joiner = conditional_escape(sep)
    fragments = (format_html(format_string, *row) for row in args_generator)
    return mark_safe(joiner.join(fragments))

136

137

@keep_lazy_text
def linebreaks(value, autoescape=False):
    """
    Convert newlines into <p> and <br>s.

    Runs of two or more newlines separate paragraphs; each remaining single
    newline becomes ``<br>``. When ``autoescape`` is true, each paragraph is
    passed through escape() before the ``<br>`` tags are inserted.
    """
    normalized = str(normalize_newlines(value))
    wrapped = []
    for paragraph in re.split("\n{2,}", normalized):
        body = escape(paragraph) if autoescape else paragraph
        wrapped.append("<p>%s</p>" % body.replace("\n", "<br>"))
    return "\n\n".join(wrapped)

148

149

class MLStripper(HTMLParser):
    """
    HTMLParser that keeps text content and drops tags.

    Character data is collected as is. Entity and character references are
    collected in their original ``&name;`` / ``&#num;`` spelling
    (``convert_charrefs`` is disabled so the parser reports them separately).
    """

    def __init__(self):
        super().__init__(convert_charrefs=False)
        self.reset()
        # Collected output fragments, in document order.
        self.fed = []

    def _keep(self, fragment):
        self.fed.append(fragment)

    def handle_data(self, d):
        self._keep(d)

    def handle_entityref(self, name):
        self._keep("&%s;" % name)

    def handle_charref(self, name):
        self._keep("&#%s;" % name)

    def get_data(self):
        """Return everything collected so far as a single string."""
        return "".join(self.fed)

167

168

def _strip_once(value):
    """
    Internal tag stripping utility used by strip_tags.

    Feed ``value`` through a fresh MLStripper a single time and return the
    text it collected.
    """
    stripper = MLStripper()
    stripper.feed(value)
    stripper.close()
    return stripper.get_data()

177

178

179@keep_lazy_text

180def strip_tags(value):

181 """Return the given HTML with all tags stripped."""

182 # Note: in typical case this loop executes _strip_once once. Loop condition

183 # is redundant, but helps to reduce number of executions of _strip_once.

184 value = str(value)

185 while "<" in value and ">" in value:

186 new_value = _strip_once(value)

187 if value.count("<") == new_value.count("<"):

188 # _strip_once wasn't able to detect more tags.

189 break

190 value = new_value

191 return value

192

193

@keep_lazy_text
def strip_spaces_between_tags(value):
    """
    Return the given HTML with spaces between tags removed: any run of
    whitespace between a ``>`` and the next ``<`` is dropped.
    """
    markup = str(value)
    between_tags = r">\s+<"
    return re.sub(between_tags, "><", markup)

198

199

def smart_urlquote(url):
    """
    Quote a URL if it isn't already quoted.

    The URL is split into components; the host is converted with punycode(),
    query keys and values are unquoted individually and re-encoded, and the
    path and fragment are unquoted then quoted. If the URL cannot be split or
    the host cannot be converted, the whole string is unquoted then quoted
    instead.
    """

    def unquote_quote(segment):
        # Unquote first so that already-quoted input is not quoted twice.
        # Tilde is part of RFC3986 Unreserved Characters
        # https://tools.ietf.org/html/rfc3986#section-2.3
        # See also https://bugs.python.org/issue16285
        safe_chars = RFC3986_SUBDELIMS + RFC3986_GENDELIMS + "~"
        return quote(unquote(segment), safe=safe_chars)

    # Handle IDN before quoting.
    try:
        parts = urlsplit(url)
    except ValueError:
        # invalid IPv6 URL (normally square brackets in hostname part).
        return unquote_quote(url)
    scheme, netloc, path, query, fragment = parts

    try:
        netloc = punycode(netloc)  # IDN -> ACE
    except UnicodeError:  # invalid domain part
        return unquote_quote(url)

    if query:
        # Separately unquoting key/value, so as to not mix querystring
        # separators included in query values. See #22267.
        pairs = []
        for key, val in parse_qsl(query, keep_blank_values=True):
            pairs.append((unquote(key), unquote(val)))
        # urlencode will take care of quoting
        query = urlencode(pairs)

    path = unquote_quote(path)
    fragment = unquote_quote(fragment)

    return urlunsplit((scheme, netloc, path, query, fragment))

236

237

@keep_lazy_text
def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
    """
    Convert any URLs in text into clickable links.

    Works on http://, https://, www. links, and also on links ending in one of
    the original seven gTLDs (.com, .edu, .gov, .int, .mil, .net, and .org).
    Links can have trailing punctuation (periods, commas, close-parens) and
    leading punctuation (opening parens) and it'll still do the right thing.
    Words that look like simple email addresses become mailto: links.

    If trim_url_limit is not None, truncate the URLs in the link text longer
    than this limit to trim_url_limit - 1 characters and append an ellipsis.

    If nofollow is True, give the links a rel="nofollow" attribute (never
    applied to mailto: links).

    If autoescape is True, autoescape the link text and URLs.

    Words that are not turned into links are marked safe when the input is
    SafeData, escaped when autoescape is True, and left untouched otherwise.
    This includes email-like words whose domain cannot be converted by
    punycode().
    """
    safe_input = isinstance(text, SafeData)

    def trim_url(x, limit=trim_url_limit):
        if limit is None or len(x) <= limit:
            return x
        return "%s…" % x[: max(0, limit - 1)]

    def trim_punctuation(lead, middle, trail):
        """
        Trim trailing and wrapping punctuation from `middle`. Return the items
        of the new state.
        """
        # Continue trimming until middle remains unchanged.
        trimmed_something = True
        while trimmed_something:
            trimmed_something = False
            # Trim wrapping punctuation.
            for opening, closing in WRAPPING_PUNCTUATION:
                if middle.startswith(opening):
                    middle = middle[len(opening) :]
                    lead += opening
                    trimmed_something = True
                # Keep parentheses at the end only if they're balanced.
                if (
                    middle.endswith(closing)
                    and middle.count(closing) == middle.count(opening) + 1
                ):
                    middle = middle[: -len(closing)]
                    trail = closing + trail
                    trimmed_something = True
            # Trim trailing punctuation (after trimming wrapping punctuation,
            # as encoded entities contain ';'). Unescape entities to avoid
            # breaking them by removing ';'.
            middle_unescaped = html.unescape(middle)
            stripped = middle_unescaped.rstrip(TRAILING_PUNCTUATION_CHARS)
            if middle_unescaped != stripped:
                punctuation_count = len(middle_unescaped) - len(stripped)
                trail = middle[-punctuation_count:] + trail
                middle = middle[:-punctuation_count]
                trimmed_something = True
        return lead, middle, trail

    def is_email_simple(value):
        """Return True if value looks like an email address."""
        # An @ must be in the middle of the value.
        if "@" not in value or value.startswith("@") or value.endswith("@"):
            return False
        try:
            p1, p2 = value.split("@")
        except ValueError:
            # value contains more than one @.
            return False
        # Dot must be in p2 (e.g. example.com)
        if "." not in p2 or p2.startswith("."):
            return False
        return True

    def render_plain(word):
        """Return a word that is not being linked, honoring the escaping mode."""
        if safe_input:
            return mark_safe(word)
        if autoescape:
            return escape(word)
        return word

    words = word_split_re.split(str(text))
    for i, word in enumerate(words):
        url = None
        # Only words containing one of these characters can be a URL or an
        # email address; everything else skips the matching work.
        if "." in word or "@" in word or ":" in word:
            # lead: Current punctuation trimmed from the beginning of the word.
            # middle: Current state of the word.
            # trail: Current punctuation trimmed from the end of the word.
            lead, middle, trail = trim_punctuation("", word, "")

            # Make URL we want to point to.
            nofollow_attr = ' rel="nofollow"' if nofollow else ""
            if simple_url_re.match(middle):
                url = smart_urlquote(html.unescape(middle))
            elif simple_url_2_re.match(middle):
                url = smart_urlquote("http://%s" % html.unescape(middle))
            elif ":" not in middle and is_email_simple(middle):
                local, domain = middle.rsplit("@", 1)
                try:
                    domain = punycode(domain)
                except UnicodeError:
                    # Not a usable domain: leave url unset so the word is
                    # handled as plain text below (and still escaped when
                    # autoescape is on) instead of being skipped entirely.
                    pass
                else:
                    url = "mailto:%s@%s" % (local, domain)
                    nofollow_attr = ""

        if url:
            # Make link.
            trimmed = trim_url(middle)
            if autoescape and not safe_input:
                lead, trail = escape(lead), escape(trail)
                trimmed = escape(trimmed)
            middle = '<a href="%s"%s>%s</a>' % (escape(url), nofollow_attr, trimmed)
            words[i] = mark_safe("%s%s%s" % (lead, middle, trail))
        else:
            words[i] = render_plain(word)
    return "".join(words)

356

357

def avoid_wrapping(value):
    """
    Replace every normal space in ``value`` with a non-breaking space so the
    text does not wrap in the middle of a phrase.
    """
    space, non_breaking_space = " ", "\xa0"
    return value.replace(space, non_breaking_space)

364

365

def html_safe(klass):
    """
    Class decorator that adds an ``__html__`` method and makes ``__str__``
    return its result wrapped in mark_safe(). This helps non-Django templates
    detect classes whose __str__ methods return SafeString.

    Raise ValueError if the class itself already defines ``__html__`` or does
    not itself define ``__str__`` (inherited definitions are not considered).
    Return the same class object.
    """
    own_attrs = klass.__dict__
    if "__html__" in own_attrs:
        raise ValueError(
            "can't apply @html_safe to %s because it defines "
            "__html__()." % klass.__name__
        )
    if "__str__" not in own_attrs:
        raise ValueError(
            "can't apply @html_safe to %s because it doesn't "
            "define __str__()." % klass.__name__
        )
    # Capture the original __str__ before it is replaced by the wrapper.
    original_str = klass.__str__
    klass.__str__ = lambda self: mark_safe(original_str(self))
    klass.__html__ = lambda self: str(self)
    return klass