Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/django/utils/html.py: 22%
178 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
"""HTML utilities suitable for global use."""
import html
import json
import re
from html.parser import HTMLParser
from urllib.parse import parse_qsl, quote, unquote, urlencode, urlsplit, urlunsplit

from django.utils.encoding import punycode
from django.utils.functional import Promise, keep_lazy, keep_lazy_text
from django.utils.http import RFC3986_GENDELIMS, RFC3986_SUBDELIMS
from django.utils.regex_helper import _lazy_re_compile
from django.utils.safestring import SafeData, SafeString, mark_safe
from django.utils.text import normalize_newlines
# Configuration for urlize() function.
# Characters stripped from the end of a candidate URL before it is linked.
TRAILING_PUNCTUATION_CHARS = ".,:;!"
# (opening, closing) pairs that may wrap a candidate URL.
WRAPPING_PUNCTUATION = [("(", ")"), ("[", "]")]

# List of possible strings used for bullets in bulleted lists.
# The entity forms are spelled out on purpose: they are distinct source
# strings even though they all render as a dot/bullet in a browser.
DOTS = ["&middot;", "*", "\u2022", "&#149;", "&bull;", "&#8226;"]
# Splits text on runs of whitespace, angle brackets and quotes. The capturing
# group makes re.split() keep the separators in the result.
word_split_re = _lazy_re_compile(r"""([\s<>"']+)""")
# Matches words that start with an explicit http:// or https:// scheme.
simple_url_re = _lazy_re_compile(r"^https?://\[?\w", re.IGNORECASE)
# Matches scheme-less candidates: "www." prefixes, or words ending in one of
# the listed TLDs (optionally followed by a path) that contain no "@".
simple_url_2_re = _lazy_re_compile(
    r"^www\.|^(?!http)\w[^@]+\.(com|edu|gov|int|mil|net|org)($|/.*)$", re.IGNORECASE
)
@keep_lazy(str, SafeString)
def escape(text):
    """
    Return the given text with ampersands, quotes and angle brackets encoded
    for use in HTML.

    Always escape input, even if it's already escaped and marked as such.
    This may result in double-escaping. If this is a concern, use
    conditional_escape() instead.

    The argument is coerced with str() before escaping and the escaped result
    is passed through mark_safe().
    """
    return mark_safe(html.escape(str(text)))
# Translation table for str.translate(): maps code points to the \uXXXX
# escape sequence emitted by escapejs().
_js_escapes = {
    ord("\\"): "\\u005C",
    ord("'"): "\\u0027",
    ord('"'): "\\u0022",
    ord(">"): "\\u003E",
    ord("<"): "\\u003C",
    ord("&"): "\\u0026",
    ord("="): "\\u003D",
    ord("-"): "\\u002D",
    ord(";"): "\\u003B",
    ord("`"): "\\u0060",
    ord("\u2028"): "\\u2028",
    ord("\u2029"): "\\u2029",
}

# Escape every ASCII character with a value less than 32.
_js_escapes.update((z, "\\u%04X" % z) for z in range(32))
@keep_lazy(str, SafeString)
def escapejs(value):
    """
    Hex encode characters for use in JavaScript strings.

    The value is coerced with str(), every character listed in _js_escapes is
    replaced by its \\uXXXX escape, and the result is passed through
    mark_safe().
    """
    return mark_safe(str(value).translate(_js_escapes))
# Translation table for str.translate(): the characters json_script()
# replaces with unicode escapes in the serialized JSON.
_json_script_escapes = {
    ord(">"): "\\u003E",
    ord("<"): "\\u003C",
    ord("&"): "\\u0026",
}
def json_script(value, element_id=None):
    """
    Escape all the HTML/XML special characters with their unicode escapes, so
    value is safe to be output anywhere except for inside a tag attribute. Wrap
    the escaped JSON in a script tag.

    value is serialized with json.dumps() using DjangoJSONEncoder. When
    element_id is truthy it is rendered as the tag's id attribute (escaped by
    format_html()); otherwise the script tag is emitted without an id.
    """
    # Imported here rather than at module level, as in the original code.
    from django.core.serializers.json import DjangoJSONEncoder

    json_str = json.dumps(value, cls=DjangoJSONEncoder).translate(_json_script_escapes)
    if element_id:
        template = '<script id="{}" type="application/json">{}</script>'
        args = (element_id, mark_safe(json_str))
    else:
        template = '<script type="application/json">{}</script>'
        args = (mark_safe(json_str),)
    return format_html(template, *args)
def conditional_escape(text):
    """
    Similar to escape(), except that it doesn't operate on pre-escaped strings.

    This function relies on the __html__ convention used both by Django's
    SafeData class and by third-party libraries like markupsafe.

    Return text.__html__() when the object provides it, otherwise the result
    of escape(text).
    """
    if isinstance(text, Promise):
        # Resolve lazy objects first so the __html__ check sees the real value.
        text = str(text)
    if hasattr(text, "__html__"):
        return text.__html__()
    return escape(text)
def format_html(format_string, *args, **kwargs):
    """
    Similar to str.format, but pass all arguments through conditional_escape(),
    and call mark_safe() on the result. This function should be used instead
    of str.format or % interpolation to build up small HTML fragments.

    format_string itself is not escaped; only the positional and keyword
    arguments are.
    """
    args_safe = map(conditional_escape, args)
    kwargs_safe = {k: conditional_escape(v) for (k, v) in kwargs.items()}
    return mark_safe(format_string.format(*args_safe, **kwargs_safe))
def format_html_join(sep, format_string, args_generator):
    """
    A wrapper of format_html, for the common case of a group of arguments that
    need to be formatted using the same format string, and then joined using
    'sep'. 'sep' is also passed through conditional_escape.

    'args_generator' should be an iterator that returns the sequence of 'args'
    that will be passed to format_html.

    Example:

      format_html_join('\n', "<li>{} {}</li>", ((u.first_name, u.last_name)
                                                  for u in users))
    """
    return mark_safe(
        conditional_escape(sep).join(
            format_html(format_string, *args) for args in args_generator
        )
    )
@keep_lazy_text
def linebreaks(value, autoescape=False):
    """
    Convert newlines into <p> and <br>s.

    Runs of two or more newlines separate paragraphs; each remaining single
    newline becomes <br>. When autoescape is True, each paragraph is passed
    through escape() before the <br> substitution.
    """
    value = normalize_newlines(value)
    paras = re.split("\n{2,}", str(value))
    if autoescape:
        paras = ["<p>%s</p>" % escape(p).replace("\n", "<br>") for p in paras]
    else:
        paras = ["<p>%s</p>" % p.replace("\n", "<br>") for p in paras]
    return "\n\n".join(paras)
class MLStripper(HTMLParser):
    """
    HTMLParser subclass that collects the text content fed to it.

    Text data is stored as-is and entity/character references are stored in
    their original "&name;" / "&#num;" spelling. No tag handlers are
    overridden, so tags contribute nothing to the collected output.
    """

    def __init__(self):
        # convert_charrefs=False makes the parser report references through
        # handle_entityref()/handle_charref() instead of decoding them.
        super().__init__(convert_charrefs=False)
        self.reset()
        # Collected output fragments, joined by get_data().
        self.fed = []

    def handle_data(self, d):
        self.fed.append(d)

    def handle_entityref(self, name):
        self.fed.append("&%s;" % name)

    def handle_charref(self, name):
        self.fed.append("&#%s;" % name)

    def get_data(self):
        """Return everything collected so far as a single string."""
        return "".join(self.fed)
def _strip_once(value):
    """
    Internal tag stripping utility used by strip_tags.

    Feed value through a fresh MLStripper and return the text it collected.
    """
    s = MLStripper()
    s.feed(value)
    s.close()
    return s.get_data()
@keep_lazy_text
def strip_tags(value):
    """
    Return the given HTML with all tags stripped.

    The value is coerced with str() and _strip_once() is applied repeatedly
    until a pass no longer reduces the number of "<" characters.
    """
    # Note: in typical case this loop executes _strip_once once. Loop condition
    # is redundant, but helps to reduce number of executions of _strip_once.
    value = str(value)
    while "<" in value and ">" in value:
        new_value = _strip_once(value)
        if value.count("<") == new_value.count("<"):
            # _strip_once wasn't able to detect more tags.
            break
        value = new_value
    return value
@keep_lazy_text
def strip_spaces_between_tags(value):
    """
    Return the given HTML with spaces between tags removed.

    Any run of whitespace between a ">" and the following "<" is dropped;
    whitespace elsewhere is left untouched.
    """
    return re.sub(r">\s+<", "><", str(value))
def smart_urlquote(url):
    """
    Quote a URL if it isn't already quoted.

    The URL is split into components; the host is converted with punycode(),
    the query string is re-encoded pair by pair, and the path and fragment are
    unquoted then re-quoted. If the URL cannot be split or the host cannot be
    converted, the whole string is unquoted and re-quoted instead.
    """

    def unquote_quote(segment):
        # Unquote first so already-quoted input isn't quoted a second time.
        segment = unquote(segment)
        # Tilde is part of RFC3986 Unreserved Characters
        # https://tools.ietf.org/html/rfc3986#section-2.3
        # See also https://bugs.python.org/issue16285
        return quote(segment, safe=RFC3986_SUBDELIMS + RFC3986_GENDELIMS + "~")

    # Handle IDN before quoting.
    try:
        scheme, netloc, path, query, fragment = urlsplit(url)
    except ValueError:
        # invalid IPv6 URL (normally square brackets in hostname part).
        return unquote_quote(url)

    try:
        netloc = punycode(netloc)  # IDN -> ACE
    except UnicodeError:  # invalid domain part
        return unquote_quote(url)

    if query:
        # Separately unquoting key/value, so as to not mix querystring separators
        # included in query values. See #22267.
        query_parts = [
            (unquote(q[0]), unquote(q[1]))
            for q in parse_qsl(query, keep_blank_values=True)
        ]
        # urlencode will take care of quoting
        query = urlencode(query_parts)

    path = unquote_quote(path)
    fragment = unquote_quote(fragment)

    return urlunsplit((scheme, netloc, path, query, fragment))
@keep_lazy_text
def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
    """
    Convert any URLs in text into clickable links.

    Works on http://, https://, www. links, and also on links ending in one of
    the original seven gTLDs (.com, .edu, .gov, .int, .mil, .net, and .org).
    Links can have trailing punctuation (periods, commas, close-parens) and
    leading punctuation (opening parens) and it'll still do the right thing.

    If trim_url_limit is not None, truncate the URLs in the link text longer
    than this limit to trim_url_limit - 1 characters and append an ellipsis.

    If nofollow is True, give the links a rel="nofollow" attribute.

    If autoescape is True, autoescape the link text and URLs.
    """
    # Input already marked safe is never escaped again below.
    safe_input = isinstance(text, SafeData)

    def trim_url(x, limit=trim_url_limit):
        """Shorten the link text x to `limit` characters, ellipsis included."""
        if limit is None or len(x) <= limit:
            return x
        return "%s…" % x[: max(0, limit - 1)]

    def trim_punctuation(lead, middle, trail):
        """
        Trim trailing and wrapping punctuation from `middle`. Return the items
        of the new state.
        """
        # Continue trimming until middle remains unchanged.
        trimmed_something = True
        while trimmed_something:
            trimmed_something = False
            # Trim wrapping punctuation.
            for opening, closing in WRAPPING_PUNCTUATION:
                if middle.startswith(opening):
                    middle = middle[len(opening) :]
                    lead += opening
                    trimmed_something = True
                # Keep parentheses at the end only if they're balanced.
                if (
                    middle.endswith(closing)
                    and middle.count(closing) == middle.count(opening) + 1
                ):
                    middle = middle[: -len(closing)]
                    trail = closing + trail
                    trimmed_something = True
            # Trim trailing punctuation (after trimming wrapping punctuation,
            # as encoded entities contain ';'). Unescape entities to avoid
            # breaking them by removing ';'.
            middle_unescaped = html.unescape(middle)
            stripped = middle_unescaped.rstrip(TRAILING_PUNCTUATION_CHARS)
            if middle_unescaped != stripped:
                punctuation_count = len(middle_unescaped) - len(stripped)
                trail = middle[-punctuation_count:] + trail
                middle = middle[:-punctuation_count]
                trimmed_something = True
        return lead, middle, trail

    def is_email_simple(value):
        """Return True if value looks like an email address."""
        # An @ must be in the middle of the value.
        if "@" not in value or value.startswith("@") or value.endswith("@"):
            return False
        try:
            p1, p2 = value.split("@")
        except ValueError:
            # value contains more than one @.
            return False
        # Dot must be in p2 (e.g. example.com)
        if "." not in p2 or p2.startswith("."):
            return False
        return True

    words = word_split_re.split(str(text))
    for i, word in enumerate(words):
        if "." in word or "@" in word or ":" in word:
            # lead: Current punctuation trimmed from the beginning of the word.
            # middle: Current state of the word.
            # trail: Current punctuation trimmed from the end of the word.
            lead, middle, trail = "", word, ""
            # Deal with punctuation.
            lead, middle, trail = trim_punctuation(lead, middle, trail)

            # Make URL we want to point to.
            url = None
            nofollow_attr = ' rel="nofollow"' if nofollow else ""
            if simple_url_re.match(middle):
                url = smart_urlquote(html.unescape(middle))
            elif simple_url_2_re.match(middle):
                url = smart_urlquote("http://%s" % html.unescape(middle))
            elif ":" not in middle and is_email_simple(middle):
                local, domain = middle.rsplit("@", 1)
                try:
                    domain = punycode(domain)
                except UnicodeError:
                    # Leave the word untouched if the domain can't be encoded.
                    continue
                url = "mailto:%s@%s" % (local, domain)
                # mailto: links never get rel="nofollow".
                nofollow_attr = ""

            # Make link.
            if url:
                trimmed = trim_url(middle)
                if autoescape and not safe_input:
                    lead, trail = escape(lead), escape(trail)
                    trimmed = escape(trimmed)
                middle = '<a href="%s"%s>%s</a>' % (escape(url), nofollow_attr, trimmed)
                words[i] = mark_safe("%s%s%s" % (lead, middle, trail))
            else:
                if safe_input:
                    words[i] = mark_safe(word)
                elif autoescape:
                    words[i] = escape(word)
        elif safe_input:
            words[i] = mark_safe(word)
        elif autoescape:
            words[i] = escape(word)
    return "".join(words)
def avoid_wrapping(value):
    """
    Avoid text wrapping in the middle of a phrase by adding non-breaking
    spaces where there previously were normal spaces.

    Every " " in value is replaced by "\\xa0"; other whitespace is untouched.
    """
    return value.replace(" ", "\xa0")
def html_safe(klass):
    """
    A decorator that defines the __html__ method. This helps non-Django
    templates to detect classes whose __str__ methods return SafeString.

    The class's own __str__ is wrapped so its result goes through mark_safe(),
    and __html__ is defined to return str(self). Return the same class object.

    Raise ValueError if the class itself already defines __html__ or does not
    define __str__ (inherited definitions are not considered).
    """
    if "__html__" in klass.__dict__:
        raise ValueError(
            "can't apply @html_safe to %s because it defines "
            "__html__()." % klass.__name__
        )
    if "__str__" not in klass.__dict__:
        raise ValueError(
            "can't apply @html_safe to %s because it doesn't "
            "define __str__()." % klass.__name__
        )
    # Capture the original __str__ before it is replaced below.
    klass_str = klass.__str__
    klass.__str__ = lambda self: mark_safe(klass_str(self))
    klass.__html__ = lambda self: str(self)
    return klass