Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/django/utils/html.py: 22%

178 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1"""HTML utilities suitable for global use.""" 

2 

3import html 

4import json 

5import re 

6from html.parser import HTMLParser 

7from urllib.parse import parse_qsl, quote, unquote, urlencode, urlsplit, urlunsplit 

8 

9from django.utils.encoding import punycode 

10from django.utils.functional import Promise, keep_lazy, keep_lazy_text 

11from django.utils.http import RFC3986_GENDELIMS, RFC3986_SUBDELIMS 

12from django.utils.regex_helper import _lazy_re_compile 

13from django.utils.safestring import SafeData, SafeString, mark_safe 

14from django.utils.text import normalize_newlines 

15 

# Configuration for urlize() function.
# Characters stripped from the end of a candidate URL before linking.
TRAILING_PUNCTUATION_CHARS = ".,:;!"
# (opening, closing) pairs that may wrap a candidate URL.
WRAPPING_PUNCTUATION = [("(", ")"), ("[", "]")]

# List of possible strings used for bullets in bulleted lists. The entity
# forms are spelled out as markup on purpose: they are matched against HTML
# source text, so they must not be replaced by the characters they render as.
DOTS = ["&middot;", "*", "\u2022", "&#149;", "&bull;", "&#8226;"]

22 

# Splits text on runs of whitespace, angle brackets and quotes; the capture
# group keeps the separators in the result so the text can be re-joined.
word_split_re = _lazy_re_compile(r"""([\s<>"']+)""")
# Matches words that start with an explicit http:// or https:// scheme.
simple_url_re = _lazy_re_compile(r"^https?://\[?\w", re.IGNORECASE)
# Matches scheme-less candidates: "www." prefixes, or words ending in one of
# the listed TLDs (optionally followed by a path).
simple_url_2_re = _lazy_re_compile(
    r"^www\.|^(?!http)\w[^@]+\.(com|edu|gov|int|mil|net|org)($|/.*)$", re.IGNORECASE
)

28 

29 

@keep_lazy(str, SafeString)
def escape(text):
    """
    Return the given text with ampersands, quotes and angle brackets encoded
    for use in HTML.

    Always escape input, even if it's already escaped and marked as such.
    This may result in double-escaping. If this is a concern, use
    conditional_escape() instead.
    """
    # str() first so non-string inputs are accepted; the escaped result is
    # marked safe so later conditional escaping leaves it alone.
    return mark_safe(html.escape(str(text)))

41 

42 

# Translation table (code point -> replacement) for str.translate(), used by
# escapejs() to turn characters that are risky inside JavaScript string
# literals into \uXXXX escapes.
_js_escapes = {
    ord("\\"): "\\u005C",
    ord("'"): "\\u0027",
    ord('"'): "\\u0022",
    ord(">"): "\\u003E",
    ord("<"): "\\u003C",
    ord("&"): "\\u0026",
    ord("="): "\\u003D",
    ord("-"): "\\u002D",
    ord(";"): "\\u003B",
    ord("`"): "\\u0060",
    ord("\u2028"): "\\u2028",
    ord("\u2029"): "\\u2029",
}

# Escape every ASCII character with a value less than 32.
_js_escapes.update((z, "\\u%04X" % z) for z in range(32))

60 

61 

@keep_lazy(str, SafeString)
def escapejs(value):
    """Hex encode characters for use in JavaScript strings."""
    # A single translate() pass applies every replacement in _js_escapes.
    return mark_safe(str(value).translate(_js_escapes))

66 

67 

# Translation table for str.translate(), used by json_script() to replace
# the HTML-significant characters in serialized JSON with \uXXXX escapes.
_json_script_escapes = {
    ord(">"): "\\u003E",
    ord("<"): "\\u003C",
    ord("&"): "\\u0026",
}

73 

74 

def json_script(value, element_id=None):
    """
    Escape all the HTML/XML special characters with their unicode escapes, so
    value is safe to be output anywhere except for inside a tag attribute. Wrap
    the escaped JSON in a script tag.

    If element_id is None, the script tag is emitted without an id attribute;
    otherwise element_id is escaped and used as the tag's id.
    """
    # Imported here rather than at module level (kept from the original).
    from django.core.serializers.json import DjangoJSONEncoder

    json_str = json.dumps(value, cls=DjangoJSONEncoder).translate(_json_script_escapes)
    if element_id is None:
        return format_html(
            '<script type="application/json">{}</script>',
            mark_safe(json_str),
        )
    return format_html(
        '<script id="{}" type="application/json">{}</script>',
        element_id,
        mark_safe(json_str),
    )

89 

90 

def conditional_escape(text):
    """
    Similar to escape(), except that it doesn't operate on pre-escaped strings.

    This function relies on the __html__ convention used both by Django's
    SafeData class and by third-party libraries like markupsafe.
    """
    # Resolve lazy objects first so the __html__ check sees the real value.
    if isinstance(text, Promise):
        text = str(text)
    if hasattr(text, "__html__"):
        return text.__html__()
    return escape(text)

104 

105 

def format_html(format_string, *args, **kwargs):
    """
    Similar to str.format, but pass all arguments through conditional_escape(),
    and call mark_safe() on the result. This function should be used instead
    of str.format or % interpolation to build up small HTML fragments.
    """
    # Only the arguments are escaped; format_string itself is used verbatim.
    args_safe = map(conditional_escape, args)
    kwargs_safe = {k: conditional_escape(v) for (k, v) in kwargs.items()}
    return mark_safe(format_string.format(*args_safe, **kwargs_safe))

115 

116 

def format_html_join(sep, format_string, args_generator):
    """
    A wrapper of format_html, for the common case of a group of arguments that
    need to be formatted using the same format string, and then joined using
    'sep'. 'sep' is also passed through conditional_escape.

    'args_generator' should be an iterator that returns the sequence of 'args'
    that will be passed to format_html.

    Example:

      format_html_join('\n', "<li>{} {}</li>", ((u.first_name, u.last_name)
                                                  for u in users))
    """
    return mark_safe(
        conditional_escape(sep).join(
            format_html(format_string, *args) for args in args_generator
        )
    )

136 

137 

@keep_lazy_text
def linebreaks(value, autoescape=False):
    """Convert newlines into <p> and <br>s."""
    value = normalize_newlines(value)
    # Two or more consecutive newlines separate paragraphs; a single newline
    # inside a paragraph becomes a <br>.
    paras = re.split("\n{2,}", str(value))
    if autoescape:
        paras = ["<p>%s</p>" % escape(p).replace("\n", "<br>") for p in paras]
    else:
        paras = ["<p>%s</p>" % p.replace("\n", "<br>") for p in paras]
    return "\n\n".join(paras)

148 

149 

class MLStripper(HTMLParser):
    """
    HTML parser that collects text content and drops the tags.

    Entity and character references are re-emitted in their original
    "&name;" / "&#num;" form instead of being decoded.
    """

    def __init__(self):
        # convert_charrefs=False so references reach handle_entityref() and
        # handle_charref() instead of being decoded into the data.
        super().__init__(convert_charrefs=False)
        self.reset()
        # Collected text fragments, joined by get_data().
        self.fed = []

    def handle_data(self, d):
        self.fed.append(d)

    def handle_entityref(self, name):
        self.fed.append("&%s;" % name)

    def handle_charref(self, name):
        self.fed.append("&#%s;" % name)

    def get_data(self):
        """Return everything collected so far as a single string."""
        return "".join(self.fed)

167 

168 

def _strip_once(value):
    """
    Internal tag stripping utility used by strip_tags.

    Run value through a single MLStripper pass and return the collected text.
    """
    s = MLStripper()
    s.feed(value)
    s.close()
    return s.get_data()

177 

178 

@keep_lazy_text
def strip_tags(value):
    """Return the given HTML with all tags stripped."""
    # Note: in typical case this loop executes _strip_once once. Loop condition
    # is redundant, but helps to reduce number of executions of _strip_once.
    value = str(value)
    while "<" in value and ">" in value:
        new_value = _strip_once(value)
        if value.count("<") == new_value.count("<"):
            # _strip_once wasn't able to detect more tags.
            break
        value = new_value
    return value

192 

193 

@keep_lazy_text
def strip_spaces_between_tags(value):
    """Return the given HTML with spaces between tags removed."""
    # Only whitespace sitting directly between a ">" and the next "<" is
    # removed; whitespace inside text content is left alone.
    return re.sub(r">\s+<", "><", str(value))

198 

199 

def smart_urlquote(url):
    """Quote a URL if it isn't already quoted."""

    def unquote_quote(segment):
        # Unquote first so already-quoted input is not quoted a second time.
        segment = unquote(segment)
        # Tilde is part of RFC3986 Unreserved Characters
        # https://tools.ietf.org/html/rfc3986#section-2.3
        # See also https://bugs.python.org/issue16285
        return quote(segment, safe=RFC3986_SUBDELIMS + RFC3986_GENDELIMS + "~")

    # Handle IDN before quoting.
    try:
        scheme, netloc, path, query, fragment = urlsplit(url)
    except ValueError:
        # invalid IPv6 URL (normally square brackets in hostname part).
        return unquote_quote(url)

    try:
        netloc = punycode(netloc)  # IDN -> ACE
    except UnicodeError:  # invalid domain part
        return unquote_quote(url)

    if query:
        # Separately unquoting key/value, so as to not mix querystring separators
        # included in query values. See #22267.
        query_parts = [
            (unquote(q[0]), unquote(q[1]))
            for q in parse_qsl(query, keep_blank_values=True)
        ]
        # urlencode will take care of quoting
        query = urlencode(query_parts)

    path = unquote_quote(path)
    fragment = unquote_quote(fragment)

    return urlunsplit((scheme, netloc, path, query, fragment))

236 

237 

@keep_lazy_text
def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
    """
    Convert any URLs in text into clickable links.

    Works on http://, https://, www. links, and also on links ending in one of
    the original seven gTLDs (.com, .edu, .gov, .int, .mil, .net, and .org).
    Links can have trailing punctuation (periods, commas, close-parens) and
    leading punctuation (opening parens) and it'll still do the right thing.

    If trim_url_limit is not None, truncate the URLs in the link text longer
    than this limit to trim_url_limit - 1 characters and append an ellipsis.

    If nofollow is True, give the links a rel="nofollow" attribute.

    If autoescape is True, autoescape the link text and URLs.
    """
    # Input already marked safe is never escaped again, even with autoescape.
    safe_input = isinstance(text, SafeData)

    def trim_url(x, limit=trim_url_limit):
        if limit is None or len(x) <= limit:
            return x
        return "%s…" % x[: max(0, limit - 1)]

    def trim_punctuation(lead, middle, trail):
        """
        Trim trailing and wrapping punctuation from `middle`. Return the items
        of the new state.
        """
        # Continue trimming until middle remains unchanged.
        trimmed_something = True
        while trimmed_something:
            trimmed_something = False
            # Trim wrapping punctuation.
            for opening, closing in WRAPPING_PUNCTUATION:
                if middle.startswith(opening):
                    middle = middle[len(opening) :]
                    lead += opening
                    trimmed_something = True
                # Keep parentheses at the end only if they're balanced.
                if (
                    middle.endswith(closing)
                    and middle.count(closing) == middle.count(opening) + 1
                ):
                    middle = middle[: -len(closing)]
                    trail = closing + trail
                    trimmed_something = True
            # Trim trailing punctuation (after trimming wrapping punctuation,
            # as encoded entities contain ';'). Unescape entities to avoid
            # breaking them by removing ';'.
            middle_unescaped = html.unescape(middle)
            stripped = middle_unescaped.rstrip(TRAILING_PUNCTUATION_CHARS)
            if middle_unescaped != stripped:
                punctuation_count = len(middle_unescaped) - len(stripped)
                trail = middle[-punctuation_count:] + trail
                middle = middle[:-punctuation_count]
                trimmed_something = True
        return lead, middle, trail

    def is_email_simple(value):
        """Return True if value looks like an email address."""
        # Exactly one @, and it must be in the middle of the value.
        if value.count("@") != 1 or value.startswith("@") or value.endswith("@"):
            return False
        domain = value.rsplit("@", 1)[1]
        # Dot must be in the domain part (e.g. example.com), but not lead it.
        return "." in domain and not domain.startswith(".")

    words = word_split_re.split(str(text))
    for i, word in enumerate(words):
        if "." in word or "@" in word or ":" in word:
            # lead: Current punctuation trimmed from the beginning of the word.
            # middle: Current state of the word.
            # trail: Current punctuation trimmed from the end of the word.
            lead, middle, trail = "", word, ""
            # Deal with punctuation.
            lead, middle, trail = trim_punctuation(lead, middle, trail)

            # Make URL we want to point to.
            url = None
            nofollow_attr = ' rel="nofollow"' if nofollow else ""
            if simple_url_re.match(middle):
                url = smart_urlquote(html.unescape(middle))
            elif simple_url_2_re.match(middle):
                url = smart_urlquote("http://%s" % html.unescape(middle))
            elif ":" not in middle and is_email_simple(middle):
                local, domain = middle.rsplit("@", 1)
                try:
                    domain = punycode(domain)
                except UnicodeError:
                    # Not a linkable address. Leave url unset so the word gets
                    # the same escaping / safe-marking as any other non-link
                    # word instead of being skipped untouched.
                    pass
                else:
                    url = "mailto:%s@%s" % (local, domain)
                    nofollow_attr = ""

            # Make link.
            if url:
                trimmed = trim_url(middle)
                if autoescape and not safe_input:
                    lead, trail = escape(lead), escape(trail)
                    trimmed = escape(trimmed)
                middle = '<a href="%s"%s>%s</a>' % (escape(url), nofollow_attr, trimmed)
                words[i] = mark_safe("%s%s%s" % (lead, middle, trail))
            else:
                if safe_input:
                    words[i] = mark_safe(word)
                elif autoescape:
                    words[i] = escape(word)
        elif safe_input:
            words[i] = mark_safe(word)
        elif autoescape:
            words[i] = escape(word)
    return "".join(words)

356 

357 

def avoid_wrapping(value):
    """
    Avoid text wrapping in the middle of a phrase by adding non-breaking
    spaces where there previously were normal spaces.
    """
    # "\xa0" is U+00A0 NO-BREAK SPACE.
    return value.replace(" ", "\xa0")

364 

365 

def html_safe(klass):
    """
    A decorator that defines the __html__ method. This helps non-Django
    templates to detect classes whose __str__ methods return SafeString.

    Raise ValueError if klass already defines __html__() or does not define
    __str__() itself (inherited definitions do not count, since only
    klass.__dict__ is inspected).
    """
    if "__html__" in klass.__dict__:
        raise ValueError(
            "can't apply @html_safe to %s because it defines "
            "__html__()." % klass.__name__
        )
    if "__str__" not in klass.__dict__:
        raise ValueError(
            "can't apply @html_safe to %s because it doesn't "
            "define __str__()." % klass.__name__
        )
    # Keep a reference to the original __str__ before replacing it, so the
    # wrapper below does not call itself.
    klass_str = klass.__str__
    klass.__str__ = lambda self: mark_safe(klass_str(self))
    klass.__html__ = lambda self: str(self)
    return klass