Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/django/utils/http.py: 27%
174 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1import base64
2import datetime
3import re
4import unicodedata
5from binascii import Error as BinasciiError
6from email.utils import formatdate
7from urllib.parse import (
8 ParseResult,
9 SplitResult,
10 _coerce_args,
11 _splitnetloc,
12 _splitparams,
13 scheme_chars,
14)
15from urllib.parse import urlencode as original_urlencode
16from urllib.parse import uses_params
18from django.utils.datastructures import MultiValueDict
19from django.utils.regex_helper import _lazy_re_compile
# based on RFC 7232, Appendix C
# Matches a single quoted ETag, optionally weak ('W/"..."'); group 1 captures
# the whole (possibly weak) quoted tag.
ETAG_MATCH = _lazy_re_compile(
    r"""
    \A(      # start of string and capture group
    (?:W/)?  # optional weak indicator
    "        # opening quote
    [^"]*    # any sequence of non-quote characters
    "        # end quote
    )\Z      # end of string and capture group
""",
    re.X,
)

# Lowercase three-letter month abbreviations, index + 1 == month number.
MONTHS = "jan feb mar apr may jun jul aug sep oct nov dec".split()
# Named sub-patterns shared by the three HTTP date formats below.
__D = r"(?P<day>\d{2})"  # two-digit day
__D2 = r"(?P<day>[ \d]\d)"  # day, possibly space-padded (asctime style)
__M = r"(?P<mon>\w{3})"  # three-letter month abbreviation
__Y = r"(?P<year>\d{4})"  # four-digit year
__Y2 = r"(?P<year>\d{2})"  # two-digit year (RFC 850)
__T = r"(?P<hour>\d{2}):(?P<min>\d{2}):(?P<sec>\d{2})"  # HH:MM:SS
# The three date formats RFC 7231 section 7.1.1.1 requires parsers to accept.
RFC1123_DATE = _lazy_re_compile(r"^\w{3}, %s %s %s %s GMT$" % (__D, __M, __Y, __T))
RFC850_DATE = _lazy_re_compile(r"^\w{6,9}, %s-%s-%s %s GMT$" % (__D, __M, __Y2, __T))
ASCTIME_DATE = _lazy_re_compile(r"^\w{3} %s %s %s %s$" % (__M, __D2, __T, __Y))

# RFC 3986 section 2.2 reserved characters.
RFC3986_GENDELIMS = ":/?#[]@"
RFC3986_SUBDELIMS = "!$&'()*+,;="
def urlencode(query, doseq=False):
    """
    A version of Python's urllib.parse.urlencode() function that can operate on
    MultiValueDict and non-string values.
    """
    if isinstance(query, MultiValueDict):
        query = query.lists()
    elif hasattr(query, "items"):
        query = query.items()

    def prepare(key, value):
        # Normalize one value the same way for scalars and sequences;
        # None is rejected outright because it has no sensible encoding.
        if value is None:
            raise TypeError(
                "Cannot encode None for key '%s' in a query string. Did you "
                "mean to pass an empty string or omit the value?" % key
            )
        if not doseq or isinstance(value, (str, bytes)):
            return value
        try:
            elements = iter(value)
        except TypeError:
            return value
        # Consume generators and iterators, when doseq=True, to
        # work around https://bugs.python.org/issue31706.
        materialized = []
        for element in elements:
            if element is None:
                raise TypeError(
                    "Cannot encode None for key '%s' in a query "
                    "string. Did you mean to pass an empty string or "
                    "omit the value?" % key
                )
            materialized.append(
                element if isinstance(element, bytes) else str(element)
            )
        return materialized

    pairs = [(key, prepare(key, value)) for key, value in query]
    return original_urlencode(pairs, doseq)
def http_date(epoch_seconds=None):
    """
    Format the time to match the RFC1123 date format as specified by HTTP
    RFC7231 section 7.1.1.1.

    `epoch_seconds` is a floating point number expressed in seconds since the
    epoch, in UTC - such as that outputted by time.time(). If set to None, it
    defaults to the current time.

    Output a string in the format 'Wdy, DD Mon YYYY HH:MM:SS GMT'.
    """
    # email.utils.formatdate(usegmt=True) already emits exactly this format.
    stamp = formatdate(epoch_seconds, usegmt=True)
    return stamp
def parse_http_date(date):
    """
    Parse a date format as specified by HTTP RFC7231 section 7.1.1.1.

    The three formats allowed by the RFC are accepted, even if only the first
    one is still in widespread use.

    Return an integer expressed in seconds since the epoch, in UTC.

    Raise ValueError if `date` matches none of the formats or names an
    impossible calendar date.
    """
    # email.utils.parsedate() does the job for RFC1123 dates; unfortunately
    # RFC7231 makes it mandatory to support RFC850 dates too. So we roll
    # our own RFC-compliant parsing.
    for regex in RFC1123_DATE, RFC850_DATE, ASCTIME_DATE:
        m = regex.match(date)
        if m is not None:
            break
    else:
        raise ValueError("%r is not in a valid HTTP date format" % date)
    try:
        tz = datetime.timezone.utc
        year = int(m["year"])
        if year < 100:
            # Two-digit years (RFC 850): a year that appears to be more than
            # 50 years in the future is interpreted as representing the past.
            current_year = datetime.datetime.now(tz=tz).year
            current_century = current_year - (current_year % 100)
            if year - (current_year % 100) > 50:
                year += current_century - 100
            else:
                year += current_century
        month = MONTHS.index(m["mon"].lower()) + 1
        day = int(m["day"])
        hour = int(m["hour"])
        # Renamed from `min`/`sec` to avoid shadowing the builtin min().
        minute = int(m["min"])
        second = int(m["sec"])
        result = datetime.datetime(year, month, day, hour, minute, second, tzinfo=tz)
        return int(result.timestamp())
    except Exception as exc:
        raise ValueError("%r is not a valid date" % date) from exc
def parse_http_date_safe(date):
    """
    Same as parse_http_date, but return None if the input is invalid.
    """
    try:
        result = parse_http_date(date)
    except Exception:
        return None
    return result
155# Base 36 functions: useful for generating compact URLs
def base36_to_int(s):
    """
    Convert a base 36 string to an int. Raise ValueError if the input won't fit
    into an int.
    """
    # 13 base36 digits are enough to encode any 64-bit integer; reject
    # anything longer to prevent overconsumption of server resources.
    if len(s) <= 13:
        return int(s, 36)
    raise ValueError("Base36 input too large")
def int_to_base36(i):
    """Convert an integer to a base36 string."""
    digits = "0123456789abcdefghijklmnopqrstuvwxyz"
    if i < 0:
        raise ValueError("Negative base36 conversion input.")
    # Single-digit fast path; also makes 0 map to "0".
    if i < 36:
        return digits[i]
    chunks = []
    while i:
        i, remainder = divmod(i, 36)
        chunks.append(digits[remainder])
    return "".join(reversed(chunks))
def urlsafe_base64_encode(s):
    """
    Encode a bytestring to a base64 string for use in URLs. Strip any trailing
    equal signs.
    """
    encoded = base64.urlsafe_b64encode(s)
    # Drop the '=' padding (and any trailing newline) so the token is URL-clean.
    return encoded.rstrip(b"\n=").decode("ascii")
def urlsafe_base64_decode(s):
    """
    Decode a base64 encoded string. Add back any trailing equal signs that
    might have been stripped.
    """
    raw = s.encode()
    # Re-pad to a length the base64 decoder accepts.
    padded = raw.ljust(len(raw) + len(raw) % 4, b"=")
    try:
        return base64.urlsafe_b64decode(padded)
    except (LookupError, BinasciiError) as e:
        raise ValueError(e)
def parse_etags(etag_str):
    """
    Parse a string of ETags given in an If-None-Match or If-Match header as
    defined by RFC 7232. Return a list of quoted ETags, or ['*'] if all ETags
    should be matched.
    """
    if etag_str.strip() == "*":
        return ["*"]
    # Parse each ETag individually, and return any that are valid.
    candidates = [
        ETAG_MATCH.match(candidate.strip()) for candidate in etag_str.split(",")
    ]
    return [match[1] for match in candidates if match]
def quote_etag(etag_str):
    """
    If the provided string is already a quoted ETag, return it. Otherwise, wrap
    the string in quotes, making it a strong ETag.
    """
    return etag_str if ETAG_MATCH.match(etag_str) else '"%s"' % etag_str
def is_same_domain(host, pattern):
    """
    Return ``True`` if the host is either an exact match or a match
    to the wildcard pattern.

    Any pattern beginning with a period matches a domain and all of its
    subdomains. (e.g. ``.example.com`` matches ``example.com`` and
    ``foo.example.com``). Anything else is an exact string match.
    """
    if not pattern:
        return False

    pattern = pattern.lower()
    if pattern.startswith("."):
        # Wildcard pattern: match the bare domain or any subdomain of it.
        return host.endswith(pattern) or host == pattern[1:]
    return pattern == host
def url_has_allowed_host_and_scheme(url, allowed_hosts, require_https=False):
    """
    Return ``True`` if the url uses an allowed host and a safe scheme.

    Always return ``False`` on an empty url.

    If ``require_https`` is ``True``, only 'https' will be considered a valid
    scheme, as opposed to 'http' and 'https' with the default, ``False``.

    Note: "True" doesn't entail that a URL is "safe". It may still be e.g.
    quoted incorrectly. Ensure to also use django.utils.encoding.iri_to_uri()
    on the path component of untrusted URLs.
    """
    if url is not None:
        url = url.strip()
    if not url:
        return False
    if allowed_hosts is None:
        allowed_hosts = set()
    elif isinstance(allowed_hosts, str):
        allowed_hosts = {allowed_hosts}
    # Chrome treats \ completely as / in paths but it could be part of some
    # basic auth credentials so we need to check both URLs.
    if not _url_has_allowed_host_and_scheme(
        url, allowed_hosts, require_https=require_https
    ):
        return False
    return _url_has_allowed_host_and_scheme(
        url.replace("\\", "/"), allowed_hosts, require_https=require_https
    )
# Copied from urllib.parse.urlparse() but uses fixed urlsplit() function.
def _urlparse(url, scheme="", allow_fragments=True):
    """Parse a URL into 6 components:
    <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
    Return a 6-tuple: (scheme, netloc, path, params, query, fragment).
    Note that we don't break the components up in smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes."""
    url, scheme, _coerce_result = _coerce_args(url, scheme)
    scheme, netloc, url, query, fragment = _urlsplit(url, scheme, allow_fragments)
    # Only schemes known to carry path parameters get the ';params' split.
    params = ""
    if scheme in uses_params and ";" in url:
        url, params = _splitparams(url)
    return _coerce_result(ParseResult(scheme, netloc, url, params, query, fragment))
298# Copied from urllib.parse.urlsplit() with
299# https://github.com/python/cpython/pull/661 applied.
300def _urlsplit(url, scheme="", allow_fragments=True):
301 """Parse a URL into 5 components:
302 <scheme>://<netloc>/<path>?<query>#<fragment>
303 Return a 5-tuple: (scheme, netloc, path, query, fragment).
304 Note that we don't break the components up in smaller bits
305 (e.g. netloc is a single string) and we don't expand % escapes."""
306 url, scheme, _coerce_result = _coerce_args(url, scheme)
307 netloc = query = fragment = ""
308 i = url.find(":")
309 if i > 0:
310 for c in url[:i]:
311 if c not in scheme_chars:
312 break
313 else:
314 scheme, url = url[:i].lower(), url[i + 1 :]
316 if url[:2] == "//":
317 netloc, url = _splitnetloc(url, 2)
318 if ("[" in netloc and "]" not in netloc) or (
319 "]" in netloc and "[" not in netloc
320 ):
321 raise ValueError("Invalid IPv6 URL")
322 if allow_fragments and "#" in url:
323 url, fragment = url.split("#", 1)
324 if "?" in url:
325 url, query = url.split("?", 1)
326 v = SplitResult(scheme, netloc, url, query, fragment)
327 return _coerce_result(v)
330def _url_has_allowed_host_and_scheme(url, allowed_hosts, require_https=False):
331 # Chrome considers any URL with more than two slashes to be absolute, but
332 # urlparse is not so flexible. Treat any url with three slashes as unsafe.
333 if url.startswith("///"):
334 return False
335 try:
336 url_info = _urlparse(url)
337 except ValueError: # e.g. invalid IPv6 addresses
338 return False
339 # Forbid URLs like http:///example.com - with a scheme, but without a hostname.
340 # In that URL, example.com is not the hostname but, a path component. However,
341 # Chrome will still consider example.com to be the hostname, so we must not
342 # allow this syntax.
343 if not url_info.netloc and url_info.scheme:
344 return False
345 # Forbid URLs that start with control characters. Some browsers (like
346 # Chrome) ignore quite a few control characters at the start of a
347 # URL and might consider the URL as scheme relative.
348 if unicodedata.category(url[0])[0] == "C":
349 return False
350 scheme = url_info.scheme
351 # Consider URLs without a scheme (e.g. //example.com/p) to be http.
352 if not url_info.scheme and url_info.netloc:
353 scheme = "http"
354 valid_schemes = ["https"] if require_https else ["http", "https"]
355 return (not url_info.netloc or url_info.netloc in allowed_hosts) and (
356 not scheme or scheme in valid_schemes
357 )
def escape_leading_slashes(url):
    """
    If redirecting to an absolute path (two leading slashes), a slash must be
    escaped to prevent browsers from handling the path as schemaless and
    redirecting to another host.
    """
    if not url.startswith("//"):
        return url
    return "/%2F{}".format(url[2:])