Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/django/utils/http.py: 27%

174 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1import base64 

2import datetime 

3import re 

4import unicodedata 

5from binascii import Error as BinasciiError 

6from email.utils import formatdate 

7from urllib.parse import ( 

8 ParseResult, 

9 SplitResult, 

10 _coerce_args, 

11 _splitnetloc, 

12 _splitparams, 

13 scheme_chars, 

14) 

15from urllib.parse import urlencode as original_urlencode 

16from urllib.parse import uses_params 

17 

18from django.utils.datastructures import MultiValueDict 

19from django.utils.regex_helper import _lazy_re_compile 

20 

# based on RFC 7232, Appendix C
ETAG_MATCH = _lazy_re_compile(
    r"""
    \A(      # start of string and capture group
    (?:W/)?  # optional weak indicator
    "        # opening quote
    [^"]*    # any sequence of non-quote characters
    "        # end quote
    )\Z      # end of string and capture group
""",
    re.X,
)

# Lowercase month abbreviations; list index + 1 gives the month number.
MONTHS = "jan feb mar apr may jun jul aug sep oct nov dec".split()
# Named-group building blocks for the three HTTP date formats accepted by
# parse_http_date() (RFC 7231 section 7.1.1.1).
__D = r"(?P<day>\d{2})"  # two-digit day of month
__D2 = r"(?P<day>[ \d]\d)"  # day, optionally space-padded (asctime format)
__M = r"(?P<mon>\w{3})"  # three-letter month abbreviation
__Y = r"(?P<year>\d{4})"  # four-digit year
__Y2 = r"(?P<year>\d{2})"  # two-digit year (RFC 850 format)
__T = r"(?P<hour>\d{2}):(?P<min>\d{2}):(?P<sec>\d{2})"  # HH:MM:SS time
# e.g. 'Sun, 06 Nov 1994 08:49:37 GMT' -- the preferred HTTP date format.
RFC1123_DATE = _lazy_re_compile(r"^\w{3}, %s %s %s %s GMT$" % (__D, __M, __Y, __T))
# e.g. 'Sunday, 06-Nov-94 08:49:37 GMT' -- obsolete RFC 850 format.
RFC850_DATE = _lazy_re_compile(r"^\w{6,9}, %s-%s-%s %s GMT$" % (__D, __M, __Y2, __T))
# e.g. 'Sun Nov  6 08:49:37 1994' -- ANSI C asctime() format.
ASCTIME_DATE = _lazy_re_compile(r"^\w{3} %s %s %s %s$" % (__M, __D2, __T, __Y))

# RFC 3986 section 2.2 reserved characters: general delimiters and
# sub-delimiters.
RFC3986_GENDELIMS = ":/?#[]@"
RFC3986_SUBDELIMS = "!$&'()*+,;="

47 

48 

def urlencode(query, doseq=False):
    """
    A version of Python's urllib.parse.urlencode() function that can operate
    on MultiValueDict and non-string values.
    """
    if isinstance(query, MultiValueDict):
        query = query.lists()
    elif hasattr(query, "items"):
        query = query.items()
    pairs = []
    for key, value in query:
        if value is None:
            raise TypeError(
                "Cannot encode None for key '%s' in a query string. Did you "
                "mean to pass an empty string or omit the value?" % key
            )
        if not doseq or isinstance(value, (str, bytes)):
            pairs.append((key, value))
            continue
        try:
            iterator = iter(value)
        except TypeError:
            # Not iterable; let original_urlencode() stringify it as-is.
            pairs.append((key, value))
            continue
        # Consume generators and iterators, when doseq=True, to work around
        # https://bugs.python.org/issue31706.
        materialized = []
        for item in iterator:
            if item is None:
                raise TypeError(
                    "Cannot encode None for key '%s' in a query "
                    "string. Did you mean to pass an empty string or "
                    "omit the value?" % key
                )
            materialized.append(item if isinstance(item, bytes) else str(item))
        pairs.append((key, materialized))
    return original_urlencode(pairs, doseq)

88 

89 

def http_date(epoch_seconds=None):
    """
    Format a timestamp to match the RFC1123 date format as specified by HTTP
    RFC7231 section 7.1.1.1: 'Wdy, DD Mon YYYY HH:MM:SS GMT'.

    `epoch_seconds` is a floating point number of seconds since the epoch in
    UTC, such as that output by time.time(); None means the current time.
    """
    # formatdate() with usegmt=True emits exactly the fixed-GMT RFC1123 form.
    return formatdate(timeval=epoch_seconds, usegmt=True)

102 

103 

def parse_http_date(date):
    """
    Parse a date format as specified by HTTP RFC7231 section 7.1.1.1.

    The three formats allowed by the RFC are accepted, even if only the first
    one is still in widespread use.

    Return an integer expressed in seconds since the epoch, in UTC.

    Raise ValueError if `date` matches none of the three formats or encodes
    an impossible calendar date.
    """
    # email.utils.parsedate() does the job for RFC1123 dates; unfortunately
    # RFC7231 makes it mandatory to support RFC850 dates too. So we roll
    # our own RFC-compliant parsing.
    for regex in RFC1123_DATE, RFC850_DATE, ASCTIME_DATE:
        m = regex.match(date)
        if m is not None:
            break
    else:
        raise ValueError("%r is not in a valid HTTP date format" % date)
    try:
        tz = datetime.timezone.utc
        year = int(m["year"])
        if year < 100:
            # RFC850 uses two-digit years. Per RFC 7231, a timestamp that
            # appears to be more than 50 years in the future is interpreted
            # as the most recent past year with the same last two digits.
            current_year = datetime.datetime.now(tz=tz).year
            current_century = current_year - (current_year % 100)
            if year - (current_year % 100) > 50:
                year += current_century - 100
            else:
                year += current_century
        month = MONTHS.index(m["mon"].lower()) + 1
        day = int(m["day"])
        hour = int(m["hour"])
        # Renamed from `min`/`sec` so locals don't shadow the builtin min().
        minute = int(m["min"])
        second = int(m["sec"])
        result = datetime.datetime(year, month, day, hour, minute, second, tzinfo=tz)
        return int(result.timestamp())
    except Exception as exc:
        # Any parsing/arithmetic failure (bad month name, day out of range,
        # ...) is normalized to ValueError with the original as the cause.
        raise ValueError("%r is not a valid date" % date) from exc

143 

144 

def parse_http_date_safe(date):
    """Same as parse_http_date, but return None if the input is invalid."""
    try:
        result = parse_http_date(date)
    except Exception:
        # Invalid input of any sort maps to None rather than an exception.
        return None
    return result

153 

154 

155# Base 36 functions: useful for generating compact URLs 

156 

157 

def base36_to_int(s):
    """
    Convert a base 36 string to an int. Raise ValueError if the input won't
    fit into an int.
    """
    # 13 base36 digits are enough to encode any 64-bit integer; longer
    # inputs are rejected to prevent overconsumption of server resources.
    if len(s) <= 13:
        return int(s, 36)
    raise ValueError("Base36 input too large")

169 

170 

def int_to_base36(i):
    """Convert a non-negative integer to a base36 string."""
    digits = "0123456789abcdefghijklmnopqrstuvwxyz"
    if i < 0:
        raise ValueError("Negative base36 conversion input.")
    # Single-digit fast path (also makes 0 yield "0" instead of "").
    if i < 36:
        return digits[i]
    encoded = []
    while i:
        i, remainder = divmod(i, 36)
        encoded.append(digits[remainder])
    # Digits were produced least-significant first.
    return "".join(reversed(encoded))

183 

184 

def urlsafe_base64_encode(s):
    """
    Encode the bytestring `s` as a URL-safe base64 str, with any trailing
    newlines and padding ('=') stripped.
    """
    encoded = base64.urlsafe_b64encode(s)
    return encoded.rstrip(b"\n=").decode("ascii")

191 

192 

def urlsafe_base64_decode(s):
    """
    Decode a base64 encoded string, re-adding any trailing equal signs that
    might have been stripped. Raise ValueError on malformed input.
    """
    data = s.encode()
    # Appending len % 4 '=' characters is exactly what the stripped
    # encoding needs (b64 decoding tolerates the over-padded 3-mod-4 case).
    padded = data + b"=" * (len(data) % 4)
    try:
        return base64.urlsafe_b64decode(padded)
    except (LookupError, BinasciiError) as e:
        raise ValueError(e)

203 

204 

def parse_etags(etag_str):
    """
    Parse a string of ETags given in an If-None-Match or If-Match header as
    defined by RFC 7232. Return a list of quoted ETags, or ['*'] if all ETags
    should be matched.
    """
    if etag_str.strip() == "*":
        return ["*"]
    # Parse each comma-separated candidate individually, keeping only the
    # ones that are syntactically valid ETags.
    parsed = []
    for candidate in etag_str.split(","):
        match = ETAG_MATCH.match(candidate.strip())
        if match:
            parsed.append(match[1])
    return parsed

217 

218 

def quote_etag(etag_str):
    """
    Return `etag_str` unchanged if it is already a valid quoted ETag;
    otherwise wrap it in double quotes, making it a strong ETag.
    """
    return etag_str if ETAG_MATCH.match(etag_str) else '"%s"' % etag_str

228 

229 

def is_same_domain(host, pattern):
    """
    Return ``True`` if the host is either an exact match or a match
    to the wildcard pattern.

    Any pattern beginning with a period matches a domain and all of its
    subdomains. (e.g. ``.example.com`` matches ``example.com`` and
    ``foo.example.com``). Anything else is an exact string match.

    NOTE(review): only the pattern is lowercased here — presumably `host`
    is already lowercase at the call sites; confirm before reusing.
    """
    if not pattern:
        return False

    pattern = pattern.lower()
    if pattern[0] == ".":
        # Wildcard: match the domain itself or any subdomain of it.
        return host.endswith(pattern) or host == pattern[1:]
    return pattern == host

248 

249 

def url_has_allowed_host_and_scheme(url, allowed_hosts, require_https=False):
    """
    Return ``True`` if the url uses an allowed host and a safe scheme.

    Always return ``False`` on an empty url.

    If ``require_https`` is ``True``, only 'https' will be considered a valid
    scheme, as opposed to 'http' and 'https' with the default, ``False``.

    Note: "True" doesn't entail that a URL is "safe". It may still be e.g.
    quoted incorrectly. Ensure to also use django.utils.encoding.iri_to_uri()
    on the path component of untrusted URLs.
    """
    if url is not None:
        url = url.strip()
    if not url:
        return False
    # Normalize allowed_hosts into a set.
    if allowed_hosts is None:
        allowed_hosts = set()
    elif isinstance(allowed_hosts, str):
        allowed_hosts = {allowed_hosts}
    # Chrome treats \ completely as / in paths but it could be part of some
    # basic auth credentials so we need to check both URLs.
    if not _url_has_allowed_host_and_scheme(
        url, allowed_hosts, require_https=require_https
    ):
        return False
    return _url_has_allowed_host_and_scheme(
        url.replace("\\", "/"), allowed_hosts, require_https=require_https
    )

278 

279 

280# Copied from urllib.parse.urlparse() but uses fixed urlsplit() function. 

def _urlparse(url, scheme="", allow_fragments=True):
    """Parse a URL into 6 components:
    <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
    Return a 6-tuple: (scheme, netloc, path, params, query, fragment).
    Note that we don't break the components up in smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes."""
    url, scheme, _coerce_result = _coerce_args(url, scheme)
    scheme, netloc, path, query, fragment = _urlsplit(url, scheme, allow_fragments)
    # Only schemes registered in uses_params carry ;params in the path.
    if scheme in uses_params and ";" in path:
        path, params = _splitparams(path)
    else:
        params = ""
    return _coerce_result(
        ParseResult(scheme, netloc, path, params, query, fragment)
    )

296 

297 

298# Copied from urllib.parse.urlsplit() with 

299# https://github.com/python/cpython/pull/661 applied. 

300def _urlsplit(url, scheme="", allow_fragments=True): 

301 """Parse a URL into 5 components: 

302 <scheme>://<netloc>/<path>?<query>#<fragment> 

303 Return a 5-tuple: (scheme, netloc, path, query, fragment). 

304 Note that we don't break the components up in smaller bits 

305 (e.g. netloc is a single string) and we don't expand % escapes.""" 

306 url, scheme, _coerce_result = _coerce_args(url, scheme) 

307 netloc = query = fragment = "" 

308 i = url.find(":") 

309 if i > 0: 

310 for c in url[:i]: 

311 if c not in scheme_chars: 

312 break 

313 else: 

314 scheme, url = url[:i].lower(), url[i + 1 :] 

315 

316 if url[:2] == "//": 

317 netloc, url = _splitnetloc(url, 2) 

318 if ("[" in netloc and "]" not in netloc) or ( 

319 "]" in netloc and "[" not in netloc 

320 ): 

321 raise ValueError("Invalid IPv6 URL") 

322 if allow_fragments and "#" in url: 

323 url, fragment = url.split("#", 1) 

324 if "?" in url: 

325 url, query = url.split("?", 1) 

326 v = SplitResult(scheme, netloc, url, query, fragment) 

327 return _coerce_result(v) 

328 

329 

def _url_has_allowed_host_and_scheme(url, allowed_hosts, require_https=False):
    """
    Check a single (already non-empty) URL string against allowed_hosts and
    the permitted schemes. Helper for url_has_allowed_host_and_scheme().
    """
    # Chrome considers any URL with more than two slashes to be absolute, but
    # urlparse is not so flexible. Treat any url with three slashes as unsafe.
    if url.startswith("///"):
        return False
    try:
        url_info = _urlparse(url)
    except ValueError:  # e.g. invalid IPv6 addresses
        return False
    # Forbid URLs like http:///example.com - with a scheme, but without a
    # hostname. In that URL, example.com is not the hostname but a path
    # component. However, Chrome will still consider example.com to be the
    # hostname, so we must not allow this syntax.
    if url_info.scheme and not url_info.netloc:
        return False
    # Forbid URLs that start with control characters. Some browsers (like
    # Chrome) ignore quite a few control characters at the start of a
    # URL and might consider the URL as scheme relative.
    if unicodedata.category(url[0])[0] == "C":
        return False
    # Consider URLs without a scheme (e.g. //example.com/p) to be http.
    scheme = url_info.scheme or ("http" if url_info.netloc else "")
    valid_schemes = ["https"] if require_https else ["http", "https"]
    if url_info.netloc and url_info.netloc not in allowed_hosts:
        return False
    return not scheme or scheme in valid_schemes

358 

359 

def escape_leading_slashes(url):
    """
    If redirecting to an absolute path (two leading slashes), a slash must be
    escaped to prevent browsers from handling the path as schemaless and
    redirecting to another host.
    """
    if not url.startswith("//"):
        return url
    # Percent-encode the second slash so the result reads /%2F<rest>.
    return "/%2F" + url[2:]