Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/django/utils/http.py: 27%

174 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1import base64 

2import datetime 

3import re 

4import unicodedata 

5from binascii import Error as BinasciiError 

6from email.utils import formatdate 

7from urllib.parse import ( 

8 ParseResult, 

9 SplitResult, 

10 _coerce_args, 

11 _splitnetloc, 

12 _splitparams, 

13 scheme_chars, 

14) 

15from urllib.parse import urlencode as original_urlencode 

16from urllib.parse import uses_params 

17 

18from django.utils.datastructures import MultiValueDict 

19from django.utils.regex_helper import _lazy_re_compile 

20 

# based on RFC 7232, Appendix C
ETAG_MATCH = _lazy_re_compile(
    r"""
    \A(      # start of string and capture group
    (?:W/)?  # optional weak indicator
    "        # opening quote
    [^"]*    # any sequence of non-quote characters
    "        # end quote
    )\Z      # end of string and capture group
""",
    re.X,
)

# Lowercase month abbreviations; list index + 1 gives the month number.
MONTHS = "jan feb mar apr may jun jul aug sep oct nov dec".split()
# Named-group building blocks for the three HTTP date formats accepted by
# parse_http_date() (RFC 7231 section 7.1.1.1).
__D = r"(?P<day>\d{2})"  # two-digit day of month
__D2 = r"(?P<day>[ \d]\d)"  # day, optionally space-padded (asctime format)
__M = r"(?P<mon>\w{3})"  # three-letter month abbreviation
__Y = r"(?P<year>\d{4})"  # four-digit year
__Y2 = r"(?P<year>\d{2})"  # two-digit year (RFC 850 format)
__T = r"(?P<hour>\d{2}):(?P<min>\d{2}):(?P<sec>\d{2})"  # HH:MM:SS time
# e.g. 'Sun, 06 Nov 1994 08:49:37 GMT' -- the preferred HTTP date format.
RFC1123_DATE = _lazy_re_compile(r"^\w{3}, %s %s %s %s GMT$" % (__D, __M, __Y, __T))
# e.g. 'Sunday, 06-Nov-94 08:49:37 GMT' -- obsolete RFC 850 format.
RFC850_DATE = _lazy_re_compile(r"^\w{6,9}, %s-%s-%s %s GMT$" % (__D, __M, __Y2, __T))
# e.g. 'Sun Nov  6 08:49:37 1994' -- ANSI C asctime() format.
ASCTIME_DATE = _lazy_re_compile(r"^\w{3} %s %s %s %s$" % (__M, __D2, __T, __Y))

# RFC 3986 section 2.2 reserved characters: general delimiters and
# sub-delimiters.
RFC3986_GENDELIMS = ":/?#[]@"
RFC3986_SUBDELIMS = "!$&'()*+,;="

47 

48 

def urlencode(query, doseq=False):
    """
    A version of Python's urllib.parse.urlencode() function that can operate
    on MultiValueDict and non-string values.
    """
    if isinstance(query, MultiValueDict):
        query = query.lists()
    elif hasattr(query, "items"):
        query = query.items()
    pairs = []
    for key, value in query:
        if value is None:
            raise TypeError(
                "Cannot encode None for key '%s' in a query string. Did you "
                "mean to pass an empty string or omit the value?" % key
            )
        if not doseq or isinstance(value, (str, bytes)):
            pairs.append((key, value))
            continue
        try:
            iterator = iter(value)
        except TypeError:
            # Not iterable; let original_urlencode() stringify it as-is.
            pairs.append((key, value))
            continue
        # Consume generators and iterators, when doseq=True, to work around
        # https://bugs.python.org/issue31706.
        materialized = []
        for item in iterator:
            if item is None:
                raise TypeError(
                    "Cannot encode None for key '%s' in a query "
                    "string. Did you mean to pass an empty string or "
                    "omit the value?" % key
                )
            materialized.append(item if isinstance(item, bytes) else str(item))
        pairs.append((key, materialized))
    return original_urlencode(pairs, doseq)

88 

89 

def http_date(epoch_seconds=None):
    """
    Format a timestamp to match the RFC1123 date format as specified by HTTP
    RFC7231 section 7.1.1.1: 'Wdy, DD Mon YYYY HH:MM:SS GMT'.

    `epoch_seconds` is a floating point number of seconds since the epoch in
    UTC, such as that output by time.time(); None means the current time.
    """
    # formatdate() with usegmt=True emits exactly the fixed-GMT RFC1123 form.
    return formatdate(timeval=epoch_seconds, usegmt=True)

102 

103 

def parse_http_date(date):
    """
    Parse a date format as specified by HTTP RFC7231 section 7.1.1.1.

    The three formats allowed by the RFC are accepted, even if only the first
    one is still in widespread use.

    Return an integer expressed in seconds since the epoch, in UTC.

    Raise ValueError if `date` matches none of the three formats or encodes
    an impossible calendar date.
    """
    # email.utils.parsedate() does the job for RFC1123 dates; unfortunately
    # RFC7231 makes it mandatory to support RFC850 dates too. So we roll
    # our own RFC-compliant parsing.
    for regex in RFC1123_DATE, RFC850_DATE, ASCTIME_DATE:
        m = regex.match(date)
        if m is not None:
            break
    else:
        raise ValueError("%r is not in a valid HTTP date format" % date)
    try:
        tz = datetime.timezone.utc
        year = int(m["year"])
        if year < 100:
            # RFC850 uses two-digit years. Per RFC 7231, a timestamp that
            # appears to be more than 50 years in the future is interpreted
            # as the most recent past year with the same last two digits.
            current_year = datetime.datetime.now(tz=tz).year
            current_century = current_year - (current_year % 100)
            if year - (current_year % 100) > 50:
                year += current_century - 100
            else:
                year += current_century
        month = MONTHS.index(m["mon"].lower()) + 1
        day = int(m["day"])
        hour = int(m["hour"])
        # Renamed from `min`/`sec` so locals don't shadow the builtin min().
        minute = int(m["min"])
        second = int(m["sec"])
        result = datetime.datetime(year, month, day, hour, minute, second, tzinfo=tz)
        return int(result.timestamp())
    except Exception as exc:
        # Any parsing/arithmetic failure (bad month name, day out of range,
        # ...) is normalized to ValueError with the original as the cause.
        raise ValueError("%r is not a valid date" % date) from exc

143 

144 

def parse_http_date_safe(date):
    """Same as parse_http_date, but return None if the input is invalid."""
    try:
        result = parse_http_date(date)
    except Exception:
        # Invalid input of any sort maps to None rather than an exception.
        return None
    return result

153 

154 

155# Base 36 functions: useful for generating compact URLs 

156 

157 

def base36_to_int(s):
    """
    Convert a base 36 string to an int. Raise ValueError if the input won't
    fit into an int.
    """
    # 13 base36 digits are enough to encode any 64-bit integer; longer
    # inputs are rejected to prevent overconsumption of server resources.
    if len(s) <= 13:
        return int(s, 36)
    raise ValueError("Base36 input too large")

169 

170 

def int_to_base36(i):
    """Convert a non-negative integer to a base36 string."""
    digits = "0123456789abcdefghijklmnopqrstuvwxyz"
    if i < 0:
        raise ValueError("Negative base36 conversion input.")
    # Single-digit fast path (also makes 0 yield "0" instead of "").
    if i < 36:
        return digits[i]
    encoded = []
    while i:
        i, remainder = divmod(i, 36)
        encoded.append(digits[remainder])
    # Digits were produced least-significant first.
    return "".join(reversed(encoded))

183 

184 

def urlsafe_base64_encode(s):
    """
    Encode the bytestring `s` as a URL-safe base64 str, with any trailing
    newlines and padding ('=') stripped.
    """
    encoded = base64.urlsafe_b64encode(s)
    return encoded.rstrip(b"\n=").decode("ascii")

191 

192 

def urlsafe_base64_decode(s):
    """
    Decode a base64 encoded string, re-adding any trailing equal signs that
    might have been stripped. Raise ValueError on malformed input.
    """
    data = s.encode()
    # Appending len % 4 '=' characters is exactly what the stripped
    # encoding needs (b64 decoding tolerates the over-padded 3-mod-4 case).
    padded = data + b"=" * (len(data) % 4)
    try:
        return base64.urlsafe_b64decode(padded)
    except (LookupError, BinasciiError) as e:
        raise ValueError(e)

203 

204 

def parse_etags(etag_str):
    """
    Parse a string of ETags given in an If-None-Match or If-Match header as
    defined by RFC 7232. Return a list of quoted ETags, or ['*'] if all ETags
    should be matched.
    """
    if etag_str.strip() == "*":
        return ["*"]
    # Parse each comma-separated candidate individually, keeping only the
    # ones that are syntactically valid ETags.
    parsed = []
    for candidate in etag_str.split(","):
        match = ETAG_MATCH.match(candidate.strip())
        if match:
            parsed.append(match[1])
    return parsed

217 

218 

def quote_etag(etag_str):
    """
    Return `etag_str` unchanged if it is already a valid quoted ETag;
    otherwise wrap it in double quotes, making it a strong ETag.
    """
    return etag_str if ETAG_MATCH.match(etag_str) else '"%s"' % etag_str

228 

229 

def is_same_domain(host, pattern):
    """
    Return ``True`` if the host is either an exact match or a match
    to the wildcard pattern.

    Any pattern beginning with a period matches a domain and all of its
    subdomains. (e.g. ``.example.com`` matches ``example.com`` and
    ``foo.example.com``). Anything else is an exact string match.

    NOTE(review): only the pattern is lowercased here — presumably `host`
    is already lowercase at the call sites; confirm before reusing.
    """
    if not pattern:
        return False

    pattern = pattern.lower()
    if pattern[0] == ".":
        # Wildcard: match the domain itself or any subdomain of it.
        return host.endswith(pattern) or host == pattern[1:]
    return pattern == host

248 

249 

def url_has_allowed_host_and_scheme(url, allowed_hosts, require_https=False):
    """
    Return ``True`` if the url uses an allowed host and a safe scheme.

    Always return ``False`` on an empty url.

    If ``require_https`` is ``True``, only 'https' will be considered a valid
    scheme, as opposed to 'http' and 'https' with the default, ``False``.

    Note: "True" doesn't entail that a URL is "safe". It may still be e.g.
    quoted incorrectly. Ensure to also use django.utils.encoding.iri_to_uri()
    on the path component of untrusted URLs.
    """
    if url is not None:
        url = url.strip()
    if not url:
        return False
    # Normalize allowed_hosts into a set.
    if allowed_hosts is None:
        allowed_hosts = set()
    elif isinstance(allowed_hosts, str):
        allowed_hosts = {allowed_hosts}
    # Chrome treats \ completely as / in paths but it could be part of some
    # basic auth credentials so we need to check both URLs.
    if not _url_has_allowed_host_and_scheme(
        url, allowed_hosts, require_https=require_https
    ):
        return False
    return _url_has_allowed_host_and_scheme(
        url.replace("\\", "/"), allowed_hosts, require_https=require_https
    )

278 

279 

280# Copied from urllib.parse.urlparse() but uses fixed urlsplit() function. 

def _urlparse(url, scheme="", allow_fragments=True):
    """Parse a URL into 6 components:
    <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
    Return a 6-tuple: (scheme, netloc, path, params, query, fragment).
    Note that we don't break the components up in smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes."""
    url, scheme, _coerce_result = _coerce_args(url, scheme)
    scheme, netloc, path, query, fragment = _urlsplit(url, scheme, allow_fragments)
    # Only schemes registered in uses_params carry ;params in the path.
    if scheme in uses_params and ";" in path:
        path, params = _splitparams(path)
    else:
        params = ""
    return _coerce_result(
        ParseResult(scheme, netloc, path, params, query, fragment)
    )

296 

297 

298# Copied from urllib.parse.urlsplit() with 

299# https://github.com/python/cpython/pull/661 applied. 

300def _urlsplit(url, scheme="", allow_fragments=True): 

301 """Parse a URL into 5 components: 

302 <scheme>://<netloc>/<path>?<query>#<fragment> 

303 Return a 5-tuple: (scheme, netloc, path, query, fragment). 

304 Note that we don't break the components up in smaller bits 

305 (e.g. netloc is a single string) and we don't expand % escapes.""" 

306 url, scheme, _coerce_result = _coerce_args(url, scheme) 

307 netloc = query = fragment = "" 

308 i = url.find(":") 

309 if i > 0: 

310 for c in url[:i]: 

311 if c not in scheme_chars: 

312 break 

313 else: 

314 scheme, url = url[:i].lower(), url[i + 1 :] 

315 

316 if url[:2] == "//": 

317 netloc, url = _splitnetloc(url, 2) 

318 if ("[" in netloc and "]" not in netloc) or ( 

319 "]" in netloc and "[" not in netloc 

320 ): 

321 raise ValueError("Invalid IPv6 URL") 

322 if allow_fragments and "#" in url: 

323 url, fragment = url.split("#", 1) 

324 if "?" in url: 

325 url, query = url.split("?", 1) 

326 v = SplitResult(scheme, netloc, url, query, fragment) 

327 return _coerce_result(v) 

328 

329 

def _url_has_allowed_host_and_scheme(url, allowed_hosts, require_https=False):
    """
    Check a single (already non-empty) URL string against allowed_hosts and
    the permitted schemes. Helper for url_has_allowed_host_and_scheme().
    """
    # Chrome considers any URL with more than two slashes to be absolute, but
    # urlparse is not so flexible. Treat any url with three slashes as unsafe.
    if url.startswith("///"):
        return False
    try:
        url_info = _urlparse(url)
    except ValueError:  # e.g. invalid IPv6 addresses
        return False
    # Forbid URLs like http:///example.com - with a scheme, but without a
    # hostname. In that URL, example.com is not the hostname but a path
    # component. However, Chrome will still consider example.com to be the
    # hostname, so we must not allow this syntax.
    if url_info.scheme and not url_info.netloc:
        return False
    # Forbid URLs that start with control characters. Some browsers (like
    # Chrome) ignore quite a few control characters at the start of a
    # URL and might consider the URL as scheme relative.
    if unicodedata.category(url[0])[0] == "C":
        return False
    # Consider URLs without a scheme (e.g. //example.com/p) to be http.
    scheme = url_info.scheme or ("http" if url_info.netloc else "")
    valid_schemes = ["https"] if require_https else ["http", "https"]
    if url_info.netloc and url_info.netloc not in allowed_hosts:
        return False
    return not scheme or scheme in valid_schemes

358 

359 

def escape_leading_slashes(url):
    """
    If redirecting to an absolute path (two leading slashes), a slash must be
    escaped to prevent browsers from handling the path as schemaless and
    redirecting to another host.
    """
    if not url.startswith("//"):
        return url
    # Percent-encode the second slash so the result reads /%2F<rest>.
    return "/%2F" + url[2:]