Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/urllib3/util/url.py: 18%

1from __future__ import absolute_import

3import re

4from collections import namedtuple

6from ..exceptions import LocationParseError

7from ..packages import six

# Field names of the ``Url`` namedtuple, in positional order.
url_attrs = ["scheme", "auth", "host", "port", "path", "query", "fragment"]

# We only want to normalize urls with an HTTP(S) scheme.
# urllib3 infers URLs without a scheme (None) to be http.
NORMALIZABLE_SCHEMES = ("http", "https", None)

# Almost all of these patterns were derived from the
# 'rfc3986' module: https://github.com/python-hyper/rfc3986

# One percent-encoded octet ("%" followed by two hex digits, either case).
PERCENT_RE = re.compile(r"%[a-fA-F0-9]{2}")
# Matches input that begins with "<scheme>:" or with "/". parse_url() prepends
# "//" to anything that does not match, so the input is read as an authority.
# Note that "." is not accepted in the scheme here, unlike in URI_RE below.
SCHEME_RE = re.compile(r"^(?:[a-zA-Z][a-zA-Z0-9+-]*:|/)")
# Splits a URI reference into five capture groups, in order:
# scheme, authority, path, query, fragment. Every group is optional.
URI_RE = re.compile(
    r"^(?:([a-zA-Z][a-zA-Z0-9+.-]*):)?"
    r"(?://([^\\/?#]*))?"
    r"([^?#]*)"
    r"(?:\?([^#]*))?"
    r"(?:#(.*))?$",
    re.UNICODE | re.DOTALL,
)

# Building blocks for the IP-literal patterns below.
# IPV4_PAT only checks the dotted shape (1-3 digits per part), not the 0-255 range.
IPV4_PAT = r"(?:[0-9]{1,3}\.){3}[0-9]{1,3}"
HEX_PAT = "[0-9A-Fa-f]{1,4}"
# Last 32 bits of an IPv6 address: either two hex groups or an embedded IPv4 address.
LS32_PAT = "(?:{hex}:{hex}|{ipv4})".format(hex=HEX_PAT, ipv4=IPV4_PAT)
# Substitution values for the %(hex)s / %(ls32)s placeholders in _variations.
_subs = {"hex": HEX_PAT, "ls32": LS32_PAT}
# One template per alternative of the IPv6 address grammar; the comment above
# each entry is the grammar rule that the template implements.
_variations = [
    #                            6( h16 ":" ) ls32
    "(?:%(hex)s:){6}%(ls32)s",
    #                       "::" 5( h16 ":" ) ls32
    "::(?:%(hex)s:){5}%(ls32)s",
    # [               h16 ] "::" 4( h16 ":" ) ls32
    "(?:%(hex)s)?::(?:%(hex)s:){4}%(ls32)s",
    # [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
    "(?:(?:%(hex)s:)?%(hex)s)?::(?:%(hex)s:){3}%(ls32)s",
    # [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
    "(?:(?:%(hex)s:){0,2}%(hex)s)?::(?:%(hex)s:){2}%(ls32)s",
    # [ *3( h16 ":" ) h16 ] "::"    h16 ":"   ls32
    "(?:(?:%(hex)s:){0,3}%(hex)s)?::%(hex)s:%(ls32)s",
    # [ *4( h16 ":" ) h16 ] "::"              ls32
    "(?:(?:%(hex)s:){0,4}%(hex)s)?::%(ls32)s",
    # [ *5( h16 ":" ) h16 ] "::"              h16
    "(?:(?:%(hex)s:){0,5}%(hex)s)?::%(hex)s",
    # [ *6( h16 ":" ) h16 ] "::"
    "(?:(?:%(hex)s:){0,6}%(hex)s)?::",
]

# Characters accepted literally inside a zone ID; written so the string can be
# dropped into a regex character class (hence the escaped "-").
UNRESERVED_PAT = r"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789._!\-~"
# Alternation of every expanded template from _variations.
IPV6_PAT = "(?:" + "|".join([x % _subs for x in _variations]) + ")"
# Zone ID suffix: a "%25" or bare "%" separator followed by one or more
# allowed characters or percent-encoded octets.
ZONE_ID_PAT = "(?:%25|%)(?:[" + UNRESERVED_PAT + "]|%[a-fA-F0-9]{2})+"
# Bracketed IPv6 address with an optional zone ID, e.g. "[fe80::1%25eth0]".
IPV6_ADDRZ_PAT = r"\[" + IPV6_PAT + r"(?:" + ZONE_ID_PAT + r")?\]"
# Registered name: any run (possibly empty) of characters other than
# "[", "]", "%", ":", "/", "?", "#", or percent-encoded octets.
REG_NAME_PAT = r"(?:[^\[\]%:/?#]|%[a-fA-F0-9]{2})*"
# Request target that starts with "/": captures the path and the optional
# query. A trailing fragment is matched but not captured.
TARGET_RE = re.compile(r"^(/[^?#]*)(?:\?([^#]*))?(?:#.*)?$")

# Anchored, whole-string versions of the patterns above.
IPV4_RE = re.compile("^" + IPV4_PAT + "$")
IPV6_RE = re.compile("^" + IPV6_PAT + "$")
IPV6_ADDRZ_RE = re.compile("^" + IPV6_ADDRZ_PAT + "$")
# [2:-2] drops the leading "\[" and trailing "\]" from the pattern source, so
# this matches the same address without the surrounding brackets.
BRACELESS_IPV6_ADDRZ_RE = re.compile("^" + IPV6_ADDRZ_PAT[2:-2] + "$")
# Captures the zone ID (separator included) that sits right before the closing "]".
ZONE_ID_RE = re.compile("(" + ZONE_ID_PAT + r")\]$")

# Splits "host[:port]". Group 1 is the host (registered name, IPv4 address or
# bracketed IPv6 address); group 2 is the port: at most five digits, with any
# leading zeros consumed outside the group. Group 2 is "" for a bare trailing ":".
_HOST_PORT_PAT = ("^(%s|%s|%s)(?::0*([0-9]{0,5}))?$") % (
    REG_NAME_PAT,
    IPV4_PAT,
    IPV6_ADDRZ_PAT,
)
_HOST_PORT_RE = re.compile(_HOST_PORT_PAT, re.UNICODE | re.DOTALL)

# Character sets handed to _encode_invalid_chars() as the characters that are
# left unescaped in each URL component. Each set extends the previous one.
UNRESERVED_CHARS = set(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789._-~"
)
SUB_DELIM_CHARS = set("!$&'()*+,;=")
USERINFO_CHARS = UNRESERVED_CHARS | SUB_DELIM_CHARS | {":"}
PATH_CHARS = USERINFO_CHARS | {"@", "/"}
QUERY_CHARS = FRAGMENT_CHARS = PATH_CHARS | {"?"}

class Url(namedtuple("Url", url_attrs)):
    """
    Data structure for representing an HTTP URL. Used as a return value for
    :func:`parse_url`.

    The constructor itself lowercases the scheme and makes sure a non-empty
    path starts with ``/``; all other fields are stored exactly as given.
    Every field defaults to ``None``.
    """

    __slots__ = ()

    def __new__(
        cls,
        scheme=None,
        auth=None,
        host=None,
        port=None,
        path=None,
        query=None,
        fragment=None,
    ):
        # A non-empty path is always stored with a leading slash. Empty and
        # None paths are left untouched.
        if path and not path.startswith("/"):
            path = "/" + path
        if scheme is not None:
            scheme = scheme.lower()
        return super(Url, cls).__new__(
            cls, scheme, auth, host, port, path, query, fragment
        )

    @property
    def hostname(self):
        """For backwards-compatibility with urlparse. We're nice like that."""
        return self.host

    @property
    def request_uri(self):
        """Absolute path including the query string.

        Falls back to ``/`` when no path is set. The ``?`` separator is added
        whenever the query is not ``None``, even if it is an empty string.
        """
        uri = self.path or "/"

        if self.query is not None:
            uri += "?" + self.query

        return uri

    @property
    def netloc(self):
        """Network location including host and port.

        Returns ``None`` when no host is set. Previously a port without a
        host was rendered as the string ``"None:<port>"``. A falsy port
        (``None`` or ``0``) is omitted from the result.
        """
        if self.host is None:
            return None
        if self.port:
            return "%s:%d" % (self.host, self.port)
        return self.host

    @property
    def url(self):
        """
        Convert self into a url

        This function should more or less round-trip with :func:`.parse_url`. The
        returned url may not be exactly the same as the url inputted to
        :func:`.parse_url`, but it should be equivalent by the RFC (e.g., urls
        with a blank port will have : removed).

        Example: ::

            >>> U = parse_url('http://google.com/mail/')
            >>> U.url
            'http://google.com/mail/'
            >>> Url('http', 'username:password', 'host.com', 80,
            ... '/path', 'query', 'fragment').url
            'http://username:password@host.com:80/path?query#fragment'
        """
        scheme, auth, host, port, path, query, fragment = self
        url = u""

        # We use "is not None" because we want things to happen with empty
        # strings (or 0 port)
        if scheme is not None:
            url += scheme + u"://"
        if auth is not None:
            url += auth + u"@"
        if host is not None:
            url += host
        if port is not None:
            url += u":" + str(port)
        if path is not None:
            url += path
        if query is not None:
            url += u"?" + query
        if fragment is not None:
            url += u"#" + fragment

        return url

    def __str__(self):
        return self.url

173

174

def split_first(s, delims):
    """
    .. deprecated:: 1.25

    Given a string and an iterable of delimiters, split on the first found
    delimiter. Return two split parts and the matched delimiter.

    If not found, then the first part is the full input string.

    Delimiters may be longer than one character; the whole matched delimiter
    is removed from the second part. When several delimiters match at the
    same position, the one that comes first in ``delims`` wins.

    Example::

        >>> split_first('foo/bar?baz', '?/=')
        ('foo', 'bar?baz', '/')
        >>> split_first('foo/bar?baz', '123')
        ('foo/bar?baz', '', None)

    Scales linearly with number of delims. Not ideal for large number of delims.
    """
    min_idx = None
    min_delim = None
    for d in delims:
        idx = s.find(d)
        if idx < 0:
            # This delimiter does not occur in the string at all.
            continue

        if min_idx is None or idx < min_idx:
            min_idx = idx
            min_delim = d

    if min_idx is None:
        return s, "", None

    # Skip the full delimiter, not just one character, so that
    # multi-character delimiters do not leak into the second part.
    return s[:min_idx], s[min_idx + len(min_delim) :], min_delim

208

209

def _encode_invalid_chars(component, allowed_chars, encoding="utf-8"):
    """Percent-encode every byte of ``component`` that is not allowed.

    Existing percent-escapes are upper-cased. If every ``%`` in the component
    belongs to such an escape, the component is treated as already encoded
    and its ``%`` characters are passed through instead of being escaped
    again. ``None`` is returned unchanged.

    ``encoding`` is only used to decode the (pure ASCII) result back to text.
    """
    if component is None:
        return component

    text = six.ensure_text(component)

    # Upper-case the hex digits of every escape that is already present and
    # remember how many there were.
    text, escape_count = PERCENT_RE.subn(
        lambda found: found.group(0).upper(), text
    )

    raw = text.encode("utf-8", "surrogatepass")
    # Only when each '%' is part of a valid escape may '%' be kept as-is.
    keep_percent = escape_count == raw.count(b"%")
    percent_code = ord("%")

    result = bytearray()
    # Iterating a bytearray yields integers on both Python 2 and 3.
    for code in bytearray(raw):
        if keep_percent and code == percent_code:
            result.append(code)
        elif code < 128 and chr(code) in allowed_chars:
            result.append(code)
        else:
            result.extend(b"%" + format(code, "02X").encode())

    return result.decode(encoding)

242

243

244def _remove_path_dot_segments(path):

245 # See http://tools.ietf.org/html/rfc3986#section-5.2.4 for pseudo-code

246 segments = path.split("/") # Turn the path into a list of segments

247 output = [] # Initialize the variable to use to store output

248

249 for segment in segments:

250 # '.' is the current directory, so ignore it, it is superfluous

251 if segment == ".":

252 continue

253 # Anything other than '..', should be appended to the output

254 elif segment != "..":

255 output.append(segment)

256 # In this case segment == '..', if we can, we should pop the last

257 # element

258 elif output:

259 output.pop()

260

261 # If the path starts with '/' and the output is empty or the first string

262 # is non-empty

263 if path.startswith("/") and (not output or output[0]):

264 output.insert(0, "")

265

266 # If the path starts with '/.' or '/..' ensure we add one more empty

267 # string to add a trailing '/'

268 if path.endswith(("/.", "/..")):

269 output.append("")

270

271 return "/".join(output)

272

273

def _normalize_host(host, scheme):
    """Return ``host`` normalized for the given ``scheme``.

    Falsy hosts are returned unchanged and byte strings are converted with
    ``six.ensure_str``. Nothing else is done unless ``scheme`` is one of
    ``NORMALIZABLE_SCHEMES``. For those schemes:

    * a bracketed IPv6 address is lower-cased; a zone ID, if present, is
      re-emitted behind a single ``%`` with its invalid characters encoded;
    * a host that looks like an IPv4 address is returned as is;
    * any other host is encoded label by label with ``_idna_encode``.
    """
    if not host:
        return host

    if isinstance(host, six.binary_type):
        host = six.ensure_str(host)

    if scheme not in NORMALIZABLE_SCHEMES:
        return host

    if IPV6_ADDRZ_RE.match(host):
        zone_match = ZONE_ID_RE.search(host)
        if not zone_match:
            return host.lower()

        # IPv6 hosts of the form 'a::b%zone' are encoded in a URL as
        # such per RFC 6874: 'a::b%25zone'. Unquote the ZoneID
        # separator as necessary to return a valid RFC 4007 scoped IP.
        start, end = zone_match.span(1)
        zone = host[start:end]

        # Drop the separator: '%25' when something follows it, otherwise
        # the bare '%'.
        separator_len = 3 if (zone.startswith("%25") and zone != "%25") else 1
        zone = "%" + _encode_invalid_chars(zone[separator_len:], UNRESERVED_CHARS)
        return host[:start].lower() + zone + host[end:]

    if IPV4_RE.match(host):
        return host

    encoded_labels = [_idna_encode(label) for label in host.split(".")]
    return six.ensure_str(b".".join(encoded_labels))

303

304

305def _idna_encode(name):

306 if name and any([ord(x) > 128 for x in name]):

307 try:

308 import idna

309 except ImportError:

310 six.raise_from(

311 LocationParseError("Unable to parse URL without the 'idna' module"),

312 None,

313 )

314 try:

315 return idna.encode(name.lower(), strict=True, std3_rules=True)

316 except idna.IDNAError:

317 six.raise_from(

318 LocationParseError(u"Name '%s' is not a valid IDNA label" % name), None

319 )

320 return name.lower().encode("ascii")

321

322

def _encode_target(target):
    """Percent-encodes a request target so that there are no invalid characters

    The target must start with ``/``. Its path and optional query are encoded
    separately; a fragment, if present, is dropped.

    :raises LocationParseError: if ``target`` does not start with ``/`` and
        therefore cannot be split into path and query. (This used to surface
        as an ``AttributeError`` on ``None``.)
    """
    match = TARGET_RE.match(target)
    if match is None:
        raise LocationParseError("%r is not a valid request URI" % (target,))

    path, query = match.groups()
    encoded = _encode_invalid_chars(path, PATH_CHARS)
    query = _encode_invalid_chars(query, QUERY_CHARS)
    if query is not None:
        encoded += "?" + query
    return encoded

331

332

def parse_url(url):
    """
    Parse ``url`` into a :class:`.Url` namedtuple.

    Incomplete urls are parsed on a best-effort basis and every field that is
    not present comes back as ``None``. Input that does not start with a
    scheme or a ``/`` is read as if it started with ``//``, i.e. as an
    authority. For ``http``, ``https`` and scheme-less urls the components
    are additionally normalized (dot segments removed from the path, invalid
    characters percent-encoded).

    The parser logic and helper functions are based heavily on
    work done in the ``rfc3986`` module.

    :param str url: URL to parse into a :class:`.Url` namedtuple.
    :raises LocationParseError: if the url cannot be parsed.

    Partly backwards-compatible with :mod:`urlparse`.

    Example::

        >>> parse_url('http://google.com/mail/')
        Url(scheme='http', host='google.com', port=None, path='/mail/', ...)
        >>> parse_url('google.com:80')
        Url(scheme=None, host='google.com', port=80, path=None, ...)
        >>> parse_url('/foo?bar')
        Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...)
    """
    if not url:
        # Nothing to parse: every field stays None.
        return Url()

    original = url
    if SCHEME_RE.search(url) is None:
        url = "//" + url

    try:
        scheme, authority, path, query, fragment = URI_RE.match(url).groups()
        should_normalize = scheme is None or scheme.lower() in NORMALIZABLE_SCHEMES

        if scheme:
            scheme = scheme.lower()

        auth = host = port = None
        if authority:
            # Everything before the last '@' is the userinfo part.
            userinfo, _, host_port = authority.rpartition("@")
            auth = userinfo or None
            host, port = _HOST_PORT_RE.match(host_port).groups()
            if auth and should_normalize:
                auth = _encode_invalid_chars(auth, USERINFO_CHARS)
            if port == "":
                port = None

        if port is not None:
            port = int(port)
            if port < 0 or port > 65535:
                raise LocationParseError(url)

        host = _normalize_host(host, scheme)

        if should_normalize:
            if path:
                path = _encode_invalid_chars(
                    _remove_path_dot_segments(path), PATH_CHARS
                )
            if query:
                query = _encode_invalid_chars(query, QUERY_CHARS)
            if fragment:
                fragment = _encode_invalid_chars(fragment, FRAGMENT_CHARS)

    except (ValueError, AttributeError):
        # Any failure above is reported against the url the caller passed in.
        return six.raise_from(LocationParseError(original), None)

    # For the sake of backwards compatibility we put empty
    # string values for path if there are any defined values
    # beyond the path in the URL.
    # TODO: Remove this when we break backwards compatibility.
    if not path:
        path = "" if (query is not None or fragment is not None) else None

    # Ensure that each part of the URL is a `str` for
    # backwards compatibility.
    coerce = six.ensure_text if isinstance(url, six.text_type) else six.ensure_str

    def as_native(part):
        return part if part is None else coerce(part)

    return Url(
        scheme=as_native(scheme),
        auth=as_native(auth),
        host=as_native(host),
        port=port,
        path=as_native(path),
        query=as_native(query),
        fragment=as_native(fragment),
    )

428

429

def get_host(url):
    """
    Deprecated. Use :func:`parse_url` instead.

    Returns a ``(scheme, hostname, port)`` tuple for ``url``; the scheme
    falls back to ``"http"`` when the url does not carry one.
    """
    parsed = parse_url(url)
    scheme = parsed.scheme if parsed.scheme else "http"
    return scheme, parsed.hostname, parsed.port