Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/django/utils/regex_helper.py: 60%
193 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1"""
2Functions for reversing a regular expression (used in reverse URL resolving).
3Used internally by Django and not intended for external use.
5This is not, and is not intended to be, a complete reg-exp decompiler. It
6should be good enough for a large class of URLS, however.
7"""
8import re
10from django.utils.functional import SimpleLazyObject
12# Mapping of an escape character to a representative of that class. So, e.g.,
13# "\w" is replaced by "x" in a reverse URL. A value of None means to ignore
14# this sequence. Any missing key is mapped to itself.
15ESCAPE_MAPPINGS = {
16 "A": None,
17 "b": None,
18 "B": None,
19 "d": "0",
20 "D": "x",
21 "s": " ",
22 "S": "x",
23 "w": "x",
24 "W": "!",
25 "Z": None,
26}
29class Choice(list):
30 """Represent multiple possibilities at this point in a pattern string."""
33class Group(list):
34 """Represent a capturing group in the pattern string."""
37class NonCapture(list):
38 """Represent a non-capturing group in the pattern string."""
41def normalize(pattern):
42 r"""
43 Given a reg-exp pattern, normalize it to an iterable of forms that
44 suffice for reverse matching. This does the following:
46 (1) For any repeating sections, keeps the minimum number of occurrences
47 permitted (this means zero for optional groups).
48 (2) If an optional group includes parameters, include one occurrence of
49 that group (along with the zero occurrence case from step (1)).
50 (3) Select the first (essentially an arbitrary) element from any character
51 class. Select an arbitrary character for any unordered class (e.g. '.'
52 or '\w') in the pattern.
53 (4) Ignore look-ahead and look-behind assertions.
54 (5) Raise an error on any disjunctive ('|') constructs.
56 Django's URLs for forward resolving are either all positional arguments or
57 all keyword arguments. That is assumed here, as well. Although reverse
58 resolving can be done using positional args when keyword args are
59 specified, the two cannot be mixed in the same reverse() call.
60 """
61 # Do a linear scan to work out the special features of this pattern. The
62 # idea is that we scan once here and collect all the information we need to
63 # make future decisions.
64 result = []
65 non_capturing_groups = []
66 consume_next = True
67 pattern_iter = next_char(iter(pattern))
68 num_args = 0
70 # A "while" loop is used here because later on we need to be able to peek
71 # at the next character and possibly go around without consuming another
72 # one at the top of the loop.
73 try:
74 ch, escaped = next(pattern_iter)
75 except StopIteration:
76 return [("", [])]
78 try:
79 while True:
80 if escaped:
81 result.append(ch)
82 elif ch == ".": 82 ↛ 84line 82 didn't jump to line 84, because the condition on line 82 was never true
83 # Replace "any character" with an arbitrary representative.
84 result.append(".")
85 elif ch == "|": 85 ↛ 87line 85 didn't jump to line 87, because the condition on line 85 was never true
86 # FIXME: One day we'll should do this, but not in 1.0.
87 raise NotImplementedError("Awaiting Implementation")
88 elif ch == "^":
89 pass
90 elif ch == "$":
91 break
92 elif ch == ")": 92 ↛ 99line 92 didn't jump to line 99, because the condition on line 92 was never true
93 # This can only be the end of a non-capturing group, since all
94 # other unescaped parentheses are handled by the grouping
95 # section later (and the full group is handled there).
96 #
97 # We regroup everything inside the capturing group so that it
98 # can be quantified, if necessary.
99 start = non_capturing_groups.pop()
100 inner = NonCapture(result[start:])
101 result = result[:start] + [inner]
102 elif ch == "[": 102 ↛ 104line 102 didn't jump to line 104, because the condition on line 102 was never true
103 # Replace ranges with the first character in the range.
104 ch, escaped = next(pattern_iter)
105 result.append(ch)
106 ch, escaped = next(pattern_iter)
107 while escaped or ch != "]":
108 ch, escaped = next(pattern_iter)
109 elif ch == "(":
110 # Some kind of group.
111 ch, escaped = next(pattern_iter)
112 if ch != "?" or escaped: 112 ↛ 114line 112 didn't jump to line 114, because the condition on line 112 was never true
113 # A positional group
114 name = "_%d" % num_args
115 num_args += 1
116 result.append(Group((("%%(%s)s" % name), name)))
117 walk_to_end(ch, pattern_iter)
118 else:
119 ch, escaped = next(pattern_iter)
120 if ch in "!=<": 120 ↛ 123line 120 didn't jump to line 123, because the condition on line 120 was never true
121 # All of these are ignorable. Walk to the end of the
122 # group.
123 walk_to_end(ch, pattern_iter)
124 elif ch == ":": 124 ↛ 126line 124 didn't jump to line 126, because the condition on line 124 was never true
125 # Non-capturing group
126 non_capturing_groups.append(len(result))
127 elif ch != "P": 127 ↛ 130line 127 didn't jump to line 130, because the condition on line 127 was never true
128 # Anything else, other than a named group, is something
129 # we cannot reverse.
130 raise ValueError("Non-reversible reg-exp portion: '(?%s'" % ch)
131 else:
132 ch, escaped = next(pattern_iter)
133 if ch not in ("<", "="): 133 ↛ 134line 133 didn't jump to line 134, because the condition on line 133 was never true
134 raise ValueError(
135 "Non-reversible reg-exp portion: '(?P%s'" % ch
136 )
137 # We are in a named capturing group. Extra the name and
138 # then skip to the end.
139 if ch == "<": 139 ↛ 143line 139 didn't jump to line 143, because the condition on line 139 was never false
140 terminal_char = ">"
141 # We are in a named backreference.
142 else:
143 terminal_char = ")"
144 name = []
145 ch, escaped = next(pattern_iter)
146 while ch != terminal_char:
147 name.append(ch)
148 ch, escaped = next(pattern_iter)
149 param = "".join(name)
150 # Named backreferences have already consumed the
151 # parenthesis.
152 if terminal_char != ")": 152 ↛ 156line 152 didn't jump to line 156, because the condition on line 152 was never false
153 result.append(Group((("%%(%s)s" % param), param)))
154 walk_to_end(ch, pattern_iter)
155 else:
156 result.append(Group((("%%(%s)s" % param), None)))
157 elif ch in "*?+{":
158 # Quantifiers affect the previous item in the result list.
159 count, ch = get_quantifier(ch, pattern_iter)
160 if ch: 160 ↛ 166line 160 didn't jump to line 166, because the condition on line 160 was never false
161 # We had to look ahead, but it wasn't need to compute the
162 # quantifier, so use this character next time around the
163 # main loop.
164 consume_next = False
166 if count == 0: 166 ↛ 177line 166 didn't jump to line 177, because the condition on line 166 was never false
167 if contains(result[-1], Group): 167 ↛ 174line 167 didn't jump to line 174, because the condition on line 167 was never true
168 # If we are quantifying a capturing group (or
169 # something containing such a group) and the minimum is
170 # zero, we must also handle the case of one occurrence
171 # being present. All the quantifiers (except {0,0},
172 # which we conveniently ignore) that have a 0 minimum
173 # also allow a single occurrence.
174 result[-1] = Choice([None, result[-1]])
175 else:
176 result.pop()
177 elif count > 1:
178 result.extend([result[-1]] * (count - 1))
179 else:
180 # Anything else is a literal.
181 result.append(ch)
183 if consume_next:
184 ch, escaped = next(pattern_iter)
185 consume_next = True
186 except StopIteration: 186 ↛ 188line 186 didn't jump to line 188
187 pass
188 except NotImplementedError:
189 # A case of using the disjunctive form. No results for you!
190 return [("", [])]
192 return list(zip(*flatten_result(result)))
195def next_char(input_iter):
196 r"""
197 An iterator that yields the next character from "pattern_iter", respecting
198 escape sequences. An escaped character is replaced by a representative of
199 its class (e.g. \w -> "x"). If the escaped character is one that is
200 skipped, it is not returned (the next character is returned instead).
202 Yield the next character, along with a boolean indicating whether it is a
203 raw (unescaped) character or not.
204 """
205 for ch in input_iter:
206 if ch != "\\":
207 yield ch, False
208 continue
209 ch = next(input_iter)
210 representative = ESCAPE_MAPPINGS.get(ch, ch)
211 if representative is None:
212 continue
213 yield representative, True
216def walk_to_end(ch, input_iter):
217 """
218 The iterator is currently inside a capturing group. Walk to the close of
219 this group, skipping over any nested groups and handling escaped
220 parentheses correctly.
221 """
222 if ch == "(": 222 ↛ 223line 222 didn't jump to line 223, because the condition on line 222 was never true
223 nesting = 1
224 else:
225 nesting = 0
226 for ch, escaped in input_iter: 226 ↛ exitline 226 didn't return from function 'walk_to_end', because the loop on line 226 didn't complete
227 if escaped: 227 ↛ 228line 227 didn't jump to line 228, because the condition on line 227 was never true
228 continue
229 elif ch == "(": 229 ↛ 230line 229 didn't jump to line 230, because the condition on line 229 was never true
230 nesting += 1
231 elif ch == ")":
232 if not nesting: 232 ↛ 234line 232 didn't jump to line 234, because the condition on line 232 was never false
233 return
234 nesting -= 1
237def get_quantifier(ch, input_iter):
238 """
239 Parse a quantifier from the input, where "ch" is the first character in the
240 quantifier.
242 Return the minimum number of occurrences permitted by the quantifier and
243 either None or the next character from the input_iter if the next character
244 is not part of the quantifier.
245 """
246 if ch in "*?+": 246 ↛ 257line 246 didn't jump to line 257, because the condition on line 246 was never false
247 try:
248 ch2, escaped = next(input_iter)
249 except StopIteration:
250 ch2 = None
251 if ch2 == "?": 251 ↛ 252line 251 didn't jump to line 252, because the condition on line 251 was never true
252 ch2 = None
253 if ch == "+": 253 ↛ 254line 253 didn't jump to line 254, because the condition on line 253 was never true
254 return 1, ch2
255 return 0, ch2
257 quant = []
258 while ch != "}":
259 ch, escaped = next(input_iter)
260 quant.append(ch)
261 quant = quant[:-1]
262 values = "".join(quant).split(",")
264 # Consume the trailing '?', if necessary.
265 try:
266 ch, escaped = next(input_iter)
267 except StopIteration:
268 ch = None
269 if ch == "?":
270 ch = None
271 return int(values[0]), ch
274def contains(source, inst):
275 """
276 Return True if the "source" contains an instance of "inst". False,
277 otherwise.
278 """
279 if isinstance(source, inst): 279 ↛ 280line 279 didn't jump to line 280, because the condition on line 279 was never true
280 return True
281 if isinstance(source, NonCapture): 281 ↛ 282line 281 didn't jump to line 282, because the condition on line 281 was never true
282 for elt in source:
283 if contains(elt, inst):
284 return True
285 return False
288def flatten_result(source):
289 """
290 Turn the given source sequence into a list of reg-exp possibilities and
291 their arguments. Return a list of strings and a list of argument lists.
292 Each of the two lists will be of the same length.
293 """
294 if source is None: 294 ↛ 295line 294 didn't jump to line 295, because the condition on line 294 was never true
295 return [""], [[]]
296 if isinstance(source, Group): 296 ↛ 297line 296 didn't jump to line 297, because the condition on line 296 was never true
297 if source[1] is None:
298 params = []
299 else:
300 params = [source[1]]
301 return [source[0]], [params]
302 result = [""]
303 result_args = [[]]
304 pos = last = 0
305 for pos, elt in enumerate(source):
306 if isinstance(elt, str):
307 continue
308 piece = "".join(source[last:pos])
309 if isinstance(elt, Group): 309 ↛ 313line 309 didn't jump to line 313, because the condition on line 309 was never false
310 piece += elt[0]
311 param = elt[1]
312 else:
313 param = None
314 last = pos + 1
315 for i in range(len(result)):
316 result[i] += piece
317 if param: 317 ↛ 315line 317 didn't jump to line 315, because the condition on line 317 was never false
318 result_args[i].append(param)
319 if isinstance(elt, (Choice, NonCapture)): 319 ↛ 320line 319 didn't jump to line 320, because the condition on line 319 was never true
320 if isinstance(elt, NonCapture):
321 elt = [elt]
322 inner_result, inner_args = [], []
323 for item in elt:
324 res, args = flatten_result(item)
325 inner_result.extend(res)
326 inner_args.extend(args)
327 new_result = []
328 new_args = []
329 for item, args in zip(result, result_args):
330 for i_item, i_args in zip(inner_result, inner_args):
331 new_result.append(item + i_item)
332 new_args.append(args[:] + i_args)
333 result = new_result
334 result_args = new_args
335 if pos >= last:
336 piece = "".join(source[last:])
337 for i in range(len(result)):
338 result[i] += piece
339 return result, result_args
342def _lazy_re_compile(regex, flags=0):
343 """Lazily compile a regex with flags."""
345 def _compile():
346 # Compile the regex if it was not passed pre-compiled.
347 if isinstance(regex, (str, bytes)):
348 return re.compile(regex, flags)
349 else:
350 assert not flags, "flags must be empty if regex is passed pre-compiled"
351 return regex
353 return SimpleLazyObject(_compile)