Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/django/utils/regex_helper.py: 60%

193 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1""" 

2Functions for reversing a regular expression (used in reverse URL resolving). 

3Used internally by Django and not intended for external use. 

4 

5This is not, and is not intended to be, a complete reg-exp decompiler. It 

6should be good enough for a large class of URLs, however. 

7""" 

8import re 

9 

10from django.utils.functional import SimpleLazyObject 

11 

# Mapping of a regex escape letter (the character following a backslash) to a
# representative character of that class. So, e.g., "\w" is replaced by "x"
# in a reverse URL. A value of None means the sequence is ignored entirely.
# Any missing key is mapped to itself by the lookup in next_char().
ESCAPE_MAPPINGS = {
    "A": None,
    "b": None,
    "B": None,
    "d": "0",
    "D": "x",
    "s": " ",
    "S": "x",
    "w": "x",
    "W": "!",
    "Z": None,
}

27 

28 

class Choice(list):
    """
    Represent multiple possibilities at this point in a pattern string.

    Each element is one alternative; normalize() stores None as an element
    to stand for the "nothing here" alternative of an optional item.
    """

31 

32 

class Group(list):
    """
    Represent a capturing group in the pattern string.

    normalize() builds each instance from a two-item tuple: the "%(name)s"
    placeholder text, followed by the parameter name (or None when the
    placeholder refers back to an earlier named group).
    """

35 

36 

class NonCapture(list):
    """
    Represent a non-capturing group in the pattern string.

    The elements are the already-normalized items found inside the group, so
    that the group can be quantified as a single unit.
    """

39 

40 

def normalize(pattern):
    r"""
    Given a reg-exp pattern, normalize it to an iterable of forms that
    suffice for reverse matching. This does the following:

    (1) For any repeating sections, keeps the minimum number of occurrences
        permitted (this means zero for optional groups).
    (2) If an optional group includes parameters, include one occurrence of
        that group (along with the zero occurrence case from step (1)).
    (3) Select the first (essentially an arbitrary) element from any character
        class. Select an arbitrary character for any unordered class (e.g. '.'
        or '\w') in the pattern.
    (4) Ignore look-ahead and look-behind assertions.
    (5) Give up on any disjunctive ('|') constructs.

    Django's URLs for forward resolving are either all positional arguments or
    all keyword arguments. That is assumed here, as well. Although reverse
    resolving can be done using positional args when keyword args are
    specified, the two cannot be mixed in the same reverse() call.

    Return a list of (format_string, parameter_names) tuples, one per
    possible form. An empty pattern, or a pattern containing an unescaped
    '|', yields [("", [])].

    Raise ValueError for a "(?..." construct that cannot be reversed.
    """
    # Do a linear scan to work out the special features of this pattern. The
    # idea is that we scan once here and collect all the information we need to
    # make future decisions.
    result = []
    # Stack of indexes into "result" marking where each open non-capturing
    # group started.
    non_capturing_groups = []
    consume_next = True
    pattern_iter = next_char(iter(pattern))
    num_args = 0

    # A "while" loop is used here because later on we need to be able to peek
    # at the next character and possibly go around without consuming another
    # one at the top of the loop.
    try:
        ch, escaped = next(pattern_iter)
    except StopIteration:
        return [("", [])]

    try:
        while True:
            if escaped:
                result.append(ch)
            elif ch == ".":
                # Replace "any character" with an arbitrary representative.
                result.append(".")
            elif ch == "|":
                # FIXME: One day we should do this, but not in 1.0.
                raise NotImplementedError("Awaiting Implementation")
            elif ch == "^":
                pass
            elif ch == "$":
                break
            elif ch == ")":
                # This can only be the end of a non-capturing group, since all
                # other unescaped parentheses are handled by the grouping
                # section later (and the full group is handled there).
                #
                # We regroup everything inside the non-capturing group so that
                # it can be quantified, if necessary.
                start = non_capturing_groups.pop()
                inner = NonCapture(result[start:])
                result = result[:start] + [inner]
            elif ch == "[":
                # Replace ranges with the first character in the range.
                ch, escaped = next(pattern_iter)
                result.append(ch)
                ch, escaped = next(pattern_iter)
                while escaped or ch != "]":
                    ch, escaped = next(pattern_iter)
            elif ch == "(":
                # Some kind of group.
                ch, escaped = next(pattern_iter)
                if ch != "?" or escaped:
                    # A positional group
                    name = "_%d" % num_args
                    num_args += 1
                    result.append(Group((("%%(%s)s" % name), name)))
                    walk_to_end(ch, pattern_iter)
                else:
                    ch, escaped = next(pattern_iter)
                    if ch in "!=<":
                        # All of these are ignorable. Walk to the end of the
                        # group.
                        walk_to_end(ch, pattern_iter)
                    elif ch == ":":
                        # Non-capturing group
                        non_capturing_groups.append(len(result))
                    elif ch != "P":
                        # Anything else, other than a named group, is something
                        # we cannot reverse.
                        raise ValueError("Non-reversible reg-exp portion: '(?%s'" % ch)
                    else:
                        ch, escaped = next(pattern_iter)
                        if ch not in ("<", "="):
                            raise ValueError(
                                "Non-reversible reg-exp portion: '(?P%s'" % ch
                            )
                        # We are in a named capturing group. Extract the name
                        # and then skip to the end.
                        if ch == "<":
                            terminal_char = ">"
                        # We are in a named backreference.
                        else:
                            terminal_char = ")"
                        name = []
                        ch, escaped = next(pattern_iter)
                        while ch != terminal_char:
                            name.append(ch)
                            ch, escaped = next(pattern_iter)
                        param = "".join(name)
                        # Named backreferences have already consumed the
                        # parenthesis.
                        if terminal_char != ")":
                            result.append(Group((("%%(%s)s" % param), param)))
                            walk_to_end(ch, pattern_iter)
                        else:
                            result.append(Group((("%%(%s)s" % param), None)))
            elif ch in "*?+{":
                # Quantifiers affect the previous item in the result list.
                count, ch = get_quantifier(ch, pattern_iter)
                if ch:
                    # We had to look ahead, but it wasn't needed to compute
                    # the quantifier, so use this character next time around
                    # the main loop.
                    #
                    # NOTE: get_quantifier() does not return the look-ahead
                    # character's escaped flag, so "escaped" keeps its current
                    # (False) value and the character is handled as unescaped
                    # on the next pass.
                    consume_next = False

                if count == 0:
                    if contains(result[-1], Group):
                        # If we are quantifying a capturing group (or
                        # something containing such a group) and the minimum is
                        # zero, we must also handle the case of one occurrence
                        # being present. All the quantifiers (except {0,0},
                        # which we conveniently ignore) that have a 0 minimum
                        # also allow a single occurrence.
                        result[-1] = Choice([None, result[-1]])
                    else:
                        result.pop()
                elif count > 1:
                    result.extend([result[-1]] * (count - 1))
            else:
                # Anything else is a literal.
                result.append(ch)

            if consume_next:
                ch, escaped = next(pattern_iter)
            consume_next = True
    except StopIteration:
        # The pattern ran out; whatever was collected so far is the result.
        pass
    except NotImplementedError:
        # A case of using the disjunctive form. No results for you!
        return [("", [])]

    return list(zip(*flatten_result(result)))

193 

194 

def next_char(input_iter):
    r"""
    An iterator that yields the next character from "input_iter", respecting
    escape sequences. An escaped character is replaced by a representative of
    its class (e.g. \w -> "x"). If the escaped character is one that is
    skipped, it is not returned (the next character is returned instead).

    "input_iter" must be an iterator (not merely an iterable) over single
    characters, because the character following a backslash is pulled from
    it directly.

    Yield (character, escaped) pairs, where "escaped" is True if the
    character came from a backslash escape sequence and False if it is a raw
    (unescaped) character.

    A lone backslash at the very end of the input ends the iteration.
    """
    for ch in input_iter:
        if ch != "\\":
            yield ch, False
            continue
        # Use a default rather than letting StopIteration escape: inside a
        # generator that would be converted to RuntimeError (PEP 479) instead
        # of simply ending the iteration.
        ch = next(input_iter, None)
        if ch is None:
            return
        representative = ESCAPE_MAPPINGS.get(ch, ch)
        if representative is None:
            continue
        yield representative, True

214 

215 

def walk_to_end(ch, input_iter):
    """
    The iterator is currently inside a capturing group. Walk to the close of
    this group, skipping over any nested groups and handling escaped
    parentheses correctly.

    "ch" is the character most recently read from the group; if it is an
    opening parenthesis, one nested group is already open. "input_iter"
    yields (character, escaped) pairs and is consumed up to and including
    the parenthesis that closes the group (or to exhaustion if there is
    none). Return None.
    """
    nesting = 1 if ch == "(" else 0
    for ch, escaped in input_iter:
        if escaped:
            # Escaped parentheses are literals and never affect nesting.
            continue
        elif ch == "(":
            nesting += 1
        elif ch == ")":
            if not nesting:
                return
            nesting -= 1

235 

236 

def get_quantifier(ch, input_iter):
    """
    Parse a quantifier from the input, where "ch" is the first character in the
    quantifier.

    "input_iter" yields (character, escaped) pairs positioned just after
    "ch".

    Return the minimum number of occurrences permitted by the quantifier and
    either None or the next character from the input_iter if the next character
    is not part of the quantifier. A "?" directly after the quantifier is
    consumed and reported as None.

    For the brace form, an omitted lower bound ("{,n}") means a minimum of
    zero. StopIteration propagates if the input ends before the closing "}".
    """
    if ch in "*?+":
        try:
            ch2, escaped = next(input_iter)
        except StopIteration:
            ch2 = None
        if ch2 == "?":
            ch2 = None
        if ch == "+":
            return 1, ch2
        return 0, ch2

    # Brace form: collect everything up to the closing "}".
    quant = []
    while ch != "}":
        ch, escaped = next(input_iter)
        quant.append(ch)
    quant = quant[:-1]
    values = "".join(quant).split(",")

    # Consume the trailing '?', if necessary.
    try:
        ch, escaped = next(input_iter)
    except StopIteration:
        ch = None
    if ch == "?":
        ch = None

    # "{,n}" omits the lower bound, which regex syntax defines as zero.
    if values[0] == "" and len(values) > 1:
        minimum = 0
    else:
        minimum = int(values[0])
    return minimum, ch

272 

273 

def contains(source, inst):
    """
    Return True if the "source" contains an instance of "inst". False,
    otherwise.

    "source" counts as containing "inst" if it is itself an instance of it,
    or if it is a NonCapture whose elements (searched recursively) contain
    one. Other containers are not searched.
    """
    if isinstance(source, inst):
        return True
    if isinstance(source, NonCapture):
        return any(contains(elt, inst) for elt in source)
    return False

286 

287 

def flatten_result(source):
    """
    Turn the given source sequence into a list of reg-exp possibilities and
    their arguments.

    "source" is None, a Group, or a sequence whose elements are strings,
    Group, Choice, or NonCapture instances.

    Return a pair (strings, argument_lists): a list of format strings and a
    list of argument-name lists. The two lists are of the same length and
    correspond element by element.
    """
    if source is None:
        return [""], [[]]
    if isinstance(source, Group):
        params = [] if source[1] is None else [source[1]]
        return [source[0]], [params]
    result = [""]
    result_args = [[]]
    pos = last = 0
    for pos, elt in enumerate(source):
        if isinstance(elt, str):
            # Plain text is accumulated lazily: it is joined in one go when
            # the next non-string element (or the end) is reached.
            continue
        piece = "".join(source[last:pos])
        if isinstance(elt, Group):
            piece += elt[0]
            param = elt[1]
        else:
            param = None
        last = pos + 1
        result = [prefix + piece for prefix in result]
        if param:
            for args in result_args:
                args.append(param)
        if isinstance(elt, (Choice, NonCapture)):
            if isinstance(elt, NonCapture):
                # Treat the group as a choice with a single alternative.
                elt = [elt]
            inner_result, inner_args = [], []
            for item in elt:
                res, args = flatten_result(item)
                inner_result.extend(res)
                inner_args.extend(args)
            # Combine every form built so far with every inner alternative.
            new_result = []
            new_args = []
            for item, args in zip(result, result_args):
                for i_item, i_args in zip(inner_result, inner_args):
                    new_result.append(item + i_item)
                    new_args.append(args[:] + i_args)
            result = new_result
            result_args = new_args
    if pos >= last:
        # Append any plain text remaining after the last non-string element.
        piece = "".join(source[last:])
        result = [prefix + piece for prefix in result]
    return result, result_args

340 

341 

def _lazy_re_compile(regex, flags=0):
    """
    Lazily compile a regex with flags.

    "regex" is either a str/bytes pattern, compiled with "flags" on first
    use, or an already-compiled object, which is handed back unchanged (in
    which case "flags" must be empty).

    Return a SimpleLazyObject wrapping the deferred compilation.
    """

    def _compile():
        # Compile the regex if it was not passed pre-compiled.
        if isinstance(regex, (str, bytes)):
            return re.compile(regex, flags)
        else:
            assert not flags, "flags must be empty if regex is passed pre-compiled"
            return regex

    return SimpleLazyObject(_compile)