Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/django/test/html.py: 18%

1"""Compare two HTML documents."""

3from html.parser import HTMLParser

5from django.utils.regex_helper import _lazy_re_compile

# The ASCII whitespace characters are TAB (U+0009), LF (U+000A), FF (U+000C),
# CR (U+000D), and SPACE (U+0020).
# https://infra.spec.whatwg.org/#ascii-whitespace
# The pattern matches one or more consecutive characters from that set.
ASCII_WHITESPACE = _lazy_re_compile(r"[\t\n\f\r ]+")

# Names of HTML boolean attributes. normalize_attributes() tests membership
# with an exact string match, so every entry must be the bare attribute name
# with no surrounding whitespace.
# https://html.spec.whatwg.org/#attributes-3
BOOLEAN_ATTRIBUTES = {
    "allowfullscreen",
    "async",
    "autofocus",
    "autoplay",
    "checked",
    "controls",
    "default",
    "defer",
    "disabled",
    "formnovalidate",
    "hidden",
    "ismap",
    "itemscope",
    "loop",
    "multiple",
    "muted",
    "nomodule",
    "novalidate",
    "open",
    "playsinline",
    "readonly",
    "required",
    "reversed",
    "selected",
    # Attributes for deprecated tags.
    "truespeed",
}

def normalize_whitespace(string):
    """Return ``string`` with every run of ASCII whitespace replaced by " "."""
    collapsed = ASCII_WHITESPACE.sub(" ", string)
    return collapsed

def normalize_attributes(attributes):
    """
    Return a list of ``(name, value)`` pairs in a canonical form.

    * A non-empty ``class`` value has its class names sorted and joined with
      single spaces, so comparisons ignore class ordering.
    * A boolean attribute whose value is empty or equal to its own name gets
      the value ``None``; for example ``<input checked>`` and
      ``<input checked="checked">`` normalize identically.
    * Any other attribute whose value is ``None`` gets the value ``""``.
    """

    def canonical(name, value):
        if name == "class" and value:
            class_names = [
                token for token in ASCII_WHITESPACE.split(value) if token
            ]
            class_names.sort()
            value = " ".join(class_names)
        if name in BOOLEAN_ATTRIBUTES:
            if not value or value == name:
                return name, None
            return name, value
        if value is None:
            return name, ""
        return name, value

    return [canonical(name, value) for name, value in attributes]

class Element:
    """
    A node of the comparison tree: a tag name, a sorted list of attribute
    pairs, and a list of children that are either ``str`` text or other
    elements.
    """

    def __init__(self, name, attributes):
        self.name = name
        # Sorted so that attribute order does not affect equality.
        self.attributes = sorted(attributes)
        self.children = []

    def append(self, element):
        """
        Add a text node or a child element.

        Text is whitespace-normalized and merged into a directly preceding
        text child. When a non-text child is added, a whitespace-only text
        child immediately before it is dropped. Falsy values (such as an
        empty string) are not stored.
        """
        if isinstance(element, str):
            element = normalize_whitespace(element)
            if self.children and isinstance(self.children[-1], str):
                merged = self.children[-1] + element
                self.children[-1] = normalize_whitespace(merged)
                return
        elif self.children:
            # Removing the last child if it is only whitespace. This can
            # result in incorrect DOM representations since whitespace between
            # inline tags like <span> is significant.
            previous = self.children[-1]
            if isinstance(previous, str) and previous.isspace():
                self.children.pop()
        if element:
            self.children.append(element)

    def finalize(self):
        """
        Trim text children in place and finalize nested children.

        Trailing text children that are empty after right-stripping are
        removed; every remaining text child is stripped on both sides.
        """
        nodes = self.children
        # Right-strip the trailing text child, discarding it and moving on to
        # the new last child for as long as the result is empty.
        while nodes and isinstance(nodes[-1], str):
            nodes[-1] = nodes[-1].rstrip()
            if nodes[-1]:
                break
            nodes.pop()
        for index, node in enumerate(nodes):
            if isinstance(node, str):
                nodes[index] = node.strip()
            elif hasattr(node, "finalize"):
                node.finalize()

    def __eq__(self, element):
        if not hasattr(element, "name"):
            return False
        if self.name != element.name or self.attributes != element.attributes:
            return False
        return self.children == element.children

    def __hash__(self):
        # Children are not hashed; equal elements still hash equally.
        return hash((self.name,) + tuple(self.attributes))

    def _count(self, element, count=True):
        """
        Count occurrences of ``element`` (text, an element, or a RootElement
        holding a sequence of nodes) inside this element.

        With ``count=False`` the search stops at the first hit and the return
        value is only meaningful as a truth value.
        """
        wants_text = isinstance(element, str)
        if not wants_text and self == element:
            return 1
        wants_sequence = isinstance(element, RootElement)
        if wants_sequence and self.children == element.children:
            return 1
        total = 0
        matched = 0
        for child in self.children:
            if isinstance(child, str):
                # Text child and text needle: plain substring search.
                if wants_text:
                    if count:
                        total += child.count(element)
                    elif element in child:
                        return 1
                continue
            # Look for element wholly within this child.
            total += child._count(element, count=count)
            if not count and total:
                return total
            # Also look for a sequence of element's children among self's
            # children. self.children == element.children is tested above,
            # but will fail if self has additional children. Ex: '<a/><b/>'
            # is contained in '<a/><b/><c/>'.
            if not (wants_sequence and element.children):
                continue
            if element.children[matched] == child:
                # Start or continue a match, advance the index.
                matched += 1
                if matched == len(element.children):
                    # Whole sequence matched, reset the index.
                    total += 1
                    matched = 0
            else:
                # No match, reset the index.
                matched = 0
        return total

    def __contains__(self, element):
        hits = self._count(element, count=False)
        return hits > 0

    def count(self, element):
        """Return how many times ``element`` occurs inside this element."""
        return self._count(element, count=True)

    def __getitem__(self, key):
        return self.children[key]

    def __str__(self):
        pieces = ["<%s" % self.name]
        for key, value in self.attributes:
            if value is None:
                pieces.append(" %s" % key)
            else:
                pieces.append(' %s="%s"' % (key, value))
        if not self.children:
            pieces.append(">")
        else:
            pieces.append(">\n")
            pieces.extend(str(c) for c in self.children)
            pieces.append("\n</%s>" % self.name)
        return "".join(pieces)

    def __repr__(self):
        return str(self)

181

182

class RootElement(Element):
    """Container element with no tag name and no attributes."""

    def __init__(self):
        super().__init__(None, ())

    def __str__(self):
        # Render only the children; the root contributes no markup itself.
        rendered = [str(child) for child in self.children]
        return "".join(rendered)

189

190

class HTMLParseError(Exception):
    """Raised by ``Parser.error()`` with a message and the parser position."""

193

194

class Parser(HTMLParser):
    """
    HTMLParser subclass that builds a tree of Element objects under
    ``self.root`` while tracking the stack of currently open tags.
    """

    # https://html.spec.whatwg.org/#void-elements
    SELF_CLOSING_TAGS = {
        "area",
        "base",
        "br",
        "col",
        "embed",
        "hr",
        "img",
        "input",
        "link",
        "meta",
        "param",
        "source",
        "track",
        "wbr",
        # Deprecated tags
        "frame",
        "spacer",
    }

    def __init__(self):
        super().__init__()
        self.root = RootElement()
        # Stack of elements whose end tag has not been seen yet.
        self.open_tags = []
        # Maps each element to the parser position recorded at its start tag.
        self.element_positions = {}

    def error(self, msg):
        """Raise HTMLParseError carrying ``msg`` and the current position."""
        raise HTMLParseError(msg, self.getpos())

    def format_position(self, position=None, element=None):
        """
        Return "Line X, Column Y" for ``position``, for the recorded position
        of ``element``, or for the parser's current position.
        """
        if not position and element:
            position = self.element_positions[element]
        if position is None:
            position = self.getpos()
        if hasattr(position, "lineno"):
            position = (position.lineno, position.offset)
        return "Line %d, Column %d" % position

    @property
    def current(self):
        """The innermost open element, or the root when none is open."""
        return self.open_tags[-1] if self.open_tags else self.root

    def handle_startendtag(self, tag, attrs):
        self.handle_starttag(tag, attrs)
        # Void elements are never pushed on the stack, so nothing to close.
        if tag in self.SELF_CLOSING_TAGS:
            return
        self.handle_endtag(tag)

    def handle_starttag(self, tag, attrs):
        element = Element(tag, normalize_attributes(attrs))
        self.current.append(element)
        if tag not in self.SELF_CLOSING_TAGS:
            self.open_tags.append(element)
        self.element_positions[element] = self.getpos()

    def handle_endtag(self, tag):
        # Pop open elements until the one named ``tag`` has been popped;
        # running out of open elements first is reported as an error.
        while True:
            if not self.open_tags:
                self.error(
                    "Unexpected end tag `%s` (%s)" % (tag, self.format_position())
                )
            element = self.open_tags.pop()
            if element.name != tag:
                continue
            return

    def handle_data(self, data):
        self.current.append(data)

268

269

def parse_html(html):
    """
    Parse a string of HTML into a tree of Python objects that can be compared
    with another parsed document for semantic equivalence. Purely syntactical
    differences, such as which quotation marks surround attribute values, do
    not affect the result.
    """
    parser = Parser()
    parser.feed(html)
    parser.close()
    root = parser.root
    root.finalize()
    # The root wrapper is unnecessary when it holds exactly one non-text
    # child; return that child directly.
    children = root.children
    if len(children) == 1 and not isinstance(children[0], str):
        return children[0]
    return root