Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/django/utils/text.py: 24%
210 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1import html.entities
2import re
3import unicodedata
4from gzip import GzipFile
5from gzip import compress as gzip_compress
6from io import BytesIO
8from django.core.exceptions import SuspiciousFileOperation
9from django.utils.functional import SimpleLazyObject, keep_lazy_text, lazy
10from django.utils.regex_helper import _lazy_re_compile
11from django.utils.translation import gettext as _
12from django.utils.translation import gettext_lazy, pgettext
15@keep_lazy_text
def capfirst(x):
    """
    Capitalize the first letter of a string.

    Falsy values (``""``, ``None``, ``0``, ...) are returned unchanged. Any
    other non-string value is converted with ``str()`` first. Only the first
    character is upper-cased; the rest of the text is left as is.
    """
    if not x:
        return x
    if not isinstance(x, str):
        x = str(x)
    # Slice instead of indexing: a truthy object whose str() is empty must
    # produce "" rather than raise IndexError.
    return x[:1].upper() + x[1:]
# Lazily compiled regular expressions shared by the helpers in this module.

# An HTML tag, or (group 1) a run of characters free of whitespace and
# angle brackets.
re_words = _lazy_re_compile(r"<[^>]+?>|([^<>\s]+)", re.DOTALL)
# An HTML tag, or (group 1) any single character.
re_chars = _lazy_re_compile(r"<[^>]+?>|(.)", re.DOTALL)
# Groups: (1) closing-tag slash, (2) tag name, (3) self-closing slash.
re_tag = _lazy_re_compile(r"<(/)?(\S+?)(?:(\s*/)|\s.*?)?>", re.DOTALL)
# CRLF or lone CR; used in normalize_newlines.
re_newlines = _lazy_re_compile(r"\r\n|\r")
# Upper-case letters that start a new CamelCase word.
re_camel_case = _lazy_re_compile(r"(((?<=[a-z])[A-Z])|([A-Z](?![A-Z]|$)))")
33@keep_lazy_text
def wrap(text, width):
    """
    A word-wrap function that preserves existing line breaks. Expects that
    existing line breaks are posix newlines.

    Preserve all white space except added line breaks consume the space on
    which they break the line.

    Don't wrap long words, thus the output text may have lines longer than
    ``width``.

    A trailing newline counts toward a line's length when it is compared with
    ``width``.
    """

    def _generator():
        for line in text.splitlines(True):  # True keeps trailing linebreaks
            while len(line) > width:
                # Break on the last space that fits within the limit...
                space = line[: width + 1].rfind(" ") + 1
                if space == 0:
                    # ...or else on the first space after an over-long word.
                    space = line.find(" ") + 1
                    if space == 0:
                        # No space left at all: emit the remainder unbroken.
                        yield line
                        line = ""
                        break
                # The space that was broken on is replaced by the newline.
                yield "%s\n" % line[: space - 1]
                line = line[space:]
            if line:
                yield line

    return "".join(_generator())
class Truncator(SimpleLazyObject):
    """
    Lazy wrapper around ``str(text)`` that can shorten the text to a number
    of characters or a number of words.
    """

    def __init__(self, text):
        super().__init__(lambda: str(text))

    def add_truncation_text(self, text, truncate=None):
        """
        Combine ``text`` with the truncation marker ``truncate``.

        When ``truncate`` is None, a translatable default ending in an
        ellipsis is used. A marker containing ``%(truncated_text)s`` is used
        as a format string; any other marker is appended to ``text``.
        """
        marker = truncate
        if marker is None:
            marker = pgettext(
                "String to return when truncating text", "%(truncated_text)s…"
            )
        if "%(truncated_text)s" in marker:
            return marker % {"truncated_text": text}
        # No placeholder in the marker, so it is simply appended -- unless
        # the text already ends with it.
        if text.endswith(marker):
            return text
        return "%s%s" % (text, marker)

    def chars(self, num, truncate=None, html=False):
        """
        Return the text truncated to be no longer than the specified number
        of characters.

        `truncate` specifies what should be used to notify that the string has
        been truncated, defaulting to a translatable string of an ellipsis.
        """
        self._setup()
        length = int(num)
        text = unicodedata.normalize("NFC", self._wrapped)

        # Room left for real text once the marker's own non-combining
        # characters have been subtracted from the limit.
        truncate_len = length
        for marker_char in self.add_truncation_text("", truncate):
            if unicodedata.combining(marker_char):
                continue
            truncate_len -= 1
            if truncate_len == 0:
                break
        if not html:
            return self._text_chars(length, truncate, text, truncate_len)
        return self._truncate_html(length, truncate, text, truncate_len, False)

    def _text_chars(self, length, truncate, text, truncate_len):
        """Truncate a string after a certain number of chars."""
        visible = 0
        cut_at = None
        for index, char in enumerate(text):
            # Combining characters do not add to the visible length.
            if unicodedata.combining(char):
                continue
            visible += 1
            if cut_at is None and visible > truncate_len:
                cut_at = index
            if visible > length:
                # Over the limit: cut and attach the marker.
                return self.add_truncation_text(text[: cut_at or 0], truncate)

        # The whole text fits; return it untouched.
        return text

    def words(self, num, truncate=None, html=False):
        """
        Truncate a string after a certain number of words. `truncate` specifies
        what should be used to notify that the string has been truncated,
        defaulting to ellipsis.
        """
        self._setup()
        length = int(num)
        if not html:
            return self._text_words(length, truncate)
        return self._truncate_html(length, truncate, self._wrapped, length, True)

    def _text_words(self, length, truncate):
        """
        Truncate a string after a certain number of words.

        Strip newlines in the string.
        """
        tokens = self._wrapped.split()
        if len(tokens) <= length:
            return " ".join(tokens)
        return self.add_truncation_text(" ".join(tokens[:length]), truncate)

    def _truncate_html(self, length, truncate, text, truncate_len, words):
        """
        Truncate HTML to a certain number of chars (not counting tags and
        comments), or, if words is True, then to a certain number of words.
        Close opened tags if they were correctly closed in the given HTML.

        Preserve newlines in the HTML.
        """
        if words and length <= 0:
            return ""

        html4_singlets = (
            "br",
            "col",
            "link",
            "base",
            "img",
            "param",
            "area",
            "hr",
            "input",
        )

        # Walk the text counting words/chars while tracking unclosed tags.
        scan_pos = 0
        cut_pos = 0
        counted = 0
        unclosed = []

        pattern = re_words if words else re_chars

        while counted <= length:
            found = pattern.search(text, scan_pos)
            if not found:
                # Reached the end of the text.
                break
            scan_pos = found.end(0)
            if found[1]:
                # A real word or character, not markup.
                counted += 1
                if counted == truncate_len:
                    cut_pos = scan_pos
                continue
            tag = re_tag.match(found[0])
            if not tag or counted >= truncate_len:
                # Not a tag, or a tag past the truncation point.
                continue
            closing_tag, tagname, self_closing = tag.groups()
            # Element names are always case-insensitive
            tagname = tagname.lower()
            if self_closing or tagname in html4_singlets:
                continue
            if not closing_tag:
                # Most recently opened tag goes first.
                unclosed.insert(0, tagname)
            elif tagname in unclosed:
                # SGML: An end tag closes, back to the matching start tag,
                # all unclosed intervening start tags with omitted end tags
                unclosed = unclosed[unclosed.index(tagname) + 1 :]

        if counted <= length:
            return text
        pieces = [text[:cut_pos], self.add_truncation_text("", truncate)]
        # Close any tags still open
        pieces.extend("</%s>" % name for name in unclosed)
        return "".join(pieces)
235@keep_lazy_text
def get_valid_filename(name):
    """
    Turn ``name`` into a string suitable for use as a clean filename.

    Surrounding whitespace is stripped, remaining spaces become underscores,
    and every character that is not alphanumeric, a dash, an underscore, or a
    dot is dropped. Raise SuspiciousFileOperation if the result is empty,
    ``"."`` or ``".."``.

    >>> get_valid_filename("john's portrait in 2004.jpg")
    'johns_portrait_in_2004.jpg'
    """
    underscored = str(name).strip().replace(" ", "_")
    cleaned = re.sub(r"(?u)[^-\w.]", "", underscored)
    if cleaned not in ("", ".", ".."):
        return cleaned
    raise SuspiciousFileOperation("Could not derive file name from '%s'" % name)
252@keep_lazy_text
def get_text_list(list_, last_word=gettext_lazy("or")):
    """
    Join the items of ``list_`` into a readable enumeration: all but the last
    item are joined with a translatable separator, then ``last_word`` and the
    final item follow.

    >>> get_text_list(['a', 'b', 'c', 'd'])
    'a, b, c or d'
    >>> get_text_list(['a', 'b', 'c'], 'and')
    'a, b and c'
    >>> get_text_list(['a', 'b'], 'and')
    'a and b'
    >>> get_text_list(['a'])
    'a'
    >>> get_text_list([])
    ''
    """
    if not list_:
        return ""
    if len(list_) == 1:
        return str(list_[0])
    leading = (str(item) for item in list_[:-1])
    return "%s %s %s" % (
        # Translators: This string is used as a separator between list elements
        _(", ").join(leading),
        str(last_word),
        str(list_[-1]),
    )
278@keep_lazy_text
def normalize_newlines(text):
    """Replace every CRLF or lone CR in ``str(text)`` with a single LF."""
    as_text = str(text)
    return re_newlines.sub("\n", as_text)
284@keep_lazy_text
def phone2numeric(phone):
    """
    Convert a phone number with letters into its numeric equivalent.

    Letters are matched case-insensitively; all other characters pass through
    unchanged (letters outside a-z come back lower-cased).
    """
    # Telephone keypad layout: digit -> letters printed on that key.
    keypad = {
        "2": "abc",
        "3": "def",
        "4": "ghi",
        "5": "jkl",
        "6": "mno",
        "7": "pqrs",
        "8": "tuv",
        "9": "wxyz",
    }
    char2number = {
        letter: digit for digit, letters in keypad.items() for letter in letters
    }
    return "".join(char2number.get(c, c) for c in phone.lower())
def compress_string(s):
    """
    Return ``s`` gzip-compressed at level 6.

    The header timestamp is pinned to 0 (``mtime=0``).
    """
    compressed = gzip_compress(s, compresslevel=6, mtime=0)
    return compressed
class StreamingBuffer(BytesIO):
    """In-memory byte buffer whose ``read()`` drains everything held so far."""

    def read(self):
        """Return the full buffer content, then empty the buffer and rewind."""
        drained = self.getvalue()
        self.truncate(0)
        self.seek(0)
        return drained
# Streaming counterpart of compress_string, for an iterable of chunks.
def compress_sequence(sequence):
    """
    Gzip-compress the items of ``sequence`` incrementally.

    Yield the gzip header first, then any compressed output that becomes
    available after each item is written, and finally whatever is flushed
    when the gzip stream is closed. Uses level 6 and ``mtime=0``.
    """
    sink = StreamingBuffer()
    with GzipFile(mode="wb", compresslevel=6, fileobj=sink, mtime=0) as gz:
        # The header is written as soon as the GzipFile is opened.
        yield sink.read()
        for chunk in sequence:
            gz.write(chunk)
            produced = sink.read()
            if not produced:
                continue
            yield produced
    # Leaving the with-block closes the stream and writes the trailer.
    yield sink.read()
# Matches one token for smart_split(): either text containing one or more
# double- or single-quoted sections (backslash escapes allowed inside the
# quotes, e.g. some_token="with spaces"), or a plain run of non-whitespace
# (e.g. some_token).
smart_split_re = _lazy_re_compile(
    r"""
    ((?:
        [^\s'"]*
        (?:
            (?:"(?:[^"\\]|\\.)*" | '(?:[^'\\]|\\.)*')
            [^\s'"]*
        )+
    ) | \S+)
""",
    re.VERBOSE,
)
def smart_split(text):
    r"""
    Yield the space-separated pieces of ``str(text)``, keeping quoted phrases
    in one piece.

    Both single and double quotes are recognised, and a backslash escapes a
    quote inside a quoted phrase. Pieces are yielded verbatim: surrounding
    quote marks are kept and escaped quotes stay escaped (the results can
    then be further processed with unescape_string_literal()).

    >>> list(smart_split(r'This is "a person\'s" test.'))
    ['This', 'is', '"a person\\\'s"', 'test.']
    >>> list(smart_split(r"Another 'person\'s' test."))
    ['Another', "'person\\'s'", 'test.']
    >>> list(smart_split(r'A "\"funky\" style" test.'))
    ['A', '"\\"funky\\" style"', 'test.']
    """
    source = str(text)
    for token in smart_split_re.finditer(source):
        yield token.group(0)
def _replace_entity(match):
    """
    Substitution callback that decodes one HTML character reference.

    ``match[1]`` is the reference body (the text between ``&`` and ``;``):
    ``#NNN`` for decimal, ``#xHHH`` / ``#XHHH`` for hexadecimal, or an entity
    name looked up in ``html.entities.name2codepoint``. Return the decoded
    character, or the original matched text (``match[0]``) when the reference
    cannot be decoded.
    """
    text = match[1]
    if text.startswith("#"):
        digits = text[1:]
        try:
            # Slice so that an empty body falls through to int("") and is
            # reported as undecodable instead of raising IndexError.
            if digits[:1] in ("x", "X"):
                codepoint = int(digits[1:], 16)
            else:
                codepoint = int(digits)
            return chr(codepoint)
        except (ValueError, OverflowError):
            # ValueError: not a number, or outside the Unicode range.
            # OverflowError: chr() on a value too large for a C int.
            return match[0]
    try:
        return chr(html.entities.name2codepoint[text])
    except KeyError:
        return match[0]
# Matches text shaped like a character reference (e.g. "&#65;", "&#x41;" or
# "&amp;"); group 1 is everything between the "&" and the ";".
_entity_re = _lazy_re_compile(
    r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));"
)
401@keep_lazy_text
def unescape_string_literal(s):
    r"""
    Convert quoted string literals to unquoted strings with escaped quotes and
    backslashes unquoted::

        >>> unescape_string_literal('"abc"')
        'abc'
        >>> unescape_string_literal("'abc'")
        'abc'
        >>> unescape_string_literal('"a \"bc\""')
        'a "bc"'
        >>> unescape_string_literal("'\'ab\' c'")
        "'ab' c"

    Raise ValueError if ``s`` is empty, does not start with a single or
    double quote, or does not end with the same quote it starts with.
    """
    # Check emptiness first so that "" raises ValueError like any other
    # non-literal instead of IndexError from s[0].
    if not s or s[0] not in "\"'" or s[-1] != s[0]:
        raise ValueError("Not a string literal: %r" % s)
    quote = s[0]
    return s[1:-1].replace(r"\%s" % quote, quote).replace(r"\\", "\\")
422@keep_lazy_text
def slugify(value, allow_unicode=False):
    """
    Build a slug from ``value``.

    Unless 'allow_unicode' is True, the text is reduced to ASCII. The result
    is lower-cased; characters other than alphanumerics, underscores,
    hyphens, and whitespace are removed; runs of whitespace and/or dashes
    collapse into a single dash; leading and trailing dashes and underscores
    are stripped.
    """
    text = str(value)
    if allow_unicode:
        text = unicodedata.normalize("NFKC", text)
    else:
        # Decompose, then drop whatever cannot be encoded as ASCII.
        ascii_bytes = unicodedata.normalize("NFKD", text).encode("ascii", "ignore")
        text = ascii_bytes.decode("ascii")
    without_symbols = re.sub(r"[^\w\s-]", "", text.lower())
    dashed = re.sub(r"[-\s]+", "-", without_symbols)
    return dashed.strip("-_")
def camel_case_to_spaces(value):
    """
    Insert a space before each CamelCase word boundary in ``value``, then
    strip surrounding whitespace and lower-case the result.
    """
    spaced = re_camel_case.sub(r" \1", value)
    return spaced.strip().lower()
def _format_lazy(format_string, *args, **kwargs):
    """
    Call ``format_string.format(*args, **kwargs)`` and return the result.

    Intended for cases where 'format_string', args, and/or kwargs might be
    lazy.
    """
    formatted = format_string.format(*args, **kwargs)
    return formatted


# Lazy wrapper around _format_lazy, created with lazy() and str as the
# declared result class.
format_lazy = lazy(_format_lazy, str)