Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/charset_normalizer/legacy.py: 54%
40 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1import warnings
2from typing import Dict, Optional, Union
4from .api import from_bytes, from_fp, from_path, normalize
5from .constant import CHARDET_CORRESPONDENCE
6from .models import CharsetMatch, CharsetMatches
def detect(byte_str: bytes) -> Dict[str, Optional[Union[str, float]]]:
    """
    chardet legacy method
    Guess the encoding of a byte string and report it in a chardet-style dictionary.
    The result is meant to be mostly backward-compatible with chardet: the encoding
    name is translated through CHARDET_CORRESPONDENCE whenever an entry exists for it.
    This function is deprecated and exists to ease project migration, consult the
    documentation for further information. Not planned for removal.

    :param byte_str: The byte sequence to examine (bytes or bytearray).
    :return: A dict with the keys "encoding", "language" and "confidence".
        "encoding" and "confidence" are None when no match was found; "language"
        is an empty string when no match was found or the language is "Unknown".
    :raises TypeError: If byte_str is neither bytes nor bytearray.
    """
    if not isinstance(byte_str, (bytearray, bytes)):
        raise TypeError(  # pragma: nocover
            "Expected object of type bytes or bytearray, got: "
            "{0}".format(type(byte_str))
        )

    # from_bytes is handed an immutable bytes object, so bytearray input is copied.
    payload = bytes(byte_str) if isinstance(byte_str, bytearray) else byte_str
    best_match = from_bytes(payload).best()

    if best_match is None:
        encoding = None
        language = ""
        confidence = None
    else:
        encoding = best_match.encoding
        detected_language = best_match.language
        language = "" if detected_language == "Unknown" else detected_language
        # Confidence is reported as the complement of the match's chaos ratio.
        confidence = 1.0 - best_match.chaos

        # Note: CharsetNormalizer does not return 'UTF-8-SIG' because the signature is stripped
        # during detection/normalization, but chardet does return 'utf-8-sig' (a valid codec name),
        # so the suffix is restored here when a BOM was seen.
        if encoding == "utf_8" and best_match.bom:
            encoding += "_sig"

    # Prefer chardet's own spelling of the encoding name when one is known.
    if encoding in CHARDET_CORRESPONDENCE:
        encoding = CHARDET_CORRESPONDENCE[encoding]

    return {
        "encoding": encoding,
        "language": language,
        "confidence": confidence,
    }
class CharsetNormalizerMatch(CharsetMatch):
    """Subclass of CharsetMatch that adds no behaviour of its own.

    NOTE(review): presumably kept only so the older class name stays importable — confirm.
    """
def _warn_deprecated_staticmethod() -> None:
    """Emit the shared DeprecationWarning used by the legacy static methods."""
    # stacklevel=3 skips this helper and the static method that called it, so
    # the warning is attributed to the user's call site. Without it the warning
    # points at this module and Python's default filters hide it.
    warnings.warn(  # pragma: nocover
        "staticmethod from_fp, from_bytes, from_path and normalize are deprecated "
        "and scheduled to be removed in 3.0",
        DeprecationWarning,
        stacklevel=3,
    )


class CharsetNormalizerMatches(CharsetMatches):
    """CharsetMatches subclass exposing deprecated static-method entry points.

    Each static method emits a DeprecationWarning and then forwards its
    arguments unchanged to the module-level function of the same name.
    """

    @staticmethod
    def from_fp(*args, **kwargs):  # type: ignore
        """Deprecated: warn, then return the module-level from_fp(*args, **kwargs)."""
        _warn_deprecated_staticmethod()
        return from_fp(*args, **kwargs)  # pragma: nocover

    @staticmethod
    def from_bytes(*args, **kwargs):  # type: ignore
        """Deprecated: warn, then return the module-level from_bytes(*args, **kwargs)."""
        _warn_deprecated_staticmethod()
        return from_bytes(*args, **kwargs)  # pragma: nocover

    @staticmethod
    def from_path(*args, **kwargs):  # type: ignore
        """Deprecated: warn, then return the module-level from_path(*args, **kwargs)."""
        _warn_deprecated_staticmethod()
        return from_path(*args, **kwargs)  # pragma: nocover

    @staticmethod
    def normalize(*args, **kwargs):  # type: ignore
        """Deprecated: warn, then return the module-level normalize(*args, **kwargs)."""
        _warn_deprecated_staticmethod()
        return normalize(*args, **kwargs)  # pragma: nocover
class CharsetDetector(CharsetNormalizerMatches):
    """Subclass of CharsetNormalizerMatches that adds no behaviour of its own.

    NOTE(review): presumably an alternative legacy name for the same API — confirm.
    """
class CharsetDoctor(CharsetNormalizerMatches):
    """Subclass of CharsetNormalizerMatches that adds no behaviour of its own.

    NOTE(review): presumably an alternative legacy name for the same API — confirm.
    """