Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/charset_normalizer/legacy.py

1import warnings

2from typing import Dict, Optional, Union

4from .api import from_bytes, from_fp, from_path, normalize

5from .constant import CHARDET_CORRESPONDENCE

6from .models import CharsetMatch, CharsetMatches

def detect(byte_str: bytes) -> Dict[str, Optional[Union[str, float]]]:
    """
    Chardet-compatible entry point (legacy API).

    Guess the encoding of ``byte_str`` and report the result as a
    chardet-style dictionary. The encoding name is translated through
    ``CHARDET_CORRESPONDENCE`` whenever that table has an entry for it;
    otherwise the detected name is returned unchanged.

    This function is deprecated and only meant to ease the migration of
    existing projects; consult the documentation for further information.
    It is not planned for removal.

    :param byte_str: The byte sequence to examine (``bytes`` or ``bytearray``).
    :return: A dict with the keys ``"encoding"``, ``"language"`` and
        ``"confidence"``. When no match is found, ``"encoding"`` and
        ``"confidence"`` are ``None`` and ``"language"`` is ``""``.
    :raises TypeError: If ``byte_str`` is neither ``bytes`` nor ``bytearray``.
    """
    if isinstance(byte_str, bytearray):
        # from_bytes is always handed an immutable bytes object.
        payload = bytes(byte_str)
    elif isinstance(byte_str, bytes):
        payload = byte_str
    else:
        raise TypeError(  # pragma: nocover
            "Expected object of type bytes or bytearray, got: {0}".format(
                type(byte_str)
            )
        )

    best_guess = from_bytes(payload).best()

    if best_guess is None:
        # Nothing plausible was found: report an empty result.
        encoding, language, confidence = None, "", None
    else:
        encoding = best_guess.encoding
        # An "Unknown" language is reported as an empty string.
        language = "" if best_guess.language == "Unknown" else best_guess.language
        confidence = 1.0 - best_guess.chaos

        # Note: CharsetNormalizer does not return 'UTF-8-SIG' because the
        # signature gets stripped during detection/normalization, whereas
        # chardet does return 'utf-8-sig' (a valid codec name). Restore the
        # suffix when a BOM was seen.
        if encoding == "utf_8" and best_guess.bom:
            encoding += "_sig"

    # Prefer chardet's own spelling of the encoding name when one is known.
    reported_encoding = encoding
    if encoding in CHARDET_CORRESPONDENCE:
        reported_encoding = CHARDET_CORRESPONDENCE[encoding]

    return {
        "encoding": reported_encoding,
        "language": language,
        "confidence": confidence,
    }

class CharsetNormalizerMatch(CharsetMatch):
    """Alias subclass of ``CharsetMatch``; it adds no attributes or behavior.

    NOTE(review): presumably retained so that code written against an older
    class name keeps importing -- confirm against the package changelog.
    """

# Single source for the deprecation notice shared by the static shims below.
_LEGACY_STATICMETHOD_NOTICE = (
    "staticmethod from_fp, from_bytes, from_path and normalize are deprecated "
    "and scheduled to be removed in 3.0"
)


class CharsetNormalizerMatches(CharsetMatches):
    """``CharsetMatches`` subclass exposing deprecated static entry points.

    Each static method emits a ``DeprecationWarning`` and then forwards every
    positional and keyword argument, untouched, to the module-level function
    of the same name, returning whatever that function returns.

    The warnings are raised with ``stacklevel=2`` so they are attributed to
    the code calling the deprecated method rather than to this module.
    """

    @staticmethod
    def from_fp(*args, **kwargs):  # type: ignore
        """Deprecated: warn, then delegate to the module-level ``from_fp``."""
        warnings.warn(  # pragma: nocover
            _LEGACY_STATICMETHOD_NOTICE,
            DeprecationWarning,
            stacklevel=2,
        )
        return from_fp(*args, **kwargs)  # pragma: nocover

    @staticmethod
    def from_bytes(*args, **kwargs):  # type: ignore
        """Deprecated: warn, then delegate to the module-level ``from_bytes``."""
        warnings.warn(  # pragma: nocover
            _LEGACY_STATICMETHOD_NOTICE,
            DeprecationWarning,
            stacklevel=2,
        )
        return from_bytes(*args, **kwargs)  # pragma: nocover

    @staticmethod
    def from_path(*args, **kwargs):  # type: ignore
        """Deprecated: warn, then delegate to the module-level ``from_path``."""
        warnings.warn(  # pragma: nocover
            _LEGACY_STATICMETHOD_NOTICE,
            DeprecationWarning,
            stacklevel=2,
        )
        return from_path(*args, **kwargs)  # pragma: nocover

    @staticmethod
    def normalize(*args, **kwargs):  # type: ignore
        """Deprecated: warn, then delegate to the module-level ``normalize``."""
        warnings.warn(  # pragma: nocover
            _LEGACY_STATICMETHOD_NOTICE,
            DeprecationWarning,
            stacklevel=2,
        )
        return normalize(*args, **kwargs)  # pragma: nocover

class CharsetDetector(CharsetNormalizerMatches):
    """Alias subclass of ``CharsetNormalizerMatches``; it adds nothing of its own.

    NOTE(review): presumably a historical class name kept importable for
    older callers -- confirm against the package changelog.
    """

class CharsetDoctor(CharsetNormalizerMatches):
    """Alias subclass of ``CharsetNormalizerMatches``; it adds nothing of its own.

    NOTE(review): presumably a historical class name kept importable for
    older callers -- confirm against the package changelog.
    """

Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/charset_normalizer/legacy.py: 54%

40 statements