Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/charset_normalizer/legacy.py: 54%

40 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1import warnings 

2from typing import Dict, Optional, Union 

3 

4from .api import from_bytes, from_fp, from_path, normalize 

5from .constant import CHARDET_CORRESPONDENCE 

6from .models import CharsetMatch, CharsetMatches 

7 

8 

def detect(
    byte_str: Union[bytes, bytearray]
) -> Dict[str, Optional[Union[str, float]]]:
    """
    chardet legacy method
    Detect the encoding of the given byte string. It should be mostly backward-compatible.
    Encoding name will match Chardet own writing whenever possible. (Not on encoding name unsupported by it)
    This function is deprecated and should be used to migrate your project easily, consult the documentation for
    further information. Not planned for removal.

    :param byte_str: The byte sequence to examine. A ``bytearray`` is accepted
        and converted to ``bytes`` before detection.
    :return: A dict with three keys: ``"encoding"`` (the detected encoding name,
        mapped through ``CHARDET_CORRESPONDENCE`` when an entry exists, or
        ``None`` when no match was found), ``"language"`` (the language of the
        best match, or ``""`` when there is no match or the match reports
        ``"Unknown"``) and ``"confidence"`` (``1.0 - chaos`` of the best match,
        or ``None`` when no match was found).
    :raises TypeError: If ``byte_str`` is neither ``bytes`` nor ``bytearray``.
    """
    if not isinstance(byte_str, (bytearray, bytes)):
        raise TypeError(  # pragma: nocover
            f"Expected object of type bytes or bytearray, got: {type(byte_str)}"
        )

    if isinstance(byte_str, bytearray):
        byte_str = bytes(byte_str)

    best_match = from_bytes(byte_str).best()

    encoding: Optional[str]
    confidence: Optional[float]

    if best_match is None:
        # Nothing plausible was found: report the "no result" triple.
        encoding = None
        language = ""
        confidence = None
    else:
        encoding = best_match.encoding
        language = best_match.language if best_match.language != "Unknown" else ""
        confidence = 1.0 - best_match.chaos

        # Note: CharsetNormalizer does not return 'UTF-8-SIG' as the sig get stripped
        # in the detection/normalization process but chardet does return 'utf-8-sig'
        # and it is a valid codec name.
        if encoding == "utf_8" and best_match.bom:
            encoding += "_sig"

    # Prefer chardet's spelling of the encoding name whenever one is registered.
    return {
        "encoding": encoding
        if encoding not in CHARDET_CORRESPONDENCE
        else CHARDET_CORRESPONDENCE[encoding],
        "language": language,
        "confidence": confidence,
    }

46 

47 

class CharsetNormalizerMatch(CharsetMatch):
    """Subclass of ``CharsetMatch`` that adds no attributes or methods.

    NOTE(review): presumably kept so that code importing the older class name
    keeps working -- confirm against the package changelog.
    """

50 

51 

# Shared text of the deprecation notice emitted by every static method below.
_DEPRECATED_STATICMETHOD_MESSAGE = (
    "staticmethod from_fp, from_bytes, from_path and normalize are deprecated "
    "and scheduled to be removed in 3.0"
)


class CharsetNormalizerMatches(CharsetMatches):
    """``CharsetMatches`` subclass exposing deprecated static entry points.

    Each static method emits a ``DeprecationWarning`` and then forwards all of
    its arguments, unchanged, to the module-level function of the same name
    (``from_fp``, ``from_bytes``, ``from_path``, ``normalize``).

    The warnings are raised with ``stacklevel=2`` so that they are attributed
    to the code calling the deprecated method rather than to this module.
    """

    # Inside a method body the class namespace is not an enclosing scope, so
    # the bare names ``from_fp`` / ``from_bytes`` / ``from_path`` / ``normalize``
    # below resolve to the module-level functions, not to these static methods.

    @staticmethod
    def from_fp(*args, **kwargs):  # type: ignore
        """Deprecated: warn, then delegate to the module-level ``from_fp``."""
        warnings.warn(  # pragma: nocover
            _DEPRECATED_STATICMETHOD_MESSAGE,
            DeprecationWarning,
            stacklevel=2,
        )
        return from_fp(*args, **kwargs)  # pragma: nocover

    @staticmethod
    def from_bytes(*args, **kwargs):  # type: ignore
        """Deprecated: warn, then delegate to the module-level ``from_bytes``."""
        warnings.warn(  # pragma: nocover
            _DEPRECATED_STATICMETHOD_MESSAGE,
            DeprecationWarning,
            stacklevel=2,
        )
        return from_bytes(*args, **kwargs)  # pragma: nocover

    @staticmethod
    def from_path(*args, **kwargs):  # type: ignore
        """Deprecated: warn, then delegate to the module-level ``from_path``."""
        warnings.warn(  # pragma: nocover
            _DEPRECATED_STATICMETHOD_MESSAGE,
            DeprecationWarning,
            stacklevel=2,
        )
        return from_path(*args, **kwargs)  # pragma: nocover

    @staticmethod
    def normalize(*args, **kwargs):  # type: ignore
        """Deprecated: warn, then delegate to the module-level ``normalize``."""
        warnings.warn(  # pragma: nocover
            _DEPRECATED_STATICMETHOD_MESSAGE,
            DeprecationWarning,
            stacklevel=2,
        )
        return normalize(*args, **kwargs)  # pragma: nocover

88 

89 

class CharsetDetector(CharsetNormalizerMatches):
    """Subclass of ``CharsetNormalizerMatches`` that adds nothing of its own.

    NOTE(review): presumably an alias name retained for older callers -- confirm.
    """

92 

93 

class CharsetDoctor(CharsetNormalizerMatches):
    """Subclass of ``CharsetNormalizerMatches`` that adds nothing of its own.

    NOTE(review): presumably an alias name retained for older callers -- confirm.
    """