Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/interchange/utils.py: 67%

44 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1""" 

2Utility functions and objects for implementing the interchange API. 

3""" 

4 

5from __future__ import annotations 

6 

7import re 

8import typing 

9 

10import numpy as np 

11 

12from pandas._typing import DtypeObj 

13 

14import pandas as pd 

15from pandas.api.types import is_datetime64_dtype 

16 

17 

class ArrowCTypes:
    """
    Apache Arrow C data interface format strings, keyed by type name.

    Every attribute holds the format string for the type it is named after.
    ``TIMESTAMP`` and ``TIME`` are templates that must be completed with
    ``str.format`` before use.

    Format-string reference:
    https://arrow.apache.org/docs/format/CDataInterface.html#data-type-description-format-strings
    """

    # Null and boolean.
    NULL = "n"
    BOOL = "b"

    # Integers: lowercase letter for signed, uppercase for unsigned.
    INT8 = "c"
    UINT8 = "C"
    INT16 = "s"
    UINT16 = "S"
    INT32 = "i"
    UINT32 = "I"
    INT64 = "l"
    UINT64 = "L"

    # Floating point.
    FLOAT16 = "e"
    FLOAT32 = "f"
    FLOAT64 = "g"

    # Text (utf-8 encoded).
    STRING = "u"

    # Dates.
    DATE32 = "tdD"
    DATE64 = "tdm"

    # Templates. The ``resolution`` placeholder takes a one-letter code:
    #   seconds      -> 's'
    #   milliseconds -> 'm'
    #   microseconds -> 'u'
    #   nanoseconds  -> 'n'
    TIMESTAMP = "ts{resolution}:{tz}"
    TIME = "tt{resolution}"

49 

50 

class Endianness:
    """Single-character codes describing the byte-order of a data-type."""

    LITTLE = "<"  # little-endian
    BIG = ">"  # big-endian
    NATIVE = "="  # native byte order
    NA = "|"  # presumably "not applicable" (e.g. single-byte types) -- confirm

58 

59 

def dtype_to_arrow_c_fmt(dtype: DtypeObj) -> str:
    """
    Represent pandas `dtype` as a format string in Apache Arrow C notation.

    Parameters
    ----------
    dtype : np.dtype
        Datatype of pandas DataFrame to represent.

    Returns
    -------
    str
        Format string in Apache Arrow C notation of the given `dtype`.

    Raises
    ------
    NotImplementedError
        If no conversion is implemented for `dtype`. This includes
        datetime64 dtypes whose unit is not seconds, milliseconds,
        microseconds or nanoseconds, or that carry a multiplier
        (e.g. ``datetime64[25s]``).
    """
    if isinstance(dtype, pd.CategoricalDtype):
        return ArrowCTypes.INT64
    elif dtype == np.dtype("O"):
        return ArrowCTypes.STRING

    # Dtypes whose upper-cased name matches an ArrowCTypes attribute
    # (e.g. "int64" -> INT64) map directly to that format string.
    format_str = getattr(ArrowCTypes, dtype.name.upper(), None)
    if format_str is not None:
        return format_str

    if is_datetime64_dtype(dtype):
        # Read the exact unit and multiplier instead of slicing the first
        # character out of ``dtype.str``: that shortcut turned minutes
        # ('m') into the Arrow code for milliseconds, produced invalid
        # codes for units such as 'D' or 'h', and failed with IndexError
        # on a unit-less datetime64 dtype.
        unit, count = np.datetime_data(typing.cast(np.dtype, dtype))
        resolution = {"s": "s", "ms": "m", "us": "u", "ns": "n"}.get(unit)
        if resolution is not None and count == 1:
            return ArrowCTypes.TIMESTAMP.format(resolution=resolution, tz="")

    raise NotImplementedError(
        f"Conversion of {dtype} to Arrow C format string is not implemented."
    )

92 

93 

class NoBufferPresent(Exception):
    """Signal that the requested buffer does not exist."""