Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/faker/utils/distribution.py: 29%

56 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1import bisect 

2import itertools 

3 

4from random import Random 

5from typing import Generator, Iterable, Optional, Sequence, TypeVar 

6 

7from faker.generator import random as mod_random 

8 

9 

def random_sample(random: Optional[Random] = None) -> float:
    """Draw a single float via ``uniform(0.0, 1.0)``.

    :param random: generator to draw from; when ``None`` the module-level
        ``mod_random`` imported from ``faker.generator`` is used instead.
    :return: the value returned by ``uniform(0.0, 1.0)`` on that generator.
    """
    rng = mod_random if random is None else random
    return rng.uniform(0.0, 1.0)

14 

15 

def cumsum(it: Iterable[float]) -> Generator[float, None, None]:
    """Lazily yield the running totals of ``it``.

    The n-th value produced is the sum of the first n items of ``it``,
    accumulated starting from ``0``. Nothing is yielded for an empty iterable.
    """
    running: float = 0
    for term in it:
        running += term
        yield running

21 

22 

23T = TypeVar("T") 

24 

25 

def choices_distribution_unique(
    a: Sequence[T],
    p: Optional[Sequence[float]],
    random: Optional[Random] = None,
    length: int = 1,
) -> Sequence[T]:
    """Pick ``length`` elements of ``a`` by weighted sampling without replacement.

    On every draw the cumulative weights of the elements still available are
    rebuilt and normalised by their total, one element is selected by bisecting
    that normalised table with a sample from ``random_sample``, and the chosen
    element and its weight are removed before the next draw.

    :param a: population to draw from.
    :param p: weight of each element of ``a``; must not be ``None`` and must
        have the same length as ``a``.
    :param random: generator handed to ``random_sample``; the module-level
        ``mod_random`` is used when ``None``.
    :param length: number of elements to draw.
    :return: list of the drawn elements, in draw order.
    :raises AssertionError: if ``p`` is ``None``, if ``a`` and ``p`` differ in
        length, or if ``length`` exceeds ``len(a)``.
    :raises ZeroDivisionError: if the weights still available at some draw
        sum to zero.
    """
    # As of Python 3.7, there isn't a way to sample unique elements that takes
    # weight into account.
    if random is None:
        random = mod_random

    # These checks are the only input validation, so they are raised
    # explicitly instead of via ``assert`` (which ``python -O`` strips).
    # The exception type stays AssertionError for backward compatibility.
    if p is None:
        raise AssertionError("p must not be None.")
    if len(a) != len(p):
        raise AssertionError("a and p must have the same length.")
    if len(a) < length:
        raise AssertionError("You can't request more unique samples than elements in the dataset.")

    choices = []
    items = list(a)
    probabilities = list(p)
    for _ in range(length):
        # Rebuild the CDF from the weights that are still available.
        cdf = tuple(cumsum(probabilities))
        normal = cdf[-1]
        cdf2 = [partial / normal for partial in cdf]
        uniform_sample = random_sample(random=random)
        idx = bisect.bisect_right(cdf2, uniform_sample)
        # Remove the pick and its weight so it cannot be drawn again.
        choices.append(items.pop(idx))
        probabilities.pop(idx)
    return choices

55 

56 

def choices_distribution(
    a: Sequence[T],
    p: Optional[Sequence[float]],
    random: Optional[Random] = None,
    length: int = 1,
) -> Sequence[T]:
    """Pick ``length`` elements of ``a`` with replacement, optionally weighted by ``p``.

    When the generator exposes a ``choices`` method, the work is delegated to
    it (or to ``choice`` for a single unweighted pick). Otherwise a normalised
    cumulative-weight table is built once and bisected for every draw.

    :param a: population to draw from.
    :param p: weight of each element of ``a``, or ``None`` for equal weights.
    :param random: generator to draw with; the module-level ``mod_random`` is
        used when ``None``.
    :param length: number of elements to draw.
    :return: list of the drawn elements.
    """
    rng = mod_random if random is None else random

    assert p is None or len(a) == len(p)

    if hasattr(rng, "choices"):
        if length == 1 and p is None:
            return [rng.choice(a)]
        return rng.choices(a, weights=p, k=length)

    # Fallback for generators without ``choices``: inverse-CDF sampling.
    weights = itertools.repeat(1, len(a)) if p is None else p
    running_totals = list(cumsum(weights))  # type: ignore
    grand_total = running_totals[-1]
    normalized = [partial / grand_total for partial in running_totals]
    return [
        a[bisect.bisect_right(normalized, random_sample(random=rng))]
        for _ in range(length)
    ]