Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/faker/utils/distribution.py: 29%
56 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1import bisect
2import itertools
4from random import Random
5from typing import Generator, Iterable, Optional, Sequence, TypeVar
7from faker.generator import random as mod_random
def random_sample(random: Optional[Random] = None) -> float:
    """Draw a single float via ``uniform(0.0, 1.0)``.

    Args:
        random: Generator to draw from. When ``None``, the module-level
            ``mod_random`` (imported from ``faker.generator``) is used.

    Returns:
        The value returned by ``uniform(0.0, 1.0)`` on the chosen generator.
    """
    if random is None:
        random = mod_random
    return random.uniform(0.0, 1.0)
def cumsum(it: Iterable[float]) -> Generator[float, None, None]:
    """Lazily yield the running total of ``it``.

    One value is produced per input element; an empty iterable yields
    nothing.

    Args:
        it: Numbers to accumulate, consumed in order.

    Yields:
        The sum of all elements seen so far, including the current one.
    """
    total: float = 0
    for x in it:
        total += x
        yield total
# Generic element type used in the signatures of the weighted-choice helpers:
# the input sequence and the returned sequence hold the same type.
T = TypeVar("T")
def choices_distribution_unique(
    a: Sequence[T],
    p: Optional[Sequence[float]],
    random: Optional[Random] = None,
    length: int = 1,
) -> Sequence[T]:
    """Draw ``length`` elements of ``a`` without replacement, weighted by ``p``.

    Each draw rebuilds the normalized cumulative distribution over the
    elements still available, picks one with a uniform sample, and removes
    it (together with its weight) so it cannot be picked again.

    Args:
        a: Candidate elements.
        p: Weight of each element of ``a``; must not be ``None`` and must
            have the same length as ``a``.
        random: Generator to draw from. When ``None``, the module-level
            ``mod_random`` is used.
        length: Number of elements to draw.

    Returns:
        A list with the drawn elements, in draw order.

    Raises:
        AssertionError: If ``p`` is ``None``, if ``a`` and ``p`` differ in
            length, or if ``length`` exceeds ``len(a)``. These checks are
            ``assert`` statements and therefore disappear under ``-O``.
    """
    # As of Python 3.7, there isn't a way to sample unique elements that takes
    # weight into account.
    if random is None:
        random = mod_random

    assert p is not None
    assert len(a) == len(p)
    assert len(a) >= length, "You can't request more unique samples than elements in the dataset."

    choices = []
    # Work on copies: both lists shrink as elements are drawn.
    items = list(a)
    probabilities = list(p)
    for _ in range(length):
        # The remaining weights change after every draw, so the CDF has to
        # be recomputed and renormalized each time.
        cdf = tuple(cumsum(probabilities))
        normal = cdf[-1]
        cdf2 = [partial / normal for partial in cdf]
        uniform_sample = random_sample(random=random)
        idx = bisect.bisect_right(cdf2, uniform_sample)
        choices.append(items[idx])
        probabilities.pop(idx)
        items.pop(idx)
    return choices
def choices_distribution(
    a: Sequence[T],
    p: Optional[Sequence[float]],
    random: Optional[Random] = None,
    length: int = 1,
) -> Sequence[T]:
    """Draw ``length`` elements of ``a`` with replacement, weighted by ``p``.

    When the generator exposes a ``choices`` method the work is delegated to
    it (or to ``choice`` for a single unweighted draw). Otherwise a
    normalized cumulative distribution is built once and sampled with
    ``bisect``.

    Args:
        a: Candidate elements.
        p: Weight of each element of ``a``, or ``None`` for equal weights.
        random: Generator to draw from. When ``None``, the module-level
            ``mod_random`` is used.
        length: Number of elements to draw.

    Returns:
        A list with the drawn elements.

    Raises:
        AssertionError: If ``p`` is given and its length differs from
            ``len(a)`` (an ``assert``, so not checked under ``-O``).
    """
    if random is None:
        random = mod_random

    if p is not None:
        assert len(a) == len(p)

    if hasattr(random, "choices"):
        if length == 1 and p is None:
            return [random.choice(a)]
        return random.choices(a, weights=p, k=length)

    # Fallback for generator objects that do not provide ``choices``.
    choices = []

    if p is None:
        p = itertools.repeat(1, len(a))  # type: ignore

    # Sampling is with replacement, so the CDF is computed only once.
    cdf = list(cumsum(p))  # type: ignore
    normal = cdf[-1]
    cdf2 = [partial / normal for partial in cdf]
    for _ in range(length):
        uniform_sample = random_sample(random=random)
        idx = bisect.bisect_right(cdf2, uniform_sample)
        choices.append(a[idx])
    return choices