# Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/strings/object_array.py

from __future__ import annotations

from collections.abc import Callable  # noqa: PDF001
import re
import textwrap
from typing import TYPE_CHECKING
import unicodedata

import numpy as np

import pandas._libs.lib as lib
import pandas._libs.missing as libmissing
import pandas._libs.ops as libops
from pandas._typing import (
    NpDtype,
    Scalar,
)

from pandas.core.dtypes.common import is_scalar
from pandas.core.dtypes.missing import isna

from pandas.core.strings.base import BaseStringArrayMethods

# Series is only needed for annotations; importing it lazily keeps it out of
# the runtime import graph.
if TYPE_CHECKING:
    from pandas import Series

class ObjectStringArrayMixin(BaseStringArrayMethods):
    """
    String Methods operating on object-dtype ndarrays.

    Almost every method builds a per-element callable and hands it to
    ``_str_map``, which applies it to the non-NA elements of ``self``.
    """

    _str_na_value = np.nan

    def __len__(self):
        # For typing, _str_map relies on the object being sized.
        raise NotImplementedError

    def _str_map(
        self, f, na_value=None, dtype: NpDtype | None = None, convert: bool = True
    ):
        """
        Map a callable over valid elements of the array.

        Parameters
        ----------
        f : Callable
            A function to call on each non-NA element.
        na_value : Scalar, optional
            The value to set for NA values. Might also be used for the
            fill value if the callable `f` raises an exception.
            This defaults to ``self._str_na_value`` which is ``np.nan``
            for object-dtype and Categorical and ``pd.NA`` for StringArray.
        dtype : Dtype, optional
            The dtype of the result array.
        convert : bool, default True
            Whether to call `maybe_convert_objects` on the resulting ndarray
        """
        if dtype is None:
            dtype = np.dtype("object")
        if na_value is None:
            na_value = self._str_na_value

        if not len(self):
            return np.array([], dtype=dtype)

        arr = np.asarray(self, dtype=object)
        mask = isna(arr)
        # Skip conversion inside map_infer_mask when every element is NA.
        map_convert = convert and not np.all(mask)
        try:
            result = lib.map_infer_mask(arr, f, mask.view(np.uint8), map_convert)
        except (TypeError, AttributeError) as err:
            # Reraise the exception if callable `f` got wrong number of args.
            # The user may want to be warned by this, instead of getting NaN
            p_err = (
                r"((takes)|(missing)) (?(2)from \d+ to )?\d+ "
                r"(?(3)required )positional arguments?"
            )

            if len(err.args) >= 1 and re.search(p_err, err.args[0]):
                # FIXME: this should be totally avoidable
                raise err

            def g(x):
                # This type of fallback behavior can be removed once
                # we remove object-dtype .str accessor.
                try:
                    return f(x)
                except (TypeError, AttributeError):
                    return na_value

            # Retry with a wrapper that turns per-element failures into
            # ``na_value`` instead of aborting the whole map.
            return self._str_map(g, na_value=na_value, dtype=dtype)
        if not isinstance(result, np.ndarray):
            return result
        if na_value is not np.nan:
            # Overwrite the masked (NA) positions with the requested fill value.
            np.putmask(result, mask, na_value)
            if convert and result.dtype == object:
                result = lib.maybe_convert_objects(result)
        return result

    def _str_count(self, pat, flags=0):
        """Count the matches of regex ``pat`` (as found by ``findall``) per element."""
        regex = re.compile(pat, flags=flags)
        f = lambda x: len(regex.findall(x))
        return self._str_map(f, dtype="int64")

    def _str_pad(self, width, side="left", fillchar=" "):
        """
        Pad each element to ``width`` with ``fillchar``.

        ``side`` is one of ``"left"`` (``rjust``), ``"right"`` (``ljust``) or
        ``"both"`` (``center``); anything else raises ``ValueError``.
        """
        if side == "left":
            f = lambda x: x.rjust(width, fillchar)
        elif side == "right":
            f = lambda x: x.ljust(width, fillchar)
        elif side == "both":
            f = lambda x: x.center(width, fillchar)
        else:  # pragma: no cover
            raise ValueError("Invalid side")
        return self._str_map(f)

    def _str_contains(self, pat, case=True, flags=0, na=np.nan, regex: bool = True):
        """
        Test whether each element contains ``pat``.

        With ``regex=True`` the pattern is compiled and ``search`` is used;
        otherwise a plain substring test is done, upper-casing both sides
        when ``case`` is false.
        """
        if regex:
            if not case:
                flags |= re.IGNORECASE

            pat = re.compile(pat, flags=flags)

            f = lambda x: pat.search(x) is not None
        else:
            if case:
                f = lambda x: pat in x
            else:
                upper_pat = pat.upper()
                f = lambda x: upper_pat in x.upper()
        return self._str_map(f, na, dtype=np.dtype("bool"))

    def _str_startswith(self, pat, na=None):
        """Test whether each element starts with ``pat``."""
        f = lambda x: x.startswith(pat)
        return self._str_map(f, na_value=na, dtype=np.dtype(bool))

    def _str_endswith(self, pat, na=None):
        """Test whether each element ends with ``pat``."""
        f = lambda x: x.endswith(pat)
        return self._str_map(f, na_value=na, dtype=np.dtype(bool))

    def _str_replace(
        self,
        pat: str | re.Pattern,
        repl: str | Callable,
        n: int = -1,
        case: bool = True,
        flags: int = 0,
        regex: bool = True,
    ):
        """
        Replace occurrences of ``pat`` with ``repl`` in each element.

        The ``re`` path is taken when ``regex`` is true, any flag is set
        (including the one implied by ``case=False``) or ``repl`` is callable;
        otherwise ``str.replace`` is used.
        """
        if case is False:
            # add case flag, if provided
            flags |= re.IGNORECASE

        if regex or flags or callable(repl):
            if not isinstance(pat, re.Pattern):
                if regex is False:
                    # Literal pattern routed through ``re``: escape it first.
                    pat = re.escape(pat)
                pat = re.compile(pat, flags=flags)

            # ``re.sub`` uses count=0 for "replace all"; the public API uses -1.
            n = n if n >= 0 else 0
            f = lambda x: pat.sub(repl=repl, string=x, count=n)
        else:
            f = lambda x: x.replace(pat, repl, n)

        return self._str_map(f, dtype=str)

    def _str_repeat(self, repeats):
        """
        Repeat each element ``repeats`` times.

        ``repeats`` is either a scalar applied to every element or a sequence
        paired element-wise with ``self``.
        """
        if is_scalar(repeats):

            def scalar_rep(x):
                # Try bytes first and fall back to str when ``x`` is not bytes.
                try:
                    return bytes.__mul__(x, repeats)
                except TypeError:
                    return str.__mul__(x, repeats)

            return self._str_map(scalar_rep, dtype=str)
        else:
            from pandas.core.arrays.string_ import BaseStringArray

            def rep(x, r):
                if x is libmissing.NA:
                    return x
                try:
                    return bytes.__mul__(x, r)
                except TypeError:
                    return str.__mul__(x, r)

            repeats = np.asarray(repeats, dtype=object)
            result = libops.vec_binop(np.asarray(self), repeats, rep)
            if isinstance(self, BaseStringArray):
                # Not going through map, so we have to do this here.
                result = type(self)._from_sequence(result)
            return result

    def _str_match(
        self, pat: str, case: bool = True, flags: int = 0, na: Scalar | None = None
    ):
        """Test whether regex ``pat`` matches at the start of each element."""
        if not case:
            flags |= re.IGNORECASE

        regex = re.compile(pat, flags=flags)

        f = lambda x: regex.match(x) is not None
        return self._str_map(f, na_value=na, dtype=np.dtype(bool))

    def _str_fullmatch(
        self,
        pat: str | re.Pattern,
        case: bool = True,
        flags: int = 0,
        na: Scalar | None = None,
    ):
        """Test whether regex ``pat`` matches each element in its entirety."""
        if not case:
            flags |= re.IGNORECASE

        regex = re.compile(pat, flags=flags)

        f = lambda x: regex.fullmatch(x) is not None
        return self._str_map(f, na_value=na, dtype=np.dtype(bool))

    def _str_encode(self, encoding, errors="strict"):
        """Encode each element with ``encoding`` using the ``errors`` policy."""
        f = lambda x: x.encode(encoding, errors=errors)
        return self._str_map(f, dtype=object)

    def _str_find(self, sub, start=0, end=None):
        """Apply ``find(sub, start[, end])`` to each element."""
        return self._str_find_(sub, start, end, side="left")

    def _str_rfind(self, sub, start=0, end=None):
        """Apply ``rfind(sub, start[, end])`` to each element."""
        return self._str_find_(sub, start, end, side="right")

    def _str_find_(self, sub, start, end, side):
        """
        Shared implementation of ``_str_find`` / ``_str_rfind``.

        ``side`` selects the element method: ``"left"`` -> ``find``,
        ``"right"`` -> ``rfind``; anything else raises ``ValueError``.
        """
        if side == "left":
            method = "find"
        elif side == "right":
            method = "rfind"
        else:  # pragma: no cover
            raise ValueError("Invalid side")

        if end is None:
            f = lambda x: getattr(x, method)(sub, start)
        else:
            f = lambda x: getattr(x, method)(sub, start, end)
        return self._str_map(f, dtype="int64")

    def _str_findall(self, pat, flags=0):
        """Apply ``re.findall`` for ``pat`` to each element."""
        regex = re.compile(pat, flags=flags)
        return self._str_map(regex.findall, dtype="object")

    def _str_get(self, i):
        """
        Extract item ``i`` from each element.

        Dict elements use ``.get(i)``; other elements are indexed when ``i``
        is within bounds, and yield ``self._str_na_value`` otherwise.
        """

        def f(x):
            if isinstance(x, dict):
                return x.get(i)
            elif len(x) > i >= -len(x):
                return x[i]
            return self._str_na_value

        return self._str_map(f)

    def _str_index(self, sub, start=0, end=None):
        """Apply ``index(sub, start, end)`` to each element."""
        # ``end=None`` is accepted by ``index`` itself, so no branching on
        # ``end`` is required.
        f = lambda x: x.index(sub, start, end)
        return self._str_map(f, dtype="int64")

    def _str_rindex(self, sub, start=0, end=None):
        """Apply ``rindex(sub, start, end)`` to each element."""
        # ``end=None`` is accepted by ``rindex`` itself, so no branching on
        # ``end`` is required.
        f = lambda x: x.rindex(sub, start, end)
        return self._str_map(f, dtype="int64")

    def _str_join(self, sep):
        """Join the items of each element with ``sep``."""
        return self._str_map(sep.join)

    def _str_partition(self, sep, expand):
        """Apply ``partition(sep)`` to each element; ``expand`` is not used here."""
        result = self._str_map(lambda x: x.partition(sep), dtype="object")
        return result

    def _str_rpartition(self, sep, expand):
        """Apply ``rpartition(sep)`` to each element; ``expand`` is not used here."""
        return self._str_map(lambda x: x.rpartition(sep), dtype="object")

    def _str_len(self):
        """Return ``len`` of each element."""
        return self._str_map(len, dtype="int64")

    def _str_slice(self, start=None, stop=None, step=None):
        """Slice each element with ``slice(start, stop, step)``."""
        obj = slice(start, stop, step)
        return self._str_map(lambda x: x[obj])

    def _str_slice_replace(self, start=None, stop=None, repl=None):
        """
        Replace the ``[start:stop]`` slice of each element with ``repl``.

        ``repl=None`` is treated as the empty string.
        """
        if repl is None:
            repl = ""

        def f(x):
            # When the selected slice is empty, resume the tail at ``start``
            # rather than at ``stop``.
            if x[start:stop] == "":
                local_stop = start
            else:
                local_stop = stop
            y = ""
            if start is not None:
                y += x[:start]
            y += repl
            if stop is not None:
                y += x[local_stop:]
            return y

        return self._str_map(f)

    def _str_split(
        self,
        pat: str | re.Pattern | None = None,
        n=-1,
        expand=False,
        regex: bool | None = None,
    ):
        """
        Split each element around ``pat``, at most ``n`` times.

        ``pat=None`` uses ``str.split`` with its whitespace default. Otherwise
        ``regex`` decides between ``re.split`` and ``str.split``; when
        ``regex`` is None a one-character ``pat`` is treated literally and
        anything longer is compiled as a regular expression.
        ``expand`` is not used here.
        """
        if pat is None:
            if n is None or n == 0:
                n = -1
            f = lambda x: x.split(pat, n)
        else:
            new_pat: str | re.Pattern
            if regex is True or isinstance(pat, re.Pattern):
                new_pat = re.compile(pat)
            elif regex is False:
                new_pat = pat
            # regex is None so link to old behavior #43563
            else:
                if len(pat) == 1:
                    new_pat = pat
                else:
                    new_pat = re.compile(pat)

            if isinstance(new_pat, re.Pattern):
                # ``re.split`` uses maxsplit=0 for "no limit".
                if n is None or n == -1:
                    n = 0
                f = lambda x: new_pat.split(x, maxsplit=n)
            else:
                # ``str.split`` uses maxsplit=-1 for "no limit".
                if n is None or n == 0:
                    n = -1
                f = lambda x: x.split(pat, n)
        return self._str_map(f, dtype=object)

    def _str_rsplit(self, pat=None, n=-1):
        """Apply ``rsplit(pat, n)`` to each element; ``n`` of None or 0 means no limit."""
        if n is None or n == 0:
            n = -1
        f = lambda x: x.rsplit(pat, n)
        return self._str_map(f, dtype="object")

    def _str_translate(self, table):
        """Apply ``translate(table)`` to each element."""
        return self._str_map(lambda x: x.translate(table))

    def _str_wrap(self, width, **kwargs):
        """
        Wrap each element at ``width`` and join the lines with newlines.

        Extra keyword arguments are forwarded to ``textwrap.TextWrapper``.
        """
        kwargs["width"] = width
        tw = textwrap.TextWrapper(**kwargs)
        return self._str_map(lambda s: "\n".join(tw.wrap(s)))

    def _str_get_dummies(self, sep="|"):
        """
        Build an indicator matrix of the ``sep``-separated tags per element.

        Returns
        -------
        tuple
            ``(dummies, tags2)``: an int64 array with one row per element and
            one column per tag, and the sorted list of tags.
        """
        from pandas import Series

        arr = Series(self).fillna("")
        try:
            arr = sep + arr + sep
        except (TypeError, NotImplementedError):
            arr = sep + arr.astype(str) + sep

        tags: set[str] = set()
        for ts in Series(arr).str.split(sep):
            tags.update(ts)
        tags2 = sorted(tags - {""})

        dummies = np.empty((len(arr), len(tags2)), dtype=np.int64)

        for i, t in enumerate(tags2):
            # Elements were wrapped in ``sep`` above, so a tag only matches as
            # a whole token.
            pat = sep + t + sep
            dummies[:, i] = lib.map_infer(arr.to_numpy(), lambda x: pat in x)
        return dummies, tags2

    def _str_upper(self):
        """Upper-case each element."""
        return self._str_map(lambda x: x.upper())

    def _str_isalnum(self):
        """Apply ``str.isalnum`` to each element."""
        return self._str_map(str.isalnum, dtype="bool")

    def _str_isalpha(self):
        """Apply ``str.isalpha`` to each element."""
        return self._str_map(str.isalpha, dtype="bool")

    def _str_isdecimal(self):
        """Apply ``str.isdecimal`` to each element."""
        return self._str_map(str.isdecimal, dtype="bool")

    def _str_isdigit(self):
        """Apply ``str.isdigit`` to each element."""
        return self._str_map(str.isdigit, dtype="bool")

    def _str_islower(self):
        """Apply ``str.islower`` to each element."""
        return self._str_map(str.islower, dtype="bool")

    def _str_isnumeric(self):
        """Apply ``str.isnumeric`` to each element."""
        return self._str_map(str.isnumeric, dtype="bool")

    def _str_isspace(self):
        """Apply ``str.isspace`` to each element."""
        return self._str_map(str.isspace, dtype="bool")

    def _str_istitle(self):
        """Apply ``str.istitle`` to each element."""
        return self._str_map(str.istitle, dtype="bool")

    def _str_isupper(self):
        """Apply ``str.isupper`` to each element."""
        return self._str_map(str.isupper, dtype="bool")

    def _str_capitalize(self):
        """Apply ``str.capitalize`` to each element."""
        return self._str_map(str.capitalize)

    def _str_casefold(self):
        """Apply ``str.casefold`` to each element."""
        return self._str_map(str.casefold)

    def _str_title(self):
        """Apply ``str.title`` to each element."""
        return self._str_map(str.title)

    def _str_swapcase(self):
        """Apply ``str.swapcase`` to each element."""
        return self._str_map(str.swapcase)

    def _str_lower(self):
        """Apply ``str.lower`` to each element."""
        return self._str_map(str.lower)

    def _str_normalize(self, form):
        """Apply ``unicodedata.normalize(form, ...)`` to each element."""
        f = lambda x: unicodedata.normalize(form, x)
        return self._str_map(f)

    def _str_strip(self, to_strip=None):
        """Apply ``strip(to_strip)`` to each element."""
        return self._str_map(lambda x: x.strip(to_strip))

    def _str_lstrip(self, to_strip=None):
        """Apply ``lstrip(to_strip)`` to each element."""
        return self._str_map(lambda x: x.lstrip(to_strip))

    def _str_rstrip(self, to_strip=None):
        """Apply ``rstrip(to_strip)`` to each element."""
        return self._str_map(lambda x: x.rstrip(to_strip))

    def _str_removeprefix(self, prefix: str) -> Series:
        """Remove ``prefix`` from the start of each element that has it."""
        # outstanding question on whether to use native methods for users on Python 3.9+
        # https://github.com/pandas-dev/pandas/pull/39226#issuecomment-836719770,
        # in which case we could do return self._str_map(str.removeprefix)

        def removeprefix(text: str) -> str:
            if text.startswith(prefix):
                return text[len(prefix) :]
            return text

        return self._str_map(removeprefix)

    def _str_removesuffix(self, suffix: str) -> Series:
        """
        Remove ``suffix`` from the end of each element that has it.

        An empty ``suffix`` leaves every element unchanged.
        """
        # On Python 3.9+ this could be self._str_map(lambda x: x.removesuffix(suffix))

        def removesuffix(text: str) -> str:
            if text.endswith(suffix):
                # Slice by an explicit end position: ``text[: -len(suffix)]``
                # degenerates to ``text[:0]`` (empty) when ``suffix`` is "".
                return text[: len(text) - len(suffix)]
            return text

        return self._str_map(removesuffix)

    def _str_extract(self, pat: str, flags: int = 0, expand: bool = True):
        """
        Extract the capture groups of the first match of ``pat`` per element.

        With ``expand=False`` only the first group is returned (via
        ``_str_map``). Otherwise a list with one row per element is returned,
        each row holding one entry per group, with ``self._str_na_value``
        where a group or the whole match is missing.
        """
        regex = re.compile(pat, flags=flags)
        na_value = self._str_na_value

        if not expand:

            def g(x):
                m = regex.search(x)
                return m.groups()[0] if m else na_value

            return self._str_map(g, convert=False)

        # NOTE: this single list object is returned for every non-matching
        # element, so rows without a match share identity.
        empty_row = [na_value] * regex.groups

        def f(x):
            if not isinstance(x, str):
                return empty_row
            m = regex.search(x)
            if m:
                return [na_value if item is None else item for item in m.groups()]
            else:
                return empty_row

        return [f(val) for val in np.asarray(self)]

# Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/strings/object_array.py: 15%
#
# 304 statements