Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/io/json/_json.py: 19%
498 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1from __future__ import annotations
3from abc import (
4 ABC,
5 abstractmethod,
6)
7from collections import abc
8import functools
9from io import StringIO
10from itertools import islice
11from typing import (
12 TYPE_CHECKING,
13 Any,
14 Callable,
15 Generic,
16 Literal,
17 Mapping,
18 TypeVar,
19 overload,
20)
22import numpy as np
24import pandas._libs.json as json
25from pandas._libs.tslibs import iNaT
26from pandas._typing import (
27 CompressionOptions,
28 DtypeArg,
29 FilePath,
30 IndexLabel,
31 JSONSerializable,
32 ReadBuffer,
33 StorageOptions,
34 WriteBuffer,
35)
36from pandas.errors import AbstractMethodError
37from pandas.util._decorators import (
38 deprecate_kwarg,
39 deprecate_nonkeyword_arguments,
40 doc,
41)
43from pandas.core.dtypes.common import (
44 ensure_str,
45 is_period_dtype,
46)
48from pandas import (
49 DataFrame,
50 MultiIndex,
51 Series,
52 isna,
53 notna,
54 to_datetime,
55)
56from pandas.core.construction import create_series_with_explicit_dtype
57from pandas.core.reshape.concat import concat
58from pandas.core.shared_docs import _shared_docs
60from pandas.io.common import (
61 IOHandles,
62 _extension_to_compression,
63 file_exists,
64 get_handle,
65 is_fsspec_url,
66 is_url,
67 stringify_path,
68)
69from pandas.io.json._normalize import convert_to_line_delimits
70from pandas.io.json._table_schema import (
71 build_table_schema,
72 parse_table_schema,
73)
74from pandas.io.parsers.readers import validate_integer
76if TYPE_CHECKING:
77 from pandas.core.generic import NDFrame
# Type variable constraining JsonReader's generic parameter to the two
# supported output kinds ("frame" -> DataFrame, "series" -> Series).
FrameSeriesStrT = TypeVar("FrameSeriesStrT", bound=Literal["frame", "series"])

# Module-level aliases for the C-accelerated JSON codec in pandas._libs.
loads = json.loads
dumps = json.dumps
85# interface to/from
# Overload: writing to a path/buffer returns None (output goes to the sink).
@overload
def to_json(
    path_or_buf: FilePath | WriteBuffer[str] | WriteBuffer[bytes],
    obj: NDFrame,
    orient: str | None = ...,
    date_format: str = ...,
    double_precision: int = ...,
    force_ascii: bool = ...,
    date_unit: str = ...,
    default_handler: Callable[[Any], JSONSerializable] | None = ...,
    lines: bool = ...,
    compression: CompressionOptions = ...,
    index: bool = ...,
    indent: int = ...,
    storage_options: StorageOptions = ...,
) -> None:
    ...
# Overload: with no path/buffer the JSON document is returned as a str.
@overload
def to_json(
    path_or_buf: None,
    obj: NDFrame,
    orient: str | None = ...,
    date_format: str = ...,
    double_precision: int = ...,
    force_ascii: bool = ...,
    date_unit: str = ...,
    default_handler: Callable[[Any], JSONSerializable] | None = ...,
    lines: bool = ...,
    compression: CompressionOptions = ...,
    index: bool = ...,
    indent: int = ...,
    storage_options: StorageOptions = ...,
) -> str:
    ...
def to_json(
    path_or_buf: FilePath | WriteBuffer[str] | WriteBuffer[bytes] | None,
    obj: NDFrame,
    orient: str | None = None,
    date_format: str = "epoch",
    double_precision: int = 10,
    force_ascii: bool = True,
    date_unit: str = "ms",
    default_handler: Callable[[Any], JSONSerializable] | None = None,
    lines: bool = False,
    compression: CompressionOptions = "infer",
    index: bool = True,
    indent: int = 0,
    storage_options: StorageOptions = None,
) -> str | None:
    """
    Serialize ``obj`` to JSON.

    Returns the JSON document as a string when ``path_or_buf`` is None;
    otherwise writes it to the given path/buffer and returns None.
    """
    # index=False is only meaningful for orients that represent the index
    # explicitly; other orients embed it implicitly in the structure.
    if not index and orient not in ["split", "table"]:
        raise ValueError(
            "'index=False' is only valid when 'orient' is 'split' or 'table'"
        )

    if lines and orient != "records":
        raise ValueError("'lines' keyword only valid when 'orient' is records")

    # The table-schema writer only handles DataFrames, so promote a Series.
    if orient == "table" and isinstance(obj, Series):
        obj = obj.to_frame(name=obj.name or "values")

    # Select the writer implementation for this object/orient combination.
    writer: type[Writer]
    if orient == "table" and isinstance(obj, DataFrame):
        writer = JSONTableWriter
    elif isinstance(obj, Series):
        writer = SeriesWriter
    elif isinstance(obj, DataFrame):
        writer = FrameWriter
    else:
        raise NotImplementedError("'obj' should be a Series or a DataFrame")

    json_string = writer(
        obj,
        orient=orient,
        date_format=date_format,
        double_precision=double_precision,
        ensure_ascii=force_ascii,
        date_unit=date_unit,
        default_handler=default_handler,
        index=index,
        indent=indent,
    ).write()

    if lines:
        json_string = convert_to_line_delimits(json_string)

    if path_or_buf is None:
        return json_string

    # apply compression and byte/text conversion
    with get_handle(
        path_or_buf, "w", compression=compression, storage_options=storage_options
    ) as handles:
        handles.handle.write(json_string)
    return None
class Writer(ABC):
    """Abstract base class for serializing a pandas object to JSON."""

    # Orient used when the caller passes orient=None; set by subclasses.
    _default_orient: str

    def __init__(
        self,
        obj,
        orient: str | None,
        date_format: str,
        double_precision: int,
        ensure_ascii: bool,
        date_unit: str,
        index: bool,
        default_handler: Callable[[Any], JSONSerializable] | None = None,
        indent: int = 0,
    ) -> None:
        self.obj = obj

        self.orient = orient if orient is not None else self._default_orient
        self.date_format = date_format
        self.double_precision = double_precision
        self.ensure_ascii = ensure_ascii
        self.date_unit = date_unit
        self.default_handler = default_handler
        self.index = index
        self.indent = indent

        self.is_copy = None
        self._format_axes()

    def _format_axes(self):
        # Subclasses validate axis-uniqueness requirements here.
        raise AbstractMethodError(self)

    def write(self) -> str:
        """Serialize ``obj_to_write`` and return the JSON document."""
        return dumps(
            self.obj_to_write,
            orient=self.orient,
            double_precision=self.double_precision,
            ensure_ascii=self.ensure_ascii,
            date_unit=self.date_unit,
            iso_dates=self.date_format == "iso",
            default_handler=self.default_handler,
            indent=self.indent,
        )

    @property
    @abstractmethod
    def obj_to_write(self) -> NDFrame | Mapping[IndexLabel, Any]:
        """Object to write in JSON format."""
class SeriesWriter(Writer):
    """JSON writer for Series; orient defaults to ``'index'``."""

    _default_orient = "index"

    @property
    def obj_to_write(self) -> NDFrame | Mapping[IndexLabel, Any]:
        if self.orient == "split" and not self.index:
            # Drop the index entirely: emit only the name and the raw values.
            return {"name": self.obj.name, "data": self.obj.values}
        return self.obj

    def _format_axes(self):
        # orient='index' keys the output by index label, so labels must be
        # unique or data would be silently lost.
        if self.orient == "index" and not self.obj.index.is_unique:
            raise ValueError(f"Series index must be unique for orient='{self.orient}'")
class FrameWriter(Writer):
    """JSON writer for DataFrame; orient defaults to ``'columns'``."""

    _default_orient = "columns"

    @property
    def obj_to_write(self) -> NDFrame | Mapping[IndexLabel, Any]:
        if self.orient == "split" and not self.index:
            # Build the 'split' mapping ourselves so the index can be dropped.
            as_split = self.obj.to_dict(orient="split")
            del as_split["index"]
            return as_split
        return self.obj

    def _format_axes(self):
        """
        Validate that the axes are unique where the orient requires it.
        """
        if self.orient in ("index", "columns") and not self.obj.index.is_unique:
            raise ValueError(
                f"DataFrame index must be unique for orient='{self.orient}'."
            )
        if (
            self.orient in ("index", "columns", "records")
            and not self.obj.columns.is_unique
        ):
            raise ValueError(
                f"DataFrame columns must be unique for orient='{self.orient}'."
            )
class JSONTableWriter(FrameWriter):
    # Writer for orient='table': emits {"schema": <Table Schema>, "data": ...}.
    _default_orient = "records"

    def __init__(
        self,
        obj,
        orient: str | None,
        date_format: str,
        double_precision: int,
        ensure_ascii: bool,
        date_unit: str,
        index: bool,
        default_handler: Callable[[Any], JSONSerializable] | None = None,
        indent: int = 0,
    ) -> None:
        """
        Adds a `schema` attribute with the Table Schema, resets
        the index (can't do in caller, because the schema inference needs
        to know what the index is, forces orient to records, and forces
        date_format to 'iso'.
        """
        super().__init__(
            obj,
            orient,
            date_format,
            double_precision,
            ensure_ascii,
            date_unit,
            index,
            default_handler=default_handler,
            indent=indent,
        )

        # Table Schema mandates ISO 8601 dates; any other format is an error.
        if date_format != "iso":
            msg = (
                "Trying to write with `orient='table'` and "
                f"`date_format='{date_format}'`. Table Schema requires dates "
                "to be formatted with `date_format='iso'`"
            )
            raise ValueError(msg)

        # Build the schema BEFORE the index is reset below, so that index
        # levels are still recognized as such during inference.
        self.schema = build_table_schema(obj, index=self.index)

        # NotImplemented on a column MultiIndex
        if obj.ndim == 2 and isinstance(obj.columns, MultiIndex):
            raise NotImplementedError(
                "orient='table' is not supported for MultiIndex columns"
            )

        # TODO: Do this timedelta properly in objToJSON.c See GH #15137
        # reset_index (below) would collide if a column shares a name with an
        # index level, so reject the overlap up front.
        if (
            (obj.ndim == 1)
            and (obj.name in set(obj.index.names))
            or len(obj.columns.intersection(obj.index.names))
        ):
            msg = "Overlapping names between the index and columns"
            raise ValueError(msg)

        # Work on a copy: the timedelta/period conversions below mutate obj.
        obj = obj.copy()
        timedeltas = obj.select_dtypes(include=["timedelta"]).columns
        # Timedeltas are serialized as ISO 8601 duration strings.
        if len(timedeltas):
            obj[timedeltas] = obj[timedeltas].applymap(lambda x: x.isoformat())
        # Convert PeriodIndex to datetimes before serializing
        if is_period_dtype(obj.index.dtype):
            obj.index = obj.index.to_timestamp()

        # exclude index from obj if index=False
        if not self.index:
            self.obj = obj.reset_index(drop=True)
        else:
            self.obj = obj.reset_index(drop=False)
        self.date_format = "iso"
        self.orient = "records"
        self.index = index

    @property
    def obj_to_write(self) -> NDFrame | Mapping[IndexLabel, Any]:
        # Payload is the pre-computed schema plus the reset-index data.
        return {"schema": self.schema, "data": self.obj}
# Overload: typ='frame' with an explicit chunksize returns a lazy reader.
@overload
def read_json(
    path_or_buf: FilePath | ReadBuffer[str] | ReadBuffer[bytes],
    *,
    orient: str | None = ...,
    typ: Literal["frame"] = ...,
    dtype: DtypeArg | None = ...,
    convert_axes=...,
    convert_dates: bool | list[str] = ...,
    keep_default_dates: bool = ...,
    numpy: bool = ...,
    precise_float: bool = ...,
    date_unit: str | None = ...,
    encoding: str | None = ...,
    encoding_errors: str | None = ...,
    lines: bool = ...,
    chunksize: int,
    compression: CompressionOptions = ...,
    nrows: int | None = ...,
    storage_options: StorageOptions = ...,
) -> JsonReader[Literal["frame"]]:
    ...
# Overload: typ='series' with an explicit chunksize returns a lazy reader.
@overload
def read_json(
    path_or_buf: FilePath | ReadBuffer[str] | ReadBuffer[bytes],
    *,
    orient: str | None = ...,
    typ: Literal["series"],
    dtype: DtypeArg | None = ...,
    convert_axes=...,
    convert_dates: bool | list[str] = ...,
    keep_default_dates: bool = ...,
    numpy: bool = ...,
    precise_float: bool = ...,
    date_unit: str | None = ...,
    encoding: str | None = ...,
    encoding_errors: str | None = ...,
    lines: bool = ...,
    chunksize: int,
    compression: CompressionOptions = ...,
    nrows: int | None = ...,
    storage_options: StorageOptions = ...,
) -> JsonReader[Literal["series"]]:
    ...
# Overload: typ='series' without a chunksize returns an eager Series.
@overload
def read_json(
    path_or_buf: FilePath | ReadBuffer[str] | ReadBuffer[bytes],
    *,
    orient: str | None = ...,
    typ: Literal["series"],
    dtype: DtypeArg | None = ...,
    convert_axes=...,
    convert_dates: bool | list[str] = ...,
    keep_default_dates: bool = ...,
    numpy: bool = ...,
    precise_float: bool = ...,
    date_unit: str | None = ...,
    encoding: str | None = ...,
    encoding_errors: str | None = ...,
    lines: bool = ...,
    chunksize: None = ...,
    compression: CompressionOptions = ...,
    nrows: int | None = ...,
    storage_options: StorageOptions = ...,
) -> Series:
    ...
# Overload: default typ='frame' without a chunksize returns a DataFrame.
@overload
def read_json(
    path_or_buf: FilePath | ReadBuffer[str] | ReadBuffer[bytes],
    orient: str | None = ...,
    typ: Literal["frame"] = ...,
    dtype: DtypeArg | None = ...,
    convert_axes=...,
    convert_dates: bool | list[str] = ...,
    keep_default_dates: bool = ...,
    numpy: bool = ...,
    precise_float: bool = ...,
    date_unit: str | None = ...,
    encoding: str | None = ...,
    encoding_errors: str | None = ...,
    lines: bool = ...,
    chunksize: None = ...,
    compression: CompressionOptions = ...,
    nrows: int | None = ...,
    storage_options: StorageOptions = ...,
) -> DataFrame:
    ...
@doc(
    storage_options=_shared_docs["storage_options"],
    decompression_options=_shared_docs["decompression_options"] % "path_or_buf",
)
@deprecate_kwarg(old_arg_name="numpy", new_arg_name=None)
@deprecate_nonkeyword_arguments(version="2.0", allowed_args=["path_or_buf"])
def read_json(
    path_or_buf: FilePath | ReadBuffer[str] | ReadBuffer[bytes],
    orient: str | None = None,
    typ: Literal["frame", "series"] = "frame",
    dtype: DtypeArg | None = None,
    convert_axes=None,
    convert_dates: bool | list[str] = True,
    keep_default_dates: bool = True,
    numpy: bool = False,
    precise_float: bool = False,
    date_unit: str | None = None,
    encoding: str | None = None,
    encoding_errors: str | None = "strict",
    lines: bool = False,
    chunksize: int | None = None,
    compression: CompressionOptions = "infer",
    nrows: int | None = None,
    storage_options: StorageOptions = None,
) -> DataFrame | Series | JsonReader:
    """
    Convert a JSON string to pandas object.

    Parameters
    ----------
    path_or_buf : a valid JSON str, path object or file-like object
        Any valid string path is acceptable. The string could be a URL. Valid
        URL schemes include http, ftp, s3, and file. For file URLs, a host is
        expected. A local file could be:
        ``file://localhost/path/to/table.json``.

        If you want to pass in a path object, pandas accepts any
        ``os.PathLike``.

        By file-like object, we refer to objects with a ``read()`` method,
        such as a file handle (e.g. via builtin ``open`` function)
        or ``StringIO``.
    orient : str
        Indication of expected JSON string format.
        Compatible JSON strings can be produced by ``to_json()`` with a
        corresponding orient value.
        The set of possible orients is:

        - ``'split'`` : dict like
          ``{{index -> [index], columns -> [columns], data -> [values]}}``
        - ``'records'`` : list like
          ``[{{column -> value}}, ... , {{column -> value}}]``
        - ``'index'`` : dict like ``{{index -> {{column -> value}}}}``
        - ``'columns'`` : dict like ``{{column -> {{index -> value}}}}``
        - ``'values'`` : just the values array

        The allowed and default values depend on the value
        of the `typ` parameter.

        * when ``typ == 'series'``,

          - allowed orients are ``{{'split','records','index'}}``
          - default is ``'index'``
          - The Series index must be unique for orient ``'index'``.

        * when ``typ == 'frame'``,

          - allowed orients are ``{{'split','records','index',
            'columns','values', 'table'}}``
          - default is ``'columns'``
          - The DataFrame index must be unique for orients ``'index'`` and
            ``'columns'``.
          - The DataFrame columns must be unique for orients ``'index'``,
            ``'columns'``, and ``'records'``.

    typ : {{'frame', 'series'}}, default 'frame'
        The type of object to recover.

    dtype : bool or dict, default None
        If True, infer dtypes; if a dict of column to dtype, then use those;
        if False, then don't infer dtypes at all, applies only to the data.

        For all ``orient`` values except ``'table'``, default is True.

        .. versionchanged:: 0.25.0

           Not applicable for ``orient='table'``.

    convert_axes : bool, default None
        Try to convert the axes to the proper dtypes.

        For all ``orient`` values except ``'table'``, default is True.

        .. versionchanged:: 0.25.0

           Not applicable for ``orient='table'``.

    convert_dates : bool or list of str, default True
        If True then default datelike columns may be converted (depending on
        keep_default_dates).
        If False, no dates will be converted.
        If a list of column names, then those columns will be converted and
        default datelike columns may also be converted (depending on
        keep_default_dates).

    keep_default_dates : bool, default True
        If parsing dates (convert_dates is not False), then try to parse the
        default datelike columns.
        A column label is datelike if

        * it ends with ``'_at'``,

        * it ends with ``'_time'``,

        * it begins with ``'timestamp'``,

        * it is ``'modified'``, or

        * it is ``'date'``.

    numpy : bool, default False
        Direct decoding to numpy arrays. Supports numeric data only, but
        non-numeric column and index labels are supported. Note also that the
        JSON ordering MUST be the same for each term if numpy=True.

        .. deprecated:: 1.0.0

    precise_float : bool, default False
        Set to enable usage of higher precision (strtod) function when
        decoding string to double values. Default (False) is to use fast but
        less precise builtin functionality.

    date_unit : str, default None
        The timestamp unit to detect if converting dates. The default behaviour
        is to try and detect the correct precision, but if this is not desired
        then pass one of 's', 'ms', 'us' or 'ns' to force parsing only seconds,
        milliseconds, microseconds or nanoseconds respectively.

    encoding : str, default is 'utf-8'
        The encoding to use to decode py3 bytes.

    encoding_errors : str, optional, default "strict"
        How encoding errors are treated. `List of possible values
        <https://docs.python.org/3/library/codecs.html#error-handlers>`_ .

        .. versionadded:: 1.3.0

    lines : bool, default False
        Read the file as a json object per line.

    chunksize : int, optional
        Return JsonReader object for iteration.
        See the `line-delimited json docs
        <https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#line-delimited-json>`_
        for more information on ``chunksize``.
        This can only be passed if `lines=True`.
        If this is None, the file will be read into memory all at once.

        .. versionchanged:: 1.2

           ``JsonReader`` is a context manager.

    {decompression_options}

        .. versionchanged:: 1.4.0 Zstandard support.

    nrows : int, optional
        The number of lines from the line-delimited jsonfile that has to be read.
        This can only be passed if `lines=True`.
        If this is None, all the rows will be returned.

        .. versionadded:: 1.1

    {storage_options}

        .. versionadded:: 1.2.0

    Returns
    -------
    Series or DataFrame
        The type returned depends on the value of `typ`.

    See Also
    --------
    DataFrame.to_json : Convert a DataFrame to a JSON string.
    Series.to_json : Convert a Series to a JSON string.
    json_normalize : Normalize semi-structured JSON data into a flat table.

    Notes
    -----
    Specific to ``orient='table'``, if a :class:`DataFrame` with a literal
    :class:`Index` name of `index` gets written with :func:`to_json`, the
    subsequent read operation will incorrectly set the :class:`Index` name to
    ``None``. This is because `index` is also used by :func:`DataFrame.to_json`
    to denote a missing :class:`Index` name, and the subsequent
    :func:`read_json` operation cannot distinguish between the two. The same
    limitation is encountered with a :class:`MultiIndex` and any names
    beginning with ``'level_'``.

    Examples
    --------
    >>> df = pd.DataFrame([['a', 'b'], ['c', 'd']],
    ...                   index=['row 1', 'row 2'],
    ...                   columns=['col 1', 'col 2'])

    Encoding/decoding a Dataframe using ``'split'`` formatted JSON:

    >>> df.to_json(orient='split')
        '\
{{\
"columns":["col 1","col 2"],\
"index":["row 1","row 2"],\
"data":[["a","b"],["c","d"]]\
}}\
'
    >>> pd.read_json(_, orient='split')
          col 1 col 2
    row 1     a     b
    row 2     c     d

    Encoding/decoding a Dataframe using ``'index'`` formatted JSON:

    >>> df.to_json(orient='index')
    '{{"row 1":{{"col 1":"a","col 2":"b"}},"row 2":{{"col 1":"c","col 2":"d"}}}}'

    >>> pd.read_json(_, orient='index')
          col 1 col 2
    row 1     a     b
    row 2     c     d

    Encoding/decoding a Dataframe using ``'records'`` formatted JSON.
    Note that index labels are not preserved with this encoding.

    >>> df.to_json(orient='records')
    '[{{"col 1":"a","col 2":"b"}},{{"col 1":"c","col 2":"d"}}]'
    >>> pd.read_json(_, orient='records')
      col 1 col 2
    0     a     b
    1     c     d

    Encoding with Table Schema

    >>> df.to_json(orient='table')
        '\
{{"schema":{{"fields":[\
{{"name":"index","type":"string"}},\
{{"name":"col 1","type":"string"}},\
{{"name":"col 2","type":"string"}}],\
"primaryKey":["index"],\
"pandas_version":"1.4.0"}},\
"data":[\
{{"index":"row 1","col 1":"a","col 2":"b"}},\
{{"index":"row 2","col 1":"c","col 2":"d"}}]\
}}\
'
    """
    # For orient='table' the Table Schema already fixes dtypes and axes, so
    # explicitly passing either option is rejected.
    if orient == "table" and dtype:
        raise ValueError("cannot pass both dtype and orient='table'")
    if orient == "table" and convert_axes:
        raise ValueError("cannot pass both convert_axes and orient='table'")

    # Defaults for dtype/convert_axes are True everywhere except 'table'.
    if dtype is None and orient != "table":
        # error: Incompatible types in assignment (expression has type "bool", variable
        # has type "Union[ExtensionDtype, str, dtype[Any], Type[str], Type[float],
        # Type[int], Type[complex], Type[bool], Type[object], Dict[Hashable,
        # Union[ExtensionDtype, Union[str, dtype[Any]], Type[str], Type[float],
        # Type[int], Type[complex], Type[bool], Type[object]]], None]")
        dtype = True  # type: ignore[assignment]
    if convert_axes is None and orient != "table":
        convert_axes = True

    json_reader = JsonReader(
        path_or_buf,
        orient=orient,
        typ=typ,
        dtype=dtype,
        convert_axes=convert_axes,
        convert_dates=convert_dates,
        keep_default_dates=keep_default_dates,
        numpy=numpy,
        precise_float=precise_float,
        date_unit=date_unit,
        encoding=encoding,
        lines=lines,
        chunksize=chunksize,
        compression=compression,
        nrows=nrows,
        storage_options=storage_options,
        encoding_errors=encoding_errors,
    )

    # With a chunksize the caller iterates the reader lazily; otherwise read
    # the whole document now and close any handle we opened.
    if chunksize:
        return json_reader

    with json_reader:
        return json_reader.read()
class JsonReader(abc.Iterator, Generic[FrameSeriesStrT]):
    """
    JsonReader provides an interface for reading in a JSON file.

    If initialized with ``lines=True`` and ``chunksize``, can be iterated over
    ``chunksize`` lines at a time. Otherwise, calling ``read`` reads in the
    whole document.
    """

    def __init__(
        self,
        filepath_or_buffer,
        orient,
        typ: FrameSeriesStrT,
        dtype,
        convert_axes,
        convert_dates,
        keep_default_dates: bool,
        numpy: bool,
        precise_float: bool,
        date_unit,
        encoding,
        lines: bool,
        chunksize: int | None,
        compression: CompressionOptions,
        nrows: int | None,
        storage_options: StorageOptions = None,
        encoding_errors: str | None = "strict",
    ) -> None:

        self.orient = orient
        self.typ = typ
        self.dtype = dtype
        self.convert_axes = convert_axes
        self.convert_dates = convert_dates
        self.keep_default_dates = keep_default_dates
        self.numpy = numpy
        self.precise_float = precise_float
        self.date_unit = date_unit
        self.encoding = encoding
        self.compression = compression
        self.storage_options = storage_options
        self.lines = lines
        self.chunksize = chunksize
        # Running count of rows already yielded; used to re-index chunks.
        self.nrows_seen = 0
        self.nrows = nrows
        self.encoding_errors = encoding_errors
        # Set by _get_data_from_filepath when we open the stream ourselves;
        # close() only releases handles we own.
        self.handles: IOHandles[str] | None = None

        # chunksize/nrows are only meaningful for line-delimited JSON.
        if self.chunksize is not None:
            self.chunksize = validate_integer("chunksize", self.chunksize, 1)
            if not self.lines:
                raise ValueError("chunksize can only be passed if lines=True")
        if self.nrows is not None:
            self.nrows = validate_integer("nrows", self.nrows, 0)
            if not self.lines:
                raise ValueError("nrows can only be passed if lines=True")

        data = self._get_data_from_filepath(filepath_or_buffer)
        self.data = self._preprocess_data(data)

    def _preprocess_data(self, data):
        """
        At this point, the data either has a `read` attribute (e.g. a file
        object or a StringIO) or is a string that is a JSON document.

        If self.chunksize, we prepare the data for the `__next__` method.
        Otherwise, we read it into memory for the `read` method.
        """
        # Whole-document mode: drain a file-like object now (closing our
        # handle via the context manager), leaving a plain string.
        if hasattr(data, "read") and not (self.chunksize or self.nrows):
            with self:
                data = data.read()
        # Chunked/nrows mode needs something iterable line-by-line, so wrap
        # a bare string in StringIO.
        if not hasattr(data, "read") and (self.chunksize or self.nrows):
            data = StringIO(data)

        return data

    def _get_data_from_filepath(self, filepath_or_buffer):
        """
        The function read_json accepts three input types:
            1. filepath (string-like)
            2. file-like object (e.g. open file object, StringIO)
            3. JSON string

        This method turns (1) into (2) to simplify the rest of the processing.
        It returns input types (2) and (3) unchanged.

        It raises FileNotFoundError if the input is a string ending in
        one of .json, .json.gz, .json.bz2, etc. but no such file exists.
        """
        # if it is a string but the file does not exist, it might be a JSON string
        filepath_or_buffer = stringify_path(filepath_or_buffer)
        if (
            not isinstance(filepath_or_buffer, str)
            or is_url(filepath_or_buffer)
            or is_fsspec_url(filepath_or_buffer)
            or file_exists(filepath_or_buffer)
        ):
            # Open the resource ourselves and remember the handles so close()
            # can release them later.
            self.handles = get_handle(
                filepath_or_buffer,
                "r",
                encoding=self.encoding,
                compression=self.compression,
                storage_options=self.storage_options,
                errors=self.encoding_errors,
            )
            filepath_or_buffer = self.handles.handle
        elif (
            isinstance(filepath_or_buffer, str)
            and filepath_or_buffer.lower().endswith(
                (".json",) + tuple(f".json{c}" for c in _extension_to_compression)
            )
            and not file_exists(filepath_or_buffer)
        ):
            # Looks like a path to a JSON file but nothing is there: raise a
            # clear error instead of trying to parse the path as JSON text.
            raise FileNotFoundError(f"File {filepath_or_buffer} does not exist")

        return filepath_or_buffer

    def _combine_lines(self, lines) -> str:
        """
        Combines a list of JSON objects into one JSON object.
        """
        # Strip whitespace, drop empty lines, and wrap the rest in an array.
        return (
            f'[{",".join([line for line in (line.strip() for line in lines) if line])}]'
        )

    @overload
    def read(self: JsonReader[Literal["frame"]]) -> DataFrame:
        ...

    @overload
    def read(self: JsonReader[Literal["series"]]) -> Series:
        ...

    @overload
    def read(self: JsonReader[Literal["frame", "series"]]) -> DataFrame | Series:
        ...

    def read(self) -> DataFrame | Series:
        """
        Read the whole JSON input into a pandas object.
        """
        obj: DataFrame | Series
        if self.lines:
            if self.chunksize:
                # Iterate ourselves chunk by chunk and stitch the result.
                obj = concat(self)
            elif self.nrows:
                # Parse only the first nrows lines.
                lines = list(islice(self.data, self.nrows))
                lines_json = self._combine_lines(lines)
                obj = self._get_object_parser(lines_json)
            else:
                data = ensure_str(self.data)
                data_lines = data.split("\n")
                obj = self._get_object_parser(self._combine_lines(data_lines))
        else:
            obj = self._get_object_parser(self.data)
        self.close()
        return obj

    def _get_object_parser(self, json) -> DataFrame | Series:
        """
        Parses a json document into a pandas object.
        """
        typ = self.typ
        dtype = self.dtype
        kwargs = {
            "orient": self.orient,
            "dtype": self.dtype,
            "convert_axes": self.convert_axes,
            "convert_dates": self.convert_dates,
            "keep_default_dates": self.keep_default_dates,
            "numpy": self.numpy,
            "precise_float": self.precise_float,
            "date_unit": self.date_unit,
        }
        obj = None
        if typ == "frame":
            obj = FrameParser(json, **kwargs).parse()

        # Fall back to a Series when typ='series' or the frame parse yielded
        # nothing.
        if typ == "series" or obj is None:
            if not isinstance(dtype, bool):
                kwargs["dtype"] = dtype
            obj = SeriesParser(json, **kwargs).parse()

        return obj

    def close(self) -> None:
        """
        If we opened a stream earlier, in _get_data_from_filepath, we should
        close it.

        If an open stream or file was passed, we leave it open.
        """
        if self.handles is not None:
            self.handles.close()

    def __iter__(self: JsonReader[FrameSeriesStrT]) -> JsonReader[FrameSeriesStrT]:
        return self

    @overload
    def __next__(self: JsonReader[Literal["frame"]]) -> DataFrame:
        ...

    @overload
    def __next__(self: JsonReader[Literal["series"]]) -> Series:
        ...

    @overload
    def __next__(self: JsonReader[Literal["frame", "series"]]) -> DataFrame | Series:
        ...

    def __next__(self) -> DataFrame | Series:
        # Stop once nrows rows have been produced in total.
        if self.nrows:
            if self.nrows_seen >= self.nrows:
                self.close()
                raise StopIteration

        lines = list(islice(self.data, self.chunksize))
        if lines:
            lines_json = self._combine_lines(lines)
            obj = self._get_object_parser(lines_json)

            # Make sure that the returned objects have the right index.
            obj.index = range(self.nrows_seen, self.nrows_seen + len(obj))
            self.nrows_seen += len(obj)

            return obj

        # Input exhausted.
        self.close()
        raise StopIteration

    def __enter__(self) -> JsonReader[FrameSeriesStrT]:
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        self.close()
class Parser:
    # Base class for decoding a JSON document into a pandas object.
    # Keys permitted in an orient='split' payload; set by subclasses.
    _split_keys: tuple[str, ...]
    # Orient used when the caller passes orient=None; set by subclasses.
    _default_orient: str

    _STAMP_UNITS = ("s", "ms", "us", "ns")
    # Smallest epoch value, per unit, treated as a plausible timestamp:
    # one year past the epoch expressed in each supported resolution.
    _MIN_STAMPS = {
        "s": 31536000,
        "ms": 31536000000,
        "us": 31536000000000,
        "ns": 31536000000000000,
    }
1010 def __init__(
1011 self,
1012 json,
1013 orient,
1014 dtype: DtypeArg | None = None,
1015 convert_axes: bool = True,
1016 convert_dates: bool | list[str] = True,
1017 keep_default_dates: bool = False,
1018 numpy: bool = False,
1019 precise_float: bool = False,
1020 date_unit=None,
1021 ) -> None:
1022 self.json = json
1024 if orient is None:
1025 orient = self._default_orient
1027 self.orient = orient
1029 self.dtype = dtype
1031 if orient == "split":
1032 numpy = False
1034 if date_unit is not None:
1035 date_unit = date_unit.lower()
1036 if date_unit not in self._STAMP_UNITS:
1037 raise ValueError(f"date_unit must be one of {self._STAMP_UNITS}")
1038 self.min_stamp = self._MIN_STAMPS[date_unit]
1039 else:
1040 self.min_stamp = self._MIN_STAMPS["s"]
1042 self.numpy = numpy
1043 self.precise_float = precise_float
1044 self.convert_axes = convert_axes
1045 self.convert_dates = convert_dates
1046 self.date_unit = date_unit
1047 self.keep_default_dates = keep_default_dates
1048 self.obj: DataFrame | Series | None = None
1050 def check_keys_split(self, decoded) -> None:
1051 """
1052 Checks that dict has only the appropriate keys for orient='split'.
1053 """
1054 bad_keys = set(decoded.keys()).difference(set(self._split_keys))
1055 if bad_keys:
1056 bad_keys_joined = ", ".join(bad_keys)
1057 raise ValueError(f"JSON data had unexpected key(s): {bad_keys_joined}")
1059 def parse(self):
1061 if self.numpy:
1062 self._parse_numpy()
1063 else:
1064 self._parse_no_numpy()
1066 if self.obj is None:
1067 return None
1068 if self.convert_axes:
1069 self._convert_axes()
1070 self._try_convert_types()
1071 return self.obj
    def _parse_numpy(self):
        # Subclass hook: decode self.json via the deprecated numpy fast path.
        raise AbstractMethodError(self)
    def _parse_no_numpy(self):
        # Subclass hook: decode self.json via the standard (non-numpy) path.
        raise AbstractMethodError(self)
1079 def _convert_axes(self):
1080 """
1081 Try to convert axes.
1082 """
1083 obj = self.obj
1084 assert obj is not None # for mypy
1085 for axis_name in obj._AXIS_ORDERS:
1086 new_axis, result = self._try_convert_data(
1087 name=axis_name,
1088 data=obj._get_axis(axis_name),
1089 use_dtypes=False,
1090 convert_dates=True,
1091 )
1092 if result:
1093 setattr(self.obj, axis_name, new_axis)
    def _try_convert_types(self):
        # Subclass hook: coerce value dtypes after the document is decoded.
        raise AbstractMethodError(self)
    def _try_convert_data(
        self,
        name,
        data,
        use_dtypes: bool = True,
        convert_dates: bool | list[str] = True,
    ):
        """
        Try to parse a ndarray like into a column by inferring dtype.

        Returns a ``(data, converted)`` pair; ``converted`` reports whether
        any conversion was applied.
        """
        # don't try to coerce, unless a force conversion
        if use_dtypes:
            if not self.dtype:
                # dtype handling disabled: only normalize missing values.
                if all(notna(data)):
                    return data, False
                return data.fillna(np.nan), True

            # error: Non-overlapping identity check (left operand type:
            # "Union[ExtensionDtype, str, dtype[Any], Type[object],
            # Dict[Hashable, Union[ExtensionDtype, Union[str, dtype[Any]],
            # Type[str], Type[float], Type[int], Type[complex], Type[bool],
            # Type[object]]]]", right operand type: "Literal[True]")
            elif self.dtype is True:  # type: ignore[comparison-overlap]
                # dtype=True means "infer below", so fall through.
                pass
            else:
                # dtype to force
                dtype = (
                    self.dtype.get(name) if isinstance(self.dtype, dict) else self.dtype
                )
                if dtype is not None:
                    try:
                        return data.astype(dtype), True
                    except (TypeError, ValueError):
                        return data, False

        if convert_dates:
            new_data, result = self._try_convert_to_date(data)
            if result:
                return new_data, True

        if data.dtype == "object":

            # try float
            try:
                data = data.astype("float64")
            except (TypeError, ValueError):
                pass

        if data.dtype.kind == "f":

            if data.dtype != "float64":

                # coerce floats to 64
                try:
                    data = data.astype("float64")
                except (TypeError, ValueError):
                    pass

        # don't coerce 0-len data
        if len(data) and (data.dtype == "float" or data.dtype == "object"):

            # coerce ints if we can
            try:
                new_data = data.astype("int64")
                if (new_data == data).all():
                    data = new_data
            except (TypeError, ValueError, OverflowError):
                pass

        # coerce ints to 64
        if data.dtype == "int":

            # coerce floats to 64
            try:
                data = data.astype("int64")
            except (TypeError, ValueError):
                pass

        # if we have an index, we want to preserve dtypes
        if name == "index" and len(data):
            if self.orient == "split":
                return data, False

        return data, True
1183 def _try_convert_to_date(self, data):
1184 """
1185 Try to parse a ndarray like into a date column.
1187 Try to coerce object in epoch/iso formats and integer/float in epoch
1188 formats. Return a boolean if parsing was successful.
1189 """
1190 # no conversion on empty
1191 if not len(data):
1192 return data, False
1194 new_data = data
1195 if new_data.dtype == "object":
1196 try:
1197 new_data = data.astype("int64")
1198 except (TypeError, ValueError, OverflowError):
1199 pass
1201 # ignore numbers that are out of range
1202 if issubclass(new_data.dtype.type, np.number):
1203 in_range = (
1204 isna(new_data._values)
1205 | (new_data > self.min_stamp)
1206 | (new_data._values == iNaT)
1207 )
1208 if not in_range.all():
1209 return data, False
1211 date_units = (self.date_unit,) if self.date_unit else self._STAMP_UNITS
1212 for date_unit in date_units:
1213 try:
1214 new_data = to_datetime(new_data, errors="raise", unit=date_unit)
1215 except (ValueError, OverflowError, TypeError):
1216 continue
1217 return new_data, True
1218 return data, False
def _try_convert_dates(self):
    """Date-conversion hook; concrete subclasses must implement."""
    raise AbstractMethodError(self)
class SeriesParser(Parser):
    """Parser specialization that materializes a Series from decoded JSON."""

    _default_orient = "index"
    _split_keys = ("name", "index", "data")

    def _parse_no_numpy(self):
        # Plain decoding path: decode once, then build the Series.
        data = loads(self.json, precise_float=self.precise_float)

        if self.orient != "split":
            self.obj = create_series_with_explicit_dtype(data, dtype_if_empty=object)
        else:
            decoded = {str(k): v for k, v in data.items()}
            self.check_keys_split(decoded)
            self.obj = create_series_with_explicit_dtype(**decoded)

    def _parse_numpy(self):
        # Numpy-accelerated decoding path; labelled decoding is only
        # meaningful for the "columns" and "index" orients.
        load_kwargs = {
            "dtype": None,
            "numpy": True,
            "precise_float": self.precise_float,
        }
        if self.orient in ("columns", "index"):
            load_kwargs["labelled"] = True
        data = loads(self.json, **load_kwargs)

        if self.orient == "split":
            decoded = {str(k): v for k, v in data.items()}
            self.check_keys_split(decoded)
            self.obj = create_series_with_explicit_dtype(**decoded)
        elif self.orient in ("columns", "index"):
            # error: "create_series_with_explicit_dtype"
            # gets multiple values for keyword argument "dtype_if_empty
            self.obj = create_series_with_explicit_dtype(
                *data, dtype_if_empty=object
            )  # type: ignore[misc]
        else:
            self.obj = create_series_with_explicit_dtype(data, dtype_if_empty=object)

    def _try_convert_types(self):
        """Coerce the whole Series to a better dtype when possible."""
        if self.obj is None:
            return
        converted, success = self._try_convert_data(
            "data", self.obj, convert_dates=self.convert_dates
        )
        if success:
            self.obj = converted
1272class FrameParser(Parser):
1273 _default_orient = "columns"
1274 _split_keys = ("columns", "index", "data")
def _parse_numpy(self):
    """
    Build ``self.obj`` (a DataFrame) from ``self.json`` using the
    numpy-accelerated decoder; the decoding call shape depends on
    ``self.orient``.
    """

    json = self.json
    orient = self.orient

    if orient == "columns":
        args = loads(
            json,
            dtype=None,
            numpy=True,
            labelled=True,
            precise_float=self.precise_float,
        )
        if len(args):
            # Reorder the decoded tuple to line up with DataFrame's
            # positional (data, index, columns) parameters.  The values
            # are transposed — presumably the labelled decoder returns
            # column-major data for this orient; confirm against the
            # C decoder if touching this.
            args = (args[0].T, args[2], args[1])
        self.obj = DataFrame(*args)
    elif orient == "split":
        decoded = loads(
            json, dtype=None, numpy=True, precise_float=self.precise_float
        )
        # normalize keys to str before validating against _split_keys
        decoded = {str(k): v for k, v in decoded.items()}
        self.check_keys_split(decoded)
        self.obj = DataFrame(**decoded)
    elif orient == "values":
        self.obj = DataFrame(
            loads(json, dtype=None, numpy=True, precise_float=self.precise_float)
        )
    else:
        # remaining orients: splat the labelled decode result straight
        # into the DataFrame constructor
        self.obj = DataFrame(
            *loads(
                json,
                dtype=None,
                numpy=True,
                labelled=True,
                precise_float=self.precise_float,
            )
        )
1314 def _parse_no_numpy(self):
1316 json = self.json
1317 orient = self.orient
1319 if orient == "columns":
1320 self.obj = DataFrame(
1321 loads(json, precise_float=self.precise_float), dtype=None
1322 )
1323 elif orient == "split":
1324 decoded = {
1325 str(k): v
1326 for k, v in loads(json, precise_float=self.precise_float).items()
1327 }
1328 self.check_keys_split(decoded)
1329 self.obj = DataFrame(dtype=None, **decoded)
1330 elif orient == "index":
1331 self.obj = DataFrame.from_dict(
1332 loads(json, precise_float=self.precise_float),
1333 dtype=None,
1334 orient="index",
1335 )
1336 elif orient == "table":
1337 self.obj = parse_table_schema(json, precise_float=self.precise_float)
1338 else:
1339 self.obj = DataFrame(
1340 loads(json, precise_float=self.precise_float), dtype=None
1341 )
1343 def _process_converter(self, f, filt=None):
1344 """
1345 Take a conversion function and possibly recreate the frame.
1346 """
1347 if filt is None:
1348 filt = lambda col, c: True
1350 obj = self.obj
1351 assert obj is not None # for mypy
1353 needs_new_obj = False
1354 new_obj = {}
1355 for i, (col, c) in enumerate(obj.items()):
1356 if filt(col, c):
1357 new_data, result = f(col, c)
1358 if result:
1359 c = new_data
1360 needs_new_obj = True
1361 new_obj[i] = c
1363 if needs_new_obj:
1365 # possibly handle dup columns
1366 new_frame = DataFrame(new_obj, index=obj.index)
1367 new_frame.columns = obj.columns
1368 self.obj = new_frame
1370 def _try_convert_types(self):
1371 if self.obj is None:
1372 return
1373 if self.convert_dates:
1374 self._try_convert_dates()
1376 self._process_converter(
1377 lambda col, c: self._try_convert_data(col, c, convert_dates=False)
1378 )
1380 def _try_convert_dates(self):
1381 if self.obj is None:
1382 return
1384 # our columns to parse
1385 convert_dates_list_bool = self.convert_dates
1386 if isinstance(convert_dates_list_bool, bool):
1387 convert_dates_list_bool = []
1388 convert_dates = set(convert_dates_list_bool)
1390 def is_ok(col) -> bool:
1391 """
1392 Return if this col is ok to try for a date parse.
1393 """
1394 if not isinstance(col, str):
1395 return False
1397 col_lower = col.lower()
1398 if (
1399 col_lower.endswith("_at")
1400 or col_lower.endswith("_time")
1401 or col_lower == "modified"
1402 or col_lower == "date"
1403 or col_lower == "datetime"
1404 or col_lower.startswith("timestamp")
1405 ):
1406 return True
1407 return False
1409 self._process_converter(
1410 lambda col, c: self._try_convert_to_date(c),
1411 lambda col, c: (
1412 (self.keep_default_dates and is_ok(col)) or col in convert_dates
1413 ),
1414 )