Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/io/sql.py: 13%

684 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1""" 

2Collection of query wrappers / abstractions to both facilitate data 

3retrieval and to reduce dependency on DB-specific API. 

4""" 

5 

6from __future__ import annotations 

7 

8from contextlib import contextmanager 

9from datetime import ( 

10 date, 

11 datetime, 

12 time, 

13) 

14from functools import partial 

15import re 

16from typing import ( 

17 TYPE_CHECKING, 

18 Any, 

19 Iterator, 

20 cast, 

21 overload, 

22) 

23import warnings 

24 

25import numpy as np 

26 

27import pandas._libs.lib as lib 

28from pandas._typing import ( 

29 DateTimeErrorChoices, 

30 DtypeArg, 

31 IndexLabel, 

32) 

33from pandas.compat._optional import import_optional_dependency 

34from pandas.errors import ( 

35 AbstractMethodError, 

36 DatabaseError, 

37) 

38from pandas.util._exceptions import find_stack_level 

39 

40from pandas.core.dtypes.common import ( 

41 is_datetime64tz_dtype, 

42 is_dict_like, 

43 is_integer, 

44 is_list_like, 

45) 

46from pandas.core.dtypes.dtypes import DatetimeTZDtype 

47from pandas.core.dtypes.missing import isna 

48 

49from pandas import get_option 

50from pandas.core.api import ( 

51 DataFrame, 

52 Series, 

53) 

54from pandas.core.base import PandasObject 

55import pandas.core.common as com 

56from pandas.core.tools.datetimes import to_datetime 

57 

58if TYPE_CHECKING: 58 ↛ 59line 58 didn't jump to line 59, because the condition on line 58 was never true

59 from sqlalchemy import Table 

60 

61 

62# ----------------------------------------------------------------------------- 

63# -- Helper functions 

64 

65 

66def _convert_params(sql, params): 

67 """Convert SQL and params args to DBAPI2.0 compliant format.""" 

68 args = [sql] 

69 if params is not None: 

70 if hasattr(params, "keys"): # test if params is a mapping 

71 args += [params] 

72 else: 

73 args += [list(params)] 

74 return args 

75 

76 

77def _process_parse_dates_argument(parse_dates): 

78 """Process parse_dates argument for read_sql functions""" 

79 # handle non-list entries for parse_dates gracefully 

80 if parse_dates is True or parse_dates is None or parse_dates is False: 

81 parse_dates = [] 

82 

83 elif not hasattr(parse_dates, "__iter__"): 

84 parse_dates = [parse_dates] 

85 return parse_dates 

86 

87 

88def _handle_date_column( 

89 col, utc: bool | None = None, format: str | dict[str, Any] | None = None 

90): 

91 if isinstance(format, dict): 

92 # GH35185 Allow custom error values in parse_dates argument of 

93 # read_sql like functions. 

94 # Format can take on custom to_datetime argument values such as 

95 # {"errors": "coerce"} or {"dayfirst": True} 

96 error: DateTimeErrorChoices = format.pop("errors", None) or "ignore" 

97 return to_datetime(col, errors=error, **format) 

98 else: 

99 # Allow passing of formatting string for integers 

100 # GH17855 

101 if format is None and ( 

102 issubclass(col.dtype.type, np.floating) 

103 or issubclass(col.dtype.type, np.integer) 

104 ): 

105 format = "s" 

106 if format in ["D", "d", "h", "m", "s", "ms", "us", "ns"]: 

107 return to_datetime(col, errors="coerce", unit=format, utc=utc) 

108 elif is_datetime64tz_dtype(col.dtype): 

109 # coerce to UTC timezone 

110 # GH11216 

111 return to_datetime(col, utc=True) 

112 else: 

113 return to_datetime(col, errors="coerce", format=format, utc=utc) 

114 

115 

def _parse_date_columns(data_frame, parse_dates):
    """
    Force non-datetime columns to be read as such.
    Supports both string formatted and integer timestamp columns.
    """
    parse_dates = _process_parse_dates_argument(parse_dates)

    # we want to coerce datetime64_tz dtypes for now to UTC
    # we could in theory do a 'nice' conversion from a FixedOffset tz
    # GH11216
    for name, column in data_frame.items():
        if not (is_datetime64tz_dtype(column.dtype) or name in parse_dates):
            continue
        try:
            fmt = parse_dates[name]
        except TypeError:
            # parse_dates is a list, not a column -> format mapping
            fmt = None
        data_frame[name] = _handle_date_column(column, format=fmt)

    return data_frame

135 

136 

def _wrap_result(
    data,
    columns,
    index_col=None,
    coerce_float: bool = True,
    parse_dates=None,
    dtype: DtypeArg | None = None,
):
    """Wrap result set of query in a DataFrame, applying dtype casts,
    date parsing, and the requested index."""
    frame = DataFrame.from_records(data, columns=columns, coerce_float=coerce_float)

    if dtype:
        frame = frame.astype(dtype)

    frame = _parse_date_columns(frame, parse_dates)

    if index_col is not None:
        frame = frame.set_index(index_col)

    return frame

157 

158 

def execute(sql, con, params=None):
    """
    Execute the given SQL query using the provided connection object.

    Parameters
    ----------
    sql : string
        SQL query to be executed.
    con : SQLAlchemy connectable(engine/connection) or sqlite3 connection
        Using SQLAlchemy makes it possible to use any DB supported by the
        library. If a DBAPI2 object, only sqlite3 is supported.
    params : list or tuple, optional, default: None
        List of parameters to pass to execute method.

    Returns
    -------
    Results Iterable
    """
    dbapi_args = _convert_params(sql, params)
    return pandasSQL_builder(con).execute(*dbapi_args)

181 

182 

183# ----------------------------------------------------------------------------- 

184# -- Read and write to DataFrames 

185 

186 

@overload
def read_sql_table(
    table_name,
    con,
    schema=...,
    index_col: str | list[str] | None = ...,
    coerce_float=...,
    parse_dates: list[str] | dict[str, str] | None = ...,
    columns: list[str] | None = ...,
    chunksize: None = ...,
) -> DataFrame:
    ...


@overload
def read_sql_table(
    table_name,
    con,
    schema=...,
    index_col: str | list[str] | None = ...,
    coerce_float=...,
    parse_dates: list[str] | dict[str, str] | None = ...,
    columns: list[str] | None = ...,
    chunksize: int = ...,
) -> Iterator[DataFrame]:
    ...


def read_sql_table(
    table_name: str,
    con,
    schema: str | None = None,
    index_col: str | list[str] | None = None,
    coerce_float: bool = True,
    parse_dates: list[str] | dict[str, str] | None = None,
    columns: list[str] | None = None,
    chunksize: int | None = None,
) -> DataFrame | Iterator[DataFrame]:
    """
    Read SQL database table into a DataFrame.

    Given a table name and a SQLAlchemy connectable, returns a DataFrame.
    This function does not support DBAPI connections.

    Parameters
    ----------
    table_name : str
        Name of SQL table in database.
    con : SQLAlchemy connectable or str
        A database URI could be provided as str; SQLite DBAPI connection
        mode is not supported.
    schema : str, default None
        SQL schema to query (if the database flavor supports schemas); the
        default schema is used when None.
    index_col : str or list of str, optional, default: None
        Column(s) to set as index (MultiIndex).
    coerce_float : bool, default True
        Attempt to convert values of non-string, non-numeric objects (like
        decimal.Decimal) to floating point; can result in loss of precision.
    parse_dates : list or dict, default None
        Columns to parse as dates; a dict may map column names to strftime
        format strings, integer-timestamp units (D, s, ns, ms, us), or
        dicts of :func:`pandas.to_datetime` keyword arguments. Especially
        useful with databases without native datetime support, such as
        SQLite.
    columns : list, default None
        List of column names to select from the SQL table.
    chunksize : int, default None
        If specified, return an iterator where `chunksize` is the number of
        rows to include in each chunk.

    Returns
    -------
    DataFrame or Iterator[DataFrame]
        A SQL table is returned as two-dimensional data structure with
        labeled axes.

    See Also
    --------
    read_sql_query : Read SQL query into a DataFrame.
    read_sql : Read SQL query or database table into a DataFrame.

    Notes
    -----
    Any datetime values with time zone information will be converted to UTC.

    Examples
    --------
    >>> pd.read_sql_table('table_name', 'postgres:///db_name')  # doctest:+SKIP
    """
    pandas_sql = pandasSQL_builder(con, schema=schema)
    if not pandas_sql.has_table(table_name):
        raise ValueError(f"Table {table_name} not found")

    # SQLiteDatabase has no read_table attribute; this code path is only
    # reachable with a SQLAlchemy connectable, hence the mypy ignore.
    table = pandas_sql.read_table(  # type: ignore[union-attr]
        table_name,
        index_col=index_col,
        coerce_float=coerce_float,
        parse_dates=parse_dates,
        columns=columns,
        chunksize=chunksize,
    )

    if table is None:
        raise ValueError(f"Table {table_name} not found", con)
    return table

299 

300 

@overload
def read_sql_query(
    sql,
    con,
    index_col: str | list[str] | None = ...,
    coerce_float=...,
    params: list[str] | dict[str, str] | None = ...,
    parse_dates: list[str] | dict[str, str] | None = ...,
    chunksize: None = ...,
    dtype: DtypeArg | None = ...,
) -> DataFrame:
    ...


@overload
def read_sql_query(
    sql,
    con,
    index_col: str | list[str] | None = ...,
    coerce_float=...,
    params: list[str] | dict[str, str] | None = ...,
    parse_dates: list[str] | dict[str, str] | None = ...,
    chunksize: int = ...,
    dtype: DtypeArg | None = ...,
) -> Iterator[DataFrame]:
    ...


def read_sql_query(
    sql,
    con,
    index_col: str | list[str] | None = None,
    coerce_float: bool = True,
    params: list[str] | dict[str, str] | None = None,
    parse_dates: list[str] | dict[str, str] | None = None,
    chunksize: int | None = None,
    dtype: DtypeArg | None = None,
) -> DataFrame | Iterator[DataFrame]:
    """
    Read SQL query into a DataFrame.

    Returns a DataFrame corresponding to the result set of the query
    string. Optionally provide an `index_col` parameter to use one of the
    columns as the index; otherwise a default integer index is used.

    Parameters
    ----------
    sql : str SQL query or SQLAlchemy Selectable (select or text object)
        SQL query to be executed.
    con : SQLAlchemy connectable, str, or sqlite3 connection
        Using SQLAlchemy makes it possible to use any DB supported by that
        library. If a DBAPI2 object, only sqlite3 is supported.
    index_col : str or list of str, optional, default: None
        Column(s) to set as index (MultiIndex).
    coerce_float : bool, default True
        Attempt to convert values of non-string, non-numeric objects (like
        decimal.Decimal) to floating point; useful for SQL result sets.
    params : list, tuple or dict, optional, default: None
        Parameters to pass to the execute method, in whichever of the five
        PEP 249 paramstyles your database driver supports. E.g. for
        psycopg2, use %(name)s with params={'name': 'value'}.
    parse_dates : list or dict, default: None
        Columns to parse as dates; a dict may map column names to strftime
        format strings, integer-timestamp units (D, s, ns, ms, us), or
        dicts of :func:`pandas.to_datetime` keyword arguments. Especially
        useful with databases without native datetime support, such as
        SQLite.
    chunksize : int, default None
        If specified, return an iterator where `chunksize` is the number of
        rows to include in each chunk.
    dtype : Type name or dict of columns
        Data type for data or columns. E.g. np.float64 or
        {'a': np.float64, 'b': np.int32, 'c': 'Int64'}.

        .. versionadded:: 1.3.0

    Returns
    -------
    DataFrame or Iterator[DataFrame]

    See Also
    --------
    read_sql_table : Read SQL database table into a DataFrame.
    read_sql : Read SQL query or database table into a DataFrame.

    Notes
    -----
    Any datetime values with time zone information parsed via the
    `parse_dates` parameter will be converted to UTC.
    """
    return pandasSQL_builder(con).read_query(
        sql,
        index_col=index_col,
        params=params,
        coerce_float=coerce_float,
        parse_dates=parse_dates,
        chunksize=chunksize,
        dtype=dtype,
    )

406 

407 

@overload
def read_sql(
    sql,
    con,
    index_col: str | list[str] | None = ...,
    coerce_float=...,
    params=...,
    parse_dates=...,
    columns: list[str] = ...,
    chunksize: None = ...,
) -> DataFrame:
    ...


@overload
def read_sql(
    sql,
    con,
    index_col: str | list[str] | None = ...,
    coerce_float=...,
    params=...,
    parse_dates=...,
    columns: list[str] = ...,
    chunksize: int = ...,
) -> Iterator[DataFrame]:
    ...


def read_sql(
    sql,
    con,
    index_col: str | list[str] | None = None,
    coerce_float: bool = True,
    params=None,
    parse_dates=None,
    columns: list[str] | None = None,
    chunksize: int | None = None,
) -> DataFrame | Iterator[DataFrame]:
    """
    Read SQL query or database table into a DataFrame.

    Convenience wrapper around ``read_sql_table`` and ``read_sql_query``
    (kept for backward compatibility): a SQL query is routed to
    ``read_sql_query``, while a database table name is routed to
    ``read_sql_table``. The delegated functions may document further
    functionality not listed here.

    Parameters
    ----------
    sql : str or SQLAlchemy Selectable (select or text object)
        SQL query to be executed or a table name.
    con : SQLAlchemy connectable, str, or sqlite3 connection
        Using SQLAlchemy makes it possible to use any DB supported by that
        library. If a DBAPI2 object, only sqlite3 is supported. The user is
        responsible for engine disposal and connection closure for the
        SQLAlchemy connectable; str connections are closed automatically.
        See `here <https://docs.sqlalchemy.org/en/13/core/connections.html>`_.
    index_col : str or list of str, optional, default: None
        Column(s) to set as index (MultiIndex).
    coerce_float : bool, default True
        Attempt to convert values of non-string, non-numeric objects (like
        decimal.Decimal) to floating point; useful for SQL result sets.
    params : list, tuple or dict, optional, default: None
        Parameters to pass to the execute method, in whichever of the five
        PEP 249 paramstyles your database driver supports. E.g. for
        psycopg2, use %(name)s with params={'name': 'value'}.
    parse_dates : list or dict, default: None
        Columns to parse as dates; a dict may map column names to strftime
        format strings, integer-timestamp units (D, s, ns, ms, us), or
        dicts of :func:`pandas.to_datetime` keyword arguments (e.g.
        ``{"date_column": {"errors": "ignore"}}`` or
        ``{"date_column": {"dayfirst": True}}``). Especially useful with
        databases without native datetime support, such as SQLite.
    columns : list, default: None
        List of column names to select (only used when reading a table).
    chunksize : int, default None
        If specified, return an iterator where `chunksize` is the number of
        rows to include in each chunk.

    Returns
    -------
    DataFrame or Iterator[DataFrame]

    See Also
    --------
    read_sql_table : Read SQL database table into a DataFrame.
    read_sql_query : Read SQL query into a DataFrame.

    Examples
    --------
    Read data from SQL via either a SQL query or a SQL tablename.
    When using a SQLite database only SQL queries are accepted,
    providing only the SQL tablename will result in an error.

    >>> from sqlite3 import connect
    >>> conn = connect(':memory:')
    >>> df = pd.DataFrame(data=[[0, '10/11/12'], [1, '12/11/10']],
    ...                   columns=['int_column', 'date_column'])
    >>> df.to_sql('test_data', conn)
    2

    >>> pd.read_sql('SELECT int_column, date_column FROM test_data',
    ...             conn)  # doctest:+SKIP

    >>> pd.read_sql('test_data', 'postgres:///db_name')  # doctest:+SKIP

    Apply date parsing to columns through the ``parse_dates`` argument,
    e.g. custom formatting:

    >>> pd.read_sql('SELECT int_column, date_column FROM test_data',
    ...             conn,
    ...             parse_dates={"date_column": {"format": "%d/%m/%y"}}
    ...             )  # doctest:+SKIP
    """
    pandas_sql = pandasSQL_builder(con)

    if isinstance(pandas_sql, SQLiteDatabase):
        # Fallback mode: table reflection is unavailable, so everything is
        # treated as a query.
        return pandas_sql.read_query(
            sql,
            index_col=index_col,
            params=params,
            coerce_float=coerce_float,
            parse_dates=parse_dates,
            chunksize=chunksize,
        )

    try:
        is_table = pandas_sql.has_table(sql)
    except Exception:
        # using generic exception to catch errors from sql drivers (GH24988)
        is_table = False

    if not is_table:
        return pandas_sql.read_query(
            sql,
            index_col=index_col,
            params=params,
            coerce_float=coerce_float,
            parse_dates=parse_dates,
            chunksize=chunksize,
        )

    pandas_sql.meta.reflect(bind=pandas_sql.connectable, only=[sql])
    return pandas_sql.read_table(
        sql,
        index_col=index_col,
        coerce_float=coerce_float,
        parse_dates=parse_dates,
        columns=columns,
        chunksize=chunksize,
    )

598 

599 

def to_sql(
    frame,
    name: str,
    con,
    schema: str | None = None,
    if_exists: str = "fail",
    index: bool = True,
    index_label: IndexLabel = None,
    chunksize: int | None = None,
    dtype: DtypeArg | None = None,
    method: str | None = None,
    engine: str = "auto",
    **engine_kwargs,
) -> int | None:
    """
    Write records stored in a DataFrame to a SQL database.

    Parameters
    ----------
    frame : DataFrame, Series
        Data to write; a Series is converted to a one-column DataFrame.
    name : str
        Name of SQL table.
    con : SQLAlchemy connectable(engine/connection) or database string URI
        or sqlite3 DBAPI2 connection
        Using SQLAlchemy makes it possible to use any DB supported by that
        library. If a DBAPI2 object, only sqlite3 is supported.
    schema : str, optional
        SQL schema to write to (if the database flavor supports schemas);
        the default schema is used when None.
    if_exists : {'fail', 'replace', 'append'}, default 'fail'
        - fail: If table exists, do nothing.
        - replace: If table exists, drop it, recreate it, and insert data.
        - append: If table exists, insert data. Create if does not exist.
    index : bool, default True
        Write DataFrame index as a column.
    index_label : str or sequence, optional
        Column label for index column(s); index names are used when None
        and `index` is True. A sequence should be given for a MultiIndex.
    chunksize : int, optional
        Number of rows in each batch to be written at a time; all rows at
        once by default.
    dtype : dict or scalar, optional
        Datatype for columns: a mapping of column name to SQLAlchemy type
        (or string for the sqlite3 fallback mode), or one scalar applied to
        all columns.
    method : {None, 'multi', callable}, optional
        SQL insertion clause: one ``INSERT`` per row (None), a single
        multi-values ``INSERT`` ('multi'), or a callable with signature
        ``(pd_table, conn, keys, data_iter) -> int | None``. See
        :ref:`insert method <io.sql.method>`.
    engine : {'auto', 'sqlalchemy'}, default 'auto'
        SQL engine library to use; 'auto' reads the ``io.sql.engine``
        option, whose default behavior is 'sqlalchemy'.

        .. versionadded:: 1.3.0

    **engine_kwargs
        Any additional kwargs are passed to the engine.

    Returns
    -------
    None or int
        Number of rows affected by to_sql; None when the callable passed
        into ``method`` does not return an integer number of rows.

        .. versionadded:: 1.4.0

    Notes
    -----
    The returned rows affected is the sum of the ``rowcount`` attribute of
    ``sqlite3.Cursor`` or SQLAlchemy connectable, and may not reflect the
    exact number of written rows as stipulated by the
    `sqlite3 <https://docs.python.org/3/library/sqlite3.html#sqlite3.Cursor.rowcount>`__
    or SQLAlchemy documentation.
    """
    if if_exists not in ("fail", "replace", "append"):
        raise ValueError(f"'{if_exists}' is not valid for if_exists")

    pandas_sql = pandasSQL_builder(con, schema=schema)

    if isinstance(frame, Series):
        frame = frame.to_frame()
    elif not isinstance(frame, DataFrame):
        raise NotImplementedError(
            "'frame' argument should be either a Series or a DataFrame"
        )

    return pandas_sql.to_sql(
        frame,
        name,
        if_exists=if_exists,
        index=index,
        index_label=index_label,
        schema=schema,
        chunksize=chunksize,
        dtype=dtype,
        method=method,
        engine=engine,
        **engine_kwargs,
    )

708 

709 

def has_table(table_name: str, con, schema: str | None = None) -> bool:
    """
    Check if DataBase has named table.

    Parameters
    ----------
    table_name : string
        Name of SQL table.
    con : SQLAlchemy connectable(engine/connection) or sqlite3 DBAPI2 connection
        Using SQLAlchemy makes it possible to use any DB supported by that
        library. If a DBAPI2 object, only sqlite3 is supported.
    schema : string, default None
        SQL schema to look in (if the database flavor supports schemas);
        the default schema is used when None.

    Returns
    -------
    boolean
    """
    return pandasSQL_builder(con, schema=schema).has_table(table_name)


# Backwards-compatible alias.
table_exists = has_table

735 

736 

def pandasSQL_builder(con, schema: str | None = None) -> SQLDatabase | SQLiteDatabase:
    """
    Convenience function to return the correct PandasSQL subclass based on the
    provided parameters.

    Parameters
    ----------
    con : SQLAlchemy connectable, str URI, sqlite3 connection, or None
        None and sqlite3 connections get the sqlite fallback wrapper; a str
        URI is turned into a SQLAlchemy engine.
    schema : str, optional
        SQL schema (only forwarded to the SQLAlchemy-backed wrapper).

    Returns
    -------
    SQLDatabase or SQLiteDatabase
    """
    # Local import keeps module import time free of the sqlite3 dependency.
    import sqlite3

    # NOTE: the redundant local `import warnings` was removed; the module
    # already imports warnings at the top level.
    if isinstance(con, sqlite3.Connection) or con is None:
        return SQLiteDatabase(con)

    sqlalchemy = import_optional_dependency("sqlalchemy", errors="ignore")

    if isinstance(con, str):
        if sqlalchemy is None:
            raise ImportError("Using URI string without sqlalchemy installed.")
        con = sqlalchemy.create_engine(con)

    if sqlalchemy is not None and isinstance(con, sqlalchemy.engine.Connectable):
        return SQLDatabase(con, schema=schema)

    # Unrecognized DBAPI2 object: fall back to the sqlite3 wrapper, but warn.
    warnings.warn(
        "pandas only supports SQLAlchemy connectable (engine/connection) or "
        "database string URI or sqlite3 DBAPI2 connection. "
        "Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.",
        UserWarning,
        stacklevel=find_stack_level(),
    )
    return SQLiteDatabase(con)

767 

768 

769class SQLTable(PandasObject): 

770 """ 

771 For mapping Pandas tables to SQL tables. 

772 Uses fact that table is reflected by SQLAlchemy to 

773 do better type conversions. 

774 Also holds various flags needed to avoid having to 

775 pass them between functions all the time. 

776 """ 

777 

778 # TODO: support for multiIndex 

779 

    def __init__(
        self,
        name: str,
        pandas_sql_engine,
        frame=None,
        index: bool | str | list[str] | None = True,
        if_exists: str = "fail",
        prefix: str = "pandas",
        index_label=None,
        schema=None,
        keys=None,
        dtype: DtypeArg | None = None,
    ) -> None:
        # Bind a DataFrame (or an existing database table) to a SQLAlchemy
        # Table object, capturing all read/write options up front.
        self.name = name
        self.pd_sql = pandas_sql_engine
        self.prefix = prefix
        self.frame = frame
        # NOTE(review): _index_name is defined outside this view; it is
        # presumably derived from self.frame when index=True, so keep
        # self.frame assigned before this call — confirm against full class.
        self.index = self._index_name(index, index_label)
        self.schema = schema
        self.if_exists = if_exists
        self.keys = keys
        self.dtype = dtype

        if frame is not None:
            # We want to initialize based on a dataframe
            self.table = self._create_table_setup()
        else:
            # no data provided, read-only mode
            self.table = self.pd_sql.get_table(self.name, self.schema)

        if self.table is None:
            raise ValueError(f"Could not init table '{name}'")

812 

813 def exists(self): 

814 return self.pd_sql.has_table(self.name, self.schema) 

815 

816 def sql_schema(self) -> str: 

817 from sqlalchemy.schema import CreateTable 

818 

819 return str(CreateTable(self.table).compile(self.pd_sql.connectable)) 

820 

    def _execute_create(self):
        """Create the table in the database, first re-homing the Table
        object onto the engine's MetaData."""
        # Inserting table into database, add to MetaData object
        self.table = self.table.to_metadata(self.pd_sql.meta)
        self.table.create(bind=self.pd_sql.connectable)

825 

826 def create(self) -> None: 

827 if self.exists(): 

828 if self.if_exists == "fail": 

829 raise ValueError(f"Table '{self.name}' already exists.") 

830 elif self.if_exists == "replace": 

831 self.pd_sql.drop_table(self.name, self.schema) 

832 self._execute_create() 

833 elif self.if_exists == "append": 

834 pass 

835 else: 

836 raise ValueError(f"'{self.if_exists}' is not valid for if_exists") 

837 else: 

838 self._execute_create() 

839 

840 def _execute_insert(self, conn, keys: list[str], data_iter) -> int: 

841 """ 

842 Execute SQL statement inserting data 

843 

844 Parameters 

845 ---------- 

846 conn : sqlalchemy.engine.Engine or sqlalchemy.engine.Connection 

847 keys : list of str 

848 Column names 

849 data_iter : generator of list 

850 Each item contains a list of values to be inserted 

851 """ 

852 data = [dict(zip(keys, row)) for row in data_iter] 

853 result = conn.execute(self.table.insert(), data) 

854 return result.rowcount 

855 

856 def _execute_insert_multi(self, conn, keys: list[str], data_iter) -> int: 

857 """ 

858 Alternative to _execute_insert for DBs support multivalue INSERT. 

859 

860 Note: multi-value insert is usually faster for analytics DBs 

861 and tables containing a few columns 

862 but performance degrades quickly with increase of columns. 

863 """ 

864 

865 from sqlalchemy import insert 

866 

867 data = [dict(zip(keys, row)) for row in data_iter] 

868 stmt = insert(self.table).values(data) 

869 result = conn.execute(stmt) 

870 return result.rowcount 

871 

    def insert_data(self) -> tuple[list[str], list[np.ndarray]]:
        """
        Split self.frame into column names and per-column object arrays
        suitable for DBAPI insertion: datetime64 values become
        datetime.datetime, timedelta64 become integers, and NaN/NaT become
        None (for columns that can hold NA).
        """
        if self.index is not None:
            # Promote the index level(s) to regular columns so they are
            # inserted alongside the data columns.
            temp = self.frame.copy()
            temp.index.names = self.index
            try:
                temp.reset_index(inplace=True)
            except ValueError as err:
                raise ValueError(f"duplicate name in index/columns: {err}") from err
        else:
            temp = self.frame

        column_names = list(map(str, temp.columns))
        ncols = len(column_names)
        # this just pre-allocates the list: None's will be replaced with ndarrays
        # error: List item 0 has incompatible type "None"; expected "ndarray"
        data_list: list[np.ndarray] = [None] * ncols  # type: ignore[list-item]

        for i, (_, ser) in enumerate(temp.items()):
            vals = ser._values
            if vals.dtype.kind == "M":
                # datetime64: hand the driver datetime.datetime objects
                d = vals.to_pydatetime()
            elif vals.dtype.kind == "m":
                # store as integers, see GH#6921, GH#7076
                d = vals.view("i8").astype(object)
            else:
                d = vals.astype(object)

            assert isinstance(d, np.ndarray), type(d)

            if ser._can_hold_na:
                # Note: this will miss timedeltas since they are converted to int
                mask = isna(d)
                d[mask] = None

            data_list[i] = d

        return column_names, data_list

909 

    def insert(
        self, chunksize: int | None = None, method: str | None = None
    ) -> int | None:
        """
        Insert self.frame into the table in chunks within one transaction.

        Returns the total of the integer rowcounts reported by the insert
        method, or None when it never reports an integer (GH 46891).
        """
        # set insert method
        if method is None:
            exec_insert = self._execute_insert
        elif method == "multi":
            exec_insert = self._execute_insert_multi
        elif callable(method):
            # user-supplied callable receives this SQLTable as first argument
            exec_insert = partial(method, self)
        else:
            raise ValueError(f"Invalid parameter `method`: {method}")

        keys, data_list = self.insert_data()

        nrows = len(self.frame)

        if nrows == 0:
            return 0

        if chunksize is None:
            # single chunk holding every row
            chunksize = nrows
        elif chunksize == 0:
            raise ValueError("chunksize argument should be non-zero")

        chunks = (nrows // chunksize) + 1
        total_inserted = None
        with self.pd_sql.run_transaction() as conn:
            for i in range(chunks):
                start_i = i * chunksize
                end_i = min((i + 1) * chunksize, nrows)
                if start_i >= end_i:
                    break

                # zip the per-column arrays into row tuples for this chunk
                chunk_iter = zip(*(arr[start_i:end_i] for arr in data_list))
                num_inserted = exec_insert(conn, keys, chunk_iter)
                # GH 46891
                if is_integer(num_inserted):
                    if total_inserted is None:
                        total_inserted = num_inserted
                    else:
                        total_inserted += num_inserted
        return total_inserted

954 

    def _query_iterator(
        self,
        result,
        chunksize: int | None,
        columns,
        coerce_float: bool = True,
        parse_dates=None,
    ):
        """Return generator through chunked result set."""
        # NOTE: annotation fixed from `str | None`; chunksize is the row
        # count handed to DBAPI fetchmany() below.
        has_read_data = False
        while True:
            data = result.fetchmany(chunksize)
            if not data:
                if not has_read_data:
                    # Result set was empty: still yield one empty frame with
                    # the right columns so callers always get a DataFrame.
                    yield DataFrame.from_records(
                        [], columns=columns, coerce_float=coerce_float
                    )
                break
            else:
                has_read_data = True
                self.frame = DataFrame.from_records(
                    data, columns=columns, coerce_float=coerce_float
                )

                # align dtypes of the chunk with the SQL table's column types
                self._harmonize_columns(parse_dates=parse_dates)

                if self.index is not None:
                    self.frame.set_index(self.index, inplace=True)

                yield self.frame

985 

986 def read( 

987 self, 

988 coerce_float: bool = True, 

989 parse_dates=None, 

990 columns=None, 

991 chunksize=None, 

992 ) -> DataFrame | Iterator[DataFrame]: 

993 from sqlalchemy import select 

994 

995 if columns is not None and len(columns) > 0: 

996 cols = [self.table.c[n] for n in columns] 

997 if self.index is not None: 

998 for idx in self.index[::-1]: 

999 cols.insert(0, self.table.c[idx]) 

1000 sql_select = select(*cols) 

1001 else: 

1002 sql_select = select(self.table) 

1003 result = self.pd_sql.execute(sql_select) 

1004 column_names = result.keys() 

1005 

1006 if chunksize is not None: 

1007 return self._query_iterator( 

1008 result, 

1009 chunksize, 

1010 column_names, 

1011 coerce_float=coerce_float, 

1012 parse_dates=parse_dates, 

1013 ) 

1014 else: 

1015 data = result.fetchall() 

1016 self.frame = DataFrame.from_records( 

1017 data, columns=column_names, coerce_float=coerce_float 

1018 ) 

1019 

1020 self._harmonize_columns(parse_dates=parse_dates) 

1021 

1022 if self.index is not None: 

1023 self.frame.set_index(self.index, inplace=True) 

1024 

1025 return self.frame 

1026 

1027 def _index_name(self, index, index_label): 

1028 # for writing: index=True to include index in sql table 

1029 if index is True: 

1030 nlevels = self.frame.index.nlevels 

1031 # if index_label is specified, set this as index name(s) 

1032 if index_label is not None: 

1033 if not isinstance(index_label, list): 

1034 index_label = [index_label] 

1035 if len(index_label) != nlevels: 

1036 raise ValueError( 

1037 "Length of 'index_label' should match number of " 

1038 f"levels, which is {nlevels}" 

1039 ) 

1040 else: 

1041 return index_label 

1042 # return the used column labels for the index columns 

1043 if ( 

1044 nlevels == 1 

1045 and "index" not in self.frame.columns 

1046 and self.frame.index.name is None 

1047 ): 

1048 return ["index"] 

1049 else: 

1050 return com.fill_missing_names(self.frame.index.names) 

1051 

1052 # for reading: index=(list of) string to specify column to set as index 

1053 elif isinstance(index, str): 

1054 return [index] 

1055 elif isinstance(index, list): 

1056 return index 

1057 else: 

1058 return None 

1059 

1060 def _get_column_names_and_types(self, dtype_mapper): 

1061 column_names_and_types = [] 

1062 if self.index is not None: 

1063 for i, idx_label in enumerate(self.index): 

1064 idx_type = dtype_mapper(self.frame.index._get_level_values(i)) 

1065 column_names_and_types.append((str(idx_label), idx_type, True)) 

1066 

1067 column_names_and_types += [ 

1068 (str(self.frame.columns[i]), dtype_mapper(self.frame.iloc[:, i]), False) 

1069 for i in range(len(self.frame.columns)) 

1070 ] 

1071 

1072 return column_names_and_types 

1073 

1074 def _create_table_setup(self): 

1075 from sqlalchemy import ( 

1076 Column, 

1077 PrimaryKeyConstraint, 

1078 Table, 

1079 ) 

1080 from sqlalchemy.schema import MetaData 

1081 

1082 column_names_and_types = self._get_column_names_and_types(self._sqlalchemy_type) 

1083 

1084 columns = [ 

1085 Column(name, typ, index=is_index) 

1086 for name, typ, is_index in column_names_and_types 

1087 ] 

1088 

1089 if self.keys is not None: 

1090 if not is_list_like(self.keys): 

1091 keys = [self.keys] 

1092 else: 

1093 keys = self.keys 

1094 pkc = PrimaryKeyConstraint(*keys, name=self.name + "_pk") 

1095 columns.append(pkc) 

1096 

1097 schema = self.schema or self.pd_sql.meta.schema 

1098 

1099 # At this point, attach to new metadata, only attach to self.meta 

1100 # once table is created. 

1101 meta = MetaData() 

1102 return Table(self.name, meta, *columns, schema=schema) 

1103 

    def _harmonize_columns(self, parse_dates=None):
        """
        Make the DataFrame's column types align with the SQL table
        column types.
        Need to work around limited NA value support. Floats are always
        fine, ints must always be floats if there are Null values.
        Booleans are hard because converting bool column with None replaces
        all Nones with false. Therefore only convert bool if there are no
        NA values.
        Datetimes should already be converted to np.datetime64 if supported,
        but here we also force conversion if required.
        """
        parse_dates = _process_parse_dates_argument(parse_dates)

        for sql_col in self.table.columns:
            col_name = sql_col.name
            try:
                df_col = self.frame[col_name]

                # Handle date parsing upfront; don't try to convert columns
                # twice
                if col_name in parse_dates:
                    try:
                        # dict form: {column_name: format}; when parse_dates
                        # is a list, indexing it with a column name raises
                        # TypeError, meaning no explicit format was given
                        fmt = parse_dates[col_name]
                    except TypeError:
                        fmt = None
                    self.frame[col_name] = _handle_date_column(df_col, format=fmt)
                    continue

                # the type the dataframe column should have
                col_type = self._get_dtype(sql_col.type)

                if (
                    col_type is datetime
                    or col_type is date
                    or col_type is DatetimeTZDtype
                ):
                    # Convert tz-aware Datetime SQL columns to UTC
                    utc = col_type is DatetimeTZDtype
                    self.frame[col_name] = _handle_date_column(df_col, utc=utc)
                elif col_type is float:
                    # floats support NA, can always convert!
                    self.frame[col_name] = df_col.astype(col_type, copy=False)

                elif len(df_col) == df_col.count():
                    # No NA values, can convert ints and bools
                    if col_type is np.dtype("int64") or col_type is bool:
                        self.frame[col_name] = df_col.astype(col_type, copy=False)
            except KeyError:
                # column of the SQL table absent from the result frame
                # (e.g. only a subset of columns was selected) - skip it
                pass  # this column not in results

1154 

    def _sqlalchemy_type(self, col):
        """
        Map a DataFrame column or index level to a SQLAlchemy column type.

        An explicit entry in ``self.dtype`` for this column wins; otherwise
        the type is inferred from the non-missing values.
        """
        dtype: DtypeArg = self.dtype or {}
        if is_dict_like(dtype):
            dtype = cast(dict, dtype)
            if col.name in dtype:
                return dtype[col.name]

        # Infer type of column, while ignoring missing values.
        # Needed for inserting typed data containing NULLs, GH 8778.
        col_type = lib.infer_dtype(col, skipna=True)

        from sqlalchemy.types import (
            TIMESTAMP,
            BigInteger,
            Boolean,
            Date,
            DateTime,
            Float,
            Integer,
            SmallInteger,
            Text,
            Time,
        )

        if col_type == "datetime64" or col_type == "datetime":
            # GH 9086: TIMESTAMP is the suggested type if the column contains
            # timezone information
            try:
                if col.dt.tz is not None:
                    return TIMESTAMP(timezone=True)
            except AttributeError:
                # The column is actually a DatetimeIndex
                # GH 26761 or an Index with date-like data e.g. 9999-01-01
                if getattr(col, "tz", None) is not None:
                    return TIMESTAMP(timezone=True)
            return DateTime
        if col_type == "timedelta64":
            # timedeltas have no SQL counterpart; stored as ns integers
            warnings.warn(
                "the 'timedelta' type is not supported, and will be "
                "written as integer values (ns frequency) to the database.",
                UserWarning,
                stacklevel=find_stack_level(),
            )
            return BigInteger
        elif col_type == "floating":
            # pick single- vs double-precision storage to match the dtype
            if col.dtype == "float32":
                return Float(precision=23)
            else:
                return Float(precision=53)
        elif col_type == "integer":
            # GH35076 Map pandas integer to optimal SQLAlchemy integer type
            if col.dtype.name.lower() in ("int8", "uint8", "int16"):
                return SmallInteger
            elif col.dtype.name.lower() in ("uint16", "int32"):
                return Integer
            elif col.dtype.name.lower() == "uint64":
                raise ValueError("Unsigned 64 bit integer datatype is not supported")
            else:
                return BigInteger
        elif col_type == "boolean":
            return Boolean
        elif col_type == "date":
            return Date
        elif col_type == "time":
            return Time
        elif col_type == "complex":
            raise ValueError("Complex datatypes not supported")

        # anything unrecognised (strings, mixed, objects) is stored as text
        return Text

1225 

1226 def _get_dtype(self, sqltype): 

1227 from sqlalchemy.types import ( 

1228 TIMESTAMP, 

1229 Boolean, 

1230 Date, 

1231 DateTime, 

1232 Float, 

1233 Integer, 

1234 ) 

1235 

1236 if isinstance(sqltype, Float): 

1237 return float 

1238 elif isinstance(sqltype, Integer): 

1239 # TODO: Refine integer size. 

1240 return np.dtype("int64") 

1241 elif isinstance(sqltype, TIMESTAMP): 

1242 # we have a timezone capable type 

1243 if not sqltype.timezone: 

1244 return datetime 

1245 return DatetimeTZDtype 

1246 elif isinstance(sqltype, DateTime): 

1247 # Caution: np.datetime64 is also a subclass of np.number. 

1248 return datetime 

1249 elif isinstance(sqltype, Date): 

1250 return date 

1251 elif isinstance(sqltype, Boolean): 

1252 return bool 

1253 return object 

1254 

1255 

class PandasSQL(PandasObject):
    """
    Base class for SQL backends. Subclasses should define read_sql and
    to_sql.
    """

    def read_sql(self, *args, **kwargs):
        # Overridden by SQLDatabase / SQLiteDatabase; reaching this means
        # an unsupported connection object was used.
        raise ValueError(
            "PandasSQL must be created with an SQLAlchemy "
            "connectable or sqlite connection"
        )

    def to_sql(
        self,
        frame,
        name,
        if_exists: str = "fail",
        index: bool = True,
        index_label=None,
        schema=None,
        chunksize=None,
        dtype: DtypeArg | None = None,
        method=None,
    ) -> int | None:
        # Same contract as read_sql: concrete subclasses implement this.
        raise ValueError(
            "PandasSQL must be created with an SQLAlchemy "
            "connectable or sqlite connection"
        )

1283 

1284 

class BaseEngine:
    """Abstract interface for engines that write DataFrames to SQL."""

    def insert_records(
        self,
        table: SQLTable,
        con,
        frame,
        name,
        index=True,
        schema=None,
        chunksize=None,
        method=None,
        **engine_kwargs,
    ) -> int | None:
        """
        Inserts data into already-prepared table
        """
        # Concrete engines (e.g. SQLAlchemyEngine) must override this.
        raise AbstractMethodError(self)

1302 

1303 

class SQLAlchemyEngine(BaseEngine):
    """Default insertion engine, backed by SQLAlchemy."""

    def __init__(self) -> None:
        # Fail fast with a helpful message if SQLAlchemy is missing.
        import_optional_dependency(
            "sqlalchemy", extra="sqlalchemy is required for SQL support."
        )

    def insert_records(
        self,
        table: SQLTable,
        con,
        frame,
        name,
        index=True,
        schema=None,
        chunksize=None,
        method=None,
        **engine_kwargs,
    ) -> int | None:
        """
        Insert the prepared ``table``'s rows; returns the inserted row
        count when the driver reports one, else None.
        """
        from sqlalchemy import exc

        try:
            return table.insert(chunksize=chunksize, method=method)
        except exc.SQLAlchemyError as err:
            # GH34431
            # https://stackoverflow.com/a/67358288/6067848
            # MySQL rejects +/-inf values; recognise that driver error and
            # re-raise it as a clearer ValueError.
            msg = r"""(\(1054, "Unknown column 'inf(e0)?' in 'field list'"\))(?#
            )|inf can not be used with MySQL"""
            err_text = str(err.orig)
            if re.search(msg, err_text):
                raise ValueError("inf cannot be used with MySQL") from err
            else:
                raise err

1336 

1337 

def get_engine(engine: str) -> BaseEngine:
    """return our implementation"""
    # "auto" defers to the io.sql.engine option, which may itself be "auto".
    if engine == "auto":
        engine = get_option("io.sql.engine")

    if engine == "sqlalchemy":
        return SQLAlchemyEngine()

    if engine == "auto":
        # try engines in this order
        candidates = [SQLAlchemyEngine]

        failures = ""
        for candidate in candidates:
            try:
                return candidate()
            except ImportError as err:
                failures += "\n - " + str(err)

        raise ImportError(
            "Unable to find a usable engine; tried using: 'sqlalchemy'.\n"
            "A suitable version of sqlalchemy is required for sql I/O support.\n"
            "Trying to import the above resulted in these errors:"
            f"{failures}"
        )

    raise ValueError("engine must be one of 'auto', 'sqlalchemy'")

1368 

1369 

class SQLDatabase(PandasSQL):
    """
    This class enables conversion between DataFrame and SQL databases
    using SQLAlchemy to handle DataBase abstraction.

    Parameters
    ----------
    engine : SQLAlchemy connectable
        Connectable to connect with the database. Using SQLAlchemy makes it
        possible to use any DB supported by that library.
    schema : string, default None
        Name of SQL schema in database to write to (if database flavor
        supports this). If None, use default schema (default).

    """

    def __init__(self, engine, schema: str | None = None) -> None:
        from sqlalchemy.schema import MetaData

        # self.meta carries the default schema for every table this
        # instance creates or reflects.
        self.connectable = engine
        self.meta = MetaData(schema=schema)

    @contextmanager
    def run_transaction(self):
        """Yield a connection wrapped in a transaction when possible."""
        from sqlalchemy.engine import Engine

        if isinstance(self.connectable, Engine):
            # Engine: open a dedicated connection and an explicit
            # transaction around the block.
            with self.connectable.connect() as conn:
                with conn.begin():
                    yield conn
        else:
            # Already a connection-like object: hand it back as-is and let
            # the caller's transaction semantics apply.
            yield self.connectable

    def execute(self, *args, **kwargs):
        """Simple passthrough to SQLAlchemy connectable"""
        return self.connectable.execution_options().execute(*args, **kwargs)

    def read_table(
        self,
        table_name: str,
        index_col: str | list[str] | None = None,
        coerce_float: bool = True,
        parse_dates=None,
        columns=None,
        schema: str | None = None,
        chunksize: int | None = None,
    ) -> DataFrame | Iterator[DataFrame]:
        """
        Read SQL database table into a DataFrame.

        Parameters
        ----------
        table_name : str
            Name of SQL table in database.
        index_col : string, optional, default: None
            Column to set as index.
        coerce_float : bool, default True
            Attempts to convert values of non-string, non-numeric objects
            (like decimal.Decimal) to floating point. This can result in
            loss of precision.
        parse_dates : list or dict, default: None
            - List of column names to parse as dates.
            - Dict of ``{column_name: format string}`` where format string is
              strftime compatible in case of parsing string times, or is one of
              (D, s, ns, ms, us) in case of parsing integer timestamps.
            - Dict of ``{column_name: arg}``, where the arg corresponds
              to the keyword arguments of :func:`pandas.to_datetime`.
              Especially useful with databases without native Datetime support,
              such as SQLite.
        columns : list, default: None
            List of column names to select from SQL table.
        schema : string, default None
            Name of SQL schema in database to query (if database flavor
            supports this). If specified, this overwrites the default
            schema of the SQL database object.
        chunksize : int, default None
            If specified, return an iterator where `chunksize` is the number
            of rows to include in each chunk.

        Returns
        -------
        DataFrame

        See Also
        --------
        pandas.read_sql_table
        SQLDatabase.read_query

        """
        # SQLTable reflects the table's schema and does the actual reading.
        table = SQLTable(table_name, self, index=index_col, schema=schema)
        return table.read(
            coerce_float=coerce_float,
            parse_dates=parse_dates,
            columns=columns,
            chunksize=chunksize,
        )

    @staticmethod
    def _query_iterator(
        result,
        chunksize: int,
        columns,
        index_col=None,
        coerce_float=True,
        parse_dates=None,
        dtype: DtypeArg | None = None,
    ):
        """Return generator through chunked result set"""
        has_read_data = False
        while True:
            data = result.fetchmany(chunksize)
            if not data:
                if not has_read_data:
                    # empty result: still yield one empty, well-formed frame
                    yield _wrap_result(
                        [],
                        columns,
                        index_col=index_col,
                        coerce_float=coerce_float,
                        parse_dates=parse_dates,
                    )
                break
            else:
                has_read_data = True
                yield _wrap_result(
                    data,
                    columns,
                    index_col=index_col,
                    coerce_float=coerce_float,
                    parse_dates=parse_dates,
                    dtype=dtype,
                )

    def read_query(
        self,
        sql: str,
        index_col: str | list[str] | None = None,
        coerce_float: bool = True,
        parse_dates=None,
        params=None,
        chunksize: int | None = None,
        dtype: DtypeArg | None = None,
    ) -> DataFrame | Iterator[DataFrame]:
        """
        Read SQL query into a DataFrame.

        Parameters
        ----------
        sql : str
            SQL query to be executed.
        index_col : string, optional, default: None
            Column name to use as index for the returned DataFrame object.
        coerce_float : bool, default True
            Attempt to convert values of non-string, non-numeric objects (like
            decimal.Decimal) to floating point, useful for SQL result sets.
        params : list, tuple or dict, optional, default: None
            List of parameters to pass to execute method. The syntax used
            to pass parameters is database driver dependent. Check your
            database driver documentation for which of the five syntax styles,
            described in PEP 249's paramstyle, is supported.
            Eg. for psycopg2, uses %(name)s so use params={'name' : 'value'}
        parse_dates : list or dict, default: None
            - List of column names to parse as dates.
            - Dict of ``{column_name: format string}`` where format string is
              strftime compatible in case of parsing string times, or is one of
              (D, s, ns, ms, us) in case of parsing integer timestamps.
            - Dict of ``{column_name: arg dict}``, where the arg dict
              corresponds to the keyword arguments of
              :func:`pandas.to_datetime` Especially useful with databases
              without native Datetime support, such as SQLite.
        chunksize : int, default None
            If specified, return an iterator where `chunksize` is the number
            of rows to include in each chunk.
        dtype : Type name or dict of columns
            Data type for data or columns. E.g. np.float64 or
            {‘a’: np.float64, ‘b’: np.int32, ‘c’: ‘Int64’}

            .. versionadded:: 1.3.0

        Returns
        -------
        DataFrame

        See Also
        --------
        read_sql_table : Read SQL database table into a DataFrame.
        read_sql

        """
        args = _convert_params(sql, params)

        result = self.execute(*args)
        columns = result.keys()

        if chunksize is not None:
            return self._query_iterator(
                result,
                chunksize,
                columns,
                index_col=index_col,
                coerce_float=coerce_float,
                parse_dates=parse_dates,
                dtype=dtype,
            )
        else:
            data = result.fetchall()
            frame = _wrap_result(
                data,
                columns,
                index_col=index_col,
                coerce_float=coerce_float,
                parse_dates=parse_dates,
                dtype=dtype,
            )
            return frame

    # read_sql on this backend is query-based
    read_sql = read_query

    def prep_table(
        self,
        frame,
        name,
        if_exists="fail",
        index=True,
        index_label=None,
        schema=None,
        dtype: DtypeArg | None = None,
    ) -> SQLTable:
        """
        Prepares table in the database for data insertion. Creates it if needed, etc.
        """
        if dtype:
            if not is_dict_like(dtype):
                # a single type: apply it to every column
                # error: Value expression in dictionary comprehension has incompatible
                # type "Union[ExtensionDtype, str, dtype[Any], Type[object],
                # Dict[Hashable, Union[ExtensionDtype, Union[str, dtype[Any]],
                # Type[str], Type[float], Type[int], Type[complex], Type[bool],
                # Type[object]]]]"; expected type "Union[ExtensionDtype, str,
                # dtype[Any], Type[object]]"
                dtype = {col_name: dtype for col_name in frame}  # type: ignore[misc]
            else:
                dtype = cast(dict, dtype)

            from sqlalchemy.types import (
                TypeEngine,
                to_instance,
            )

            # validate user-supplied types before creating anything
            for col, my_type in dtype.items():
                if not isinstance(to_instance(my_type), TypeEngine):
                    raise ValueError(f"The type of {col} is not a SQLAlchemy type")

        table = SQLTable(
            name,
            self,
            frame=frame,
            index=index,
            if_exists=if_exists,
            index_label=index_label,
            schema=schema,
            dtype=dtype,
        )
        table.create()
        return table

    def check_case_sensitive(
        self,
        name: str,
        schema: str | None,
    ) -> None:
        """
        Checks table name for issues with case-sensitivity.
        Method is called after data is inserted.
        """
        if not name.isdigit() and not name.islower():
            # check for potentially case sensitivity issues (GH7815)
            # Only check when name is not a number and name is not lower case
            from sqlalchemy import inspect as sqlalchemy_inspect

            with self.connectable.connect() as conn:
                insp = sqlalchemy_inspect(conn)
                table_names = insp.get_table_names(schema=schema or self.meta.schema)
            if name not in table_names:
                msg = (
                    f"The provided table name '{name}' is not found exactly as "
                    "such in the database after writing the table, possibly "
                    "due to case sensitivity issues. Consider using lower "
                    "case table names."
                )
                warnings.warn(
                    msg,
                    UserWarning,
                    stacklevel=find_stack_level(),
                )

    def to_sql(
        self,
        frame,
        name: str,
        if_exists: str = "fail",
        index: bool = True,
        index_label=None,
        schema: str | None = None,
        chunksize=None,
        dtype: DtypeArg | None = None,
        method=None,
        engine="auto",
        **engine_kwargs,
    ) -> int | None:
        """
        Write records stored in a DataFrame to a SQL database.

        Parameters
        ----------
        frame : DataFrame
        name : string
            Name of SQL table.
        if_exists : {'fail', 'replace', 'append'}, default 'fail'
            - fail: If table exists, do nothing.
            - replace: If table exists, drop it, recreate it, and insert data.
            - append: If table exists, insert data. Create if does not exist.
        index : boolean, default True
            Write DataFrame index as a column.
        index_label : string or sequence, default None
            Column label for index column(s). If None is given (default) and
            `index` is True, then the index names are used.
            A sequence should be given if the DataFrame uses MultiIndex.
        schema : string, default None
            Name of SQL schema in database to write to (if database flavor
            supports this). If specified, this overwrites the default
            schema of the SQLDatabase object.
        chunksize : int, default None
            If not None, then rows will be written in batches of this size at a
            time. If None, all rows will be written at once.
        dtype : single type or dict of column name to SQL type, default None
            Optional specifying the datatype for columns. The SQL type should
            be a SQLAlchemy type. If all columns are of the same type, one
            single value can be used.
        method : {None', 'multi', callable}, default None
            Controls the SQL insertion clause used:

            * None : Uses standard SQL ``INSERT`` clause (one per row).
            * 'multi': Pass multiple values in a single ``INSERT`` clause.
            * callable with signature ``(pd_table, conn, keys, data_iter)``.

            Details and a sample callable implementation can be found in the
            section :ref:`insert method <io.sql.method>`.
        engine : {'auto', 'sqlalchemy'}, default 'auto'
            SQL engine library to use. If 'auto', then the option
            ``io.sql.engine`` is used. The default ``io.sql.engine``
            behavior is 'sqlalchemy'

            .. versionadded:: 1.3.0

        **engine_kwargs
            Any additional kwargs are passed to the engine.
        """
        sql_engine = get_engine(engine)

        # create (or validate) the destination table first
        table = self.prep_table(
            frame=frame,
            name=name,
            if_exists=if_exists,
            index=index,
            index_label=index_label,
            schema=schema,
            dtype=dtype,
        )

        total_inserted = sql_engine.insert_records(
            table=table,
            con=self.connectable,
            frame=frame,
            name=name,
            index=index,
            schema=schema,
            chunksize=chunksize,
            method=method,
            **engine_kwargs,
        )

        # warn (after insertion) if the name may have been case-folded
        self.check_case_sensitive(name=name, schema=schema)
        return total_inserted

    @property
    def tables(self):
        # tables known to this instance's MetaData (reflected or created)
        return self.meta.tables

    def has_table(self, name: str, schema: str | None = None):
        from sqlalchemy import inspect as sqlalchemy_inspect

        insp = sqlalchemy_inspect(self.connectable)
        return insp.has_table(name, schema or self.meta.schema)

    def get_table(self, table_name: str, schema: str | None = None) -> Table:
        from sqlalchemy import (
            Numeric,
            Table,
        )

        schema = schema or self.meta.schema
        tbl = Table(
            table_name, self.meta, autoload_with=self.connectable, schema=schema
        )
        for column in tbl.columns:
            if isinstance(column.type, Numeric):
                # per SQLAlchemy Numeric semantics, asdecimal=False makes
                # values come back as floats rather than Decimal objects
                column.type.asdecimal = False
        return tbl

    def drop_table(self, table_name: str, schema: str | None = None) -> None:
        schema = schema or self.meta.schema
        if self.has_table(table_name, schema):
            # reflect just this table so we can issue the DROP, then clear
            # the MetaData so nothing stale is cached
            self.meta.reflect(bind=self.connectable, only=[table_name], schema=schema)
            self.get_table(table_name, schema).drop(bind=self.connectable)
            self.meta.clear()

    def _create_sql_schema(
        self,
        frame: DataFrame,
        table_name: str,
        keys: list[str] | None = None,
        dtype: DtypeArg | None = None,
        schema: str | None = None,
    ):
        # build the table object only to render its CREATE TABLE statement
        table = SQLTable(
            table_name,
            self,
            frame=frame,
            index=False,
            keys=keys,
            dtype=dtype,
            schema=schema,
        )
        return str(table.sql_schema())

1803 

1804 

1805# ---- SQL without SQLAlchemy --- 

1806# sqlite-specific sql strings and handler class 

1807# dictionary used for readability purposes 

# Maps lib.infer_dtype() results to SQLite column type names used when
# generating CREATE TABLE statements (see SQLiteTable._sql_type_name).
_SQL_TYPES = {
    "string": "TEXT",
    "floating": "REAL",
    "integer": "INTEGER",
    "datetime": "TIMESTAMP",
    "date": "DATE",
    "time": "TIME",
    # booleans are stored as INTEGER values
    "boolean": "INTEGER",
}

1817 

1818 

1819def _get_unicode_name(name): 

1820 try: 

1821 uname = str(name).encode("utf-8", "strict").decode("utf-8") 

1822 except UnicodeError as err: 

1823 raise ValueError(f"Cannot convert identifier to UTF-8: '{name}'") from err 

1824 return uname 

1825 

1826 

1827def _get_valid_sqlite_name(name): 

1828 # See https://stackoverflow.com/questions/6514274/how-do-you-escape-strings\ 

1829 # -for-sqlite-table-column-names-in-python 

1830 # Ensure the string can be encoded as UTF-8. 

1831 # Ensure the string does not include any NUL characters. 

1832 # Replace all " with "". 

1833 # Wrap the entire thing in double quotes. 

1834 

1835 uname = _get_unicode_name(name) 

1836 if not len(uname): 

1837 raise ValueError("Empty table or column name specified") 

1838 

1839 nul_index = uname.find("\x00") 

1840 if nul_index >= 0: 

1841 raise ValueError("SQLite identifier cannot contain NULs") 

1842 return '"' + uname.replace('"', '""') + '"' 

1843 

1844 

class SQLiteTable(SQLTable):
    """
    Patch the SQLTable for fallback support.
    Instead of a table variable just use the Create Table statement.
    """

    def __init__(self, *args, **kwargs) -> None:
        # GH 8341
        # register an adapter callable for datetime.time object
        import sqlite3

        # this will transform time(12,34,56,789) into '12:34:56.000789'
        # (this is what sqlalchemy does)
        def _adapt_time(t):
            # This is faster than strftime
            return f"{t.hour:02d}:{t.minute:02d}:{t.second:02d}.{t.microsecond:06d}"

        sqlite3.register_adapter(time, _adapt_time)
        super().__init__(*args, **kwargs)

    def sql_schema(self) -> str:
        """Return the CREATE TABLE / CREATE INDEX statements as one string."""
        return str(";\n".join(self.table))

    def _execute_create(self):
        # self.table is the list of DDL statements built by
        # _create_table_setup; run them in a single transaction.
        with self.pd_sql.run_transaction() as conn:
            for stmt in self.table:
                conn.execute(stmt)

    def insert_statement(self, *, num_rows: int) -> str:
        """
        Build a parameterized INSERT statement covering ``num_rows`` rows.

        Index columns (when written) are listed before the frame's columns.
        """
        names = list(map(str, self.frame.columns))
        wld = "?"  # wildcard char
        escape = _get_valid_sqlite_name

        if self.index is not None:
            for idx in self.index[::-1]:
                names.insert(0, idx)

        bracketed_names = [escape(column) for column in names]
        col_names = ",".join(bracketed_names)

        row_wildcards = ",".join([wld] * len(names))
        wildcards = ",".join([f"({row_wildcards})" for _ in range(num_rows)])
        insert_statement = (
            f"INSERT INTO {escape(self.name)} ({col_names}) VALUES {wildcards}"
        )
        return insert_statement

    def _execute_insert(self, conn, keys, data_iter) -> int:
        """Insert rows one-per-statement via executemany; return rowcount."""
        data_list = list(data_iter)
        conn.executemany(self.insert_statement(num_rows=1), data_list)
        return conn.rowcount

    def _execute_insert_multi(self, conn, keys, data_iter) -> int:
        """Insert all rows with one multi-VALUES statement; return rowcount."""
        data_list = list(data_iter)
        flattened_data = [x for row in data_list for x in row]
        conn.execute(self.insert_statement(num_rows=len(data_list)), flattened_data)
        return conn.rowcount

    def _create_table_setup(self):
        """
        Return a list of SQL statements that creates a table reflecting the
        structure of a DataFrame. The first entry will be a CREATE TABLE
        statement while the rest will be CREATE INDEX statements.
        """
        column_names_and_types = self._get_column_names_and_types(self._sql_type_name)
        escape = _get_valid_sqlite_name

        create_tbl_stmts = [
            escape(cname) + " " + ctype for cname, ctype, _ in column_names_and_types
        ]

        if self.keys is not None and len(self.keys):
            if not is_list_like(self.keys):
                keys = [self.keys]
            else:
                keys = self.keys
            cnames_br = ", ".join([escape(c) for c in keys])
            create_tbl_stmts.append(
                f"CONSTRAINT {self.name}_pk PRIMARY KEY ({cnames_br})"
            )
        if self.schema:
            schema_name = self.schema + "."
        else:
            schema_name = ""
        create_stmts = [
            "CREATE TABLE "
            + schema_name
            + escape(self.name)
            + " (\n"
            + ",\n  ".join(create_tbl_stmts)
            + "\n)"
        ]

        ix_cols = [cname for cname, _, is_index in column_names_and_types if is_index]
        if len(ix_cols):
            cnames = "_".join(ix_cols)
            cnames_br = ",".join([escape(c) for c in ix_cols])
            create_stmts.append(
                "CREATE INDEX "
                + escape("ix_" + self.name + "_" + cnames)
                # BUG FIX: previously concatenated as '"ix_…"ON ' with no
                # space; SQLite's tokenizer happens to accept the quoted
                # identifier butting against the keyword, but emit standard
                # SQL with a separating space.
                + " ON "
                + escape(self.name)
                + " ("
                + cnames_br
                + ")"
            )

        return create_stmts

    def _sql_type_name(self, col):
        """Map a column to its SQLite type name via the _SQL_TYPES table."""
        dtype: DtypeArg = self.dtype or {}
        if is_dict_like(dtype):
            dtype = cast(dict, dtype)
            if col.name in dtype:
                return dtype[col.name]

        # Infer type of column, while ignoring missing values.
        # Needed for inserting typed data containing NULLs, GH 8778.
        col_type = lib.infer_dtype(col, skipna=True)

        if col_type == "timedelta64":
            warnings.warn(
                "the 'timedelta' type is not supported, and will be "
                "written as integer values (ns frequency) to the database.",
                UserWarning,
                stacklevel=find_stack_level(),
            )
            col_type = "integer"

        elif col_type == "datetime64":
            col_type = "datetime"

        elif col_type == "empty":
            col_type = "string"

        elif col_type == "complex":
            raise ValueError("Complex datatypes not supported")

        # anything unrecognised falls back to TEXT
        if col_type not in _SQL_TYPES:
            col_type = "string"

        return _SQL_TYPES[col_type]

1987 

1988 

class SQLiteDatabase(PandasSQL):
    """
    Version of SQLDatabase to support SQLite connections (fallback without
    SQLAlchemy). This should only be used internally.

    Parameters
    ----------
    con : sqlite connection object

    """

    def __init__(self, con) -> None:
        self.con = con

    @contextmanager
    def run_transaction(self):
        """
        Yield a cursor inside a transaction.

        Commits on normal exit, rolls back and re-raises on any exception;
        the cursor is always closed.
        """
        cur = self.con.cursor()
        try:
            yield cur
            self.con.commit()
        except Exception:
            self.con.rollback()
            raise
        finally:
            cur.close()

    def execute(self, *args, **kwargs):
        """
        Execute a statement on a fresh cursor and return the cursor.

        On failure the connection is rolled back and a ``DatabaseError``
        wrapping the original exception is raised; if the rollback itself
        fails, that inner failure is chained instead.
        """
        cur = self.con.cursor()
        try:
            cur.execute(*args, **kwargs)
            return cur
        except Exception as exc:
            try:
                self.con.rollback()
            except Exception as inner_exc:  # pragma: no cover
                ex = DatabaseError(
                    f"Execution failed on sql: {args[0]}\n{exc}\nunable to rollback"
                )
                raise ex from inner_exc

            ex = DatabaseError(f"Execution failed on sql '{args[0]}': {exc}")
            raise ex from exc

    @staticmethod
    def _query_iterator(
        cursor,
        chunksize: int,
        columns,
        index_col=None,
        coerce_float: bool = True,
        parse_dates=None,
        dtype: DtypeArg | None = None,
    ):
        """Return generator through chunked result set"""
        has_read_data = False
        while True:
            data = cursor.fetchmany(chunksize)
            # BUG FIX: use isinstance instead of an exact ``type(...) == tuple``
            # comparison so tuple subclasses are normalized to a list as well.
            if isinstance(data, tuple):
                data = list(data)
            if not data:
                cursor.close()
                # Always yield at least one (possibly empty) DataFrame so
                # consumers of the iterator never get zero results.
                if not has_read_data:
                    yield DataFrame.from_records(
                        [], columns=columns, coerce_float=coerce_float
                    )
                break
            else:
                has_read_data = True
                yield _wrap_result(
                    data,
                    columns,
                    index_col=index_col,
                    coerce_float=coerce_float,
                    parse_dates=parse_dates,
                    dtype=dtype,
                )

    def read_query(
        self,
        sql,
        index_col=None,
        coerce_float: bool = True,
        params=None,
        parse_dates=None,
        chunksize: int | None = None,
        dtype: DtypeArg | None = None,
    ) -> DataFrame | Iterator[DataFrame]:
        """
        Execute ``sql`` and return the result as a DataFrame, or as an
        iterator of DataFrames when ``chunksize`` is given.
        """
        args = _convert_params(sql, params)
        cursor = self.execute(*args)
        # Column names come from the DBAPI cursor description (first field).
        columns = [col_desc[0] for col_desc in cursor.description]

        if chunksize is not None:
            return self._query_iterator(
                cursor,
                chunksize,
                columns,
                index_col=index_col,
                coerce_float=coerce_float,
                parse_dates=parse_dates,
                dtype=dtype,
            )
        else:
            data = self._fetchall_as_list(cursor)
            cursor.close()

            frame = _wrap_result(
                data,
                columns,
                index_col=index_col,
                coerce_float=coerce_float,
                parse_dates=parse_dates,
                dtype=dtype,
            )
            return frame

    def _fetchall_as_list(self, cur):
        """Fetch all rows, coercing the result to a plain list."""
        result = cur.fetchall()
        if not isinstance(result, list):
            result = list(result)
        return result

    def to_sql(
        self,
        frame,
        name,
        if_exists: str = "fail",
        index: bool = True,
        index_label=None,
        schema=None,
        chunksize=None,
        dtype: DtypeArg | None = None,
        method=None,
        **kwargs,
    ) -> int | None:
        """
        Write records stored in a DataFrame to a SQL database.

        Parameters
        ----------
        frame: DataFrame
        name: string
            Name of SQL table.
        if_exists: {'fail', 'replace', 'append'}, default 'fail'
            fail: If table exists, do nothing.
            replace: If table exists, drop it, recreate it, and insert data.
            append: If table exists, insert data. Create if it does not exist.
        index : bool, default True
            Write DataFrame index as a column
        index_label : string or sequence, default None
            Column label for index column(s). If None is given (default) and
            `index` is True, then the index names are used.
            A sequence should be given if the DataFrame uses MultiIndex.
        schema : string, default None
            Ignored parameter included for compatibility with SQLAlchemy
            version of ``to_sql``.
        chunksize : int, default None
            If not None, then rows will be written in batches of this
            size at a time. If None, all rows will be written at once.
        dtype : single type or dict of column name to SQL type, default None
            Optional specifying the datatype for columns. The SQL type should
            be a string. If all columns are of the same type, one single value
            can be used.
        method : {None, 'multi', callable}, default None
            Controls the SQL insertion clause used:

            * None : Uses standard SQL ``INSERT`` clause (one per row).
            * 'multi': Pass multiple values in a single ``INSERT`` clause.
            * callable with signature ``(pd_table, conn, keys, data_iter)``.

            Details and a sample callable implementation can be found in the
            section :ref:`insert method <io.sql.method>`.
        """
        if dtype:
            if not is_dict_like(dtype):
                # error: Value expression in dictionary comprehension has incompatible
                # type "Union[ExtensionDtype, str, dtype[Any], Type[object],
                # Dict[Hashable, Union[ExtensionDtype, Union[str, dtype[Any]],
                # Type[str], Type[float], Type[int], Type[complex], Type[bool],
                # Type[object]]]]"; expected type "Union[ExtensionDtype, str,
                # dtype[Any], Type[object]]"
                dtype = {col_name: dtype for col_name in frame}  # type: ignore[misc]
            else:
                dtype = cast(dict, dtype)

            # The sqlite fallback only understands string type names.
            for col, my_type in dtype.items():
                if not isinstance(my_type, str):
                    raise ValueError(f"{col} ({my_type}) not a string")

        table = SQLiteTable(
            name,
            self,
            frame=frame,
            index=index,
            if_exists=if_exists,
            index_label=index_label,
            dtype=dtype,
        )
        table.create()
        return table.insert(chunksize, method)

    def has_table(self, name: str, schema: str | None = None) -> bool:
        """Return True if a table called ``name`` exists (``schema`` ignored)."""
        wld = "?"
        query = f"SELECT name FROM sqlite_master WHERE type='table' AND name={wld};"

        return len(self.execute(query, [name]).fetchall()) > 0

    def get_table(self, table_name: str, schema: str | None = None) -> None:
        return None  # not supported in fallback mode

    def drop_table(self, name: str, schema: str | None = None) -> None:
        """Drop the table; the name is escaped for SQLite (``schema`` ignored)."""
        drop_sql = f"DROP TABLE {_get_valid_sqlite_name(name)}"
        self.execute(drop_sql)

    def _create_sql_schema(
        self,
        frame,
        table_name: str,
        keys=None,
        dtype: DtypeArg | None = None,
        schema: str | None = None,
    ):
        """Return the CREATE TABLE schema string for ``frame`` (no execution)."""
        table = SQLiteTable(
            table_name,
            self,
            frame=frame,
            index=False,
            keys=keys,
            dtype=dtype,
            schema=schema,
        )
        return str(table.sql_schema())

2222 

2223 

def get_schema(
    frame,
    name: str,
    keys=None,
    con=None,
    dtype: DtypeArg | None = None,
    schema: str | None = None,
) -> str:
    """
    Get the SQL db table schema for the given frame.

    Parameters
    ----------
    frame : DataFrame
    name : str
        name of SQL table
    keys : string or sequence, default: None
        columns to use a primary key
    con: an open SQL database connection object or a SQLAlchemy connectable
        Using SQLAlchemy makes it possible to use any DB supported by that
        library, default: None
        If a DBAPI2 object, only sqlite3 is supported.
    dtype : dict of column name to SQL type, default None
        Optional specifying the datatype for columns. The SQL type should
        be a SQLAlchemy type, or a string for sqlite3 fallback connection.
    schema: str, default: None
        Optional specifying the schema to be used in creating the table.

        .. versionadded:: 1.2.0
    """
    # Delegate to whichever PandasSQL implementation matches ``con``.
    sql_engine = pandasSQL_builder(con=con)
    return sql_engine._create_sql_schema(
        frame,
        name,
        keys=keys,
        dtype=dtype,
        schema=schema,
    )