from __future__ import annotations

import copy
import itertools
from typing import (
    TYPE_CHECKING,
    Sequence,
    cast,
)

import numpy as np

from pandas._libs import (
    NaT,
    internals as libinternals,
)
from pandas._libs.missing import NA
from pandas._typing import (
    ArrayLike,
    DtypeObj,
    Manager,
    Shape,
)
from pandas.util._decorators import cache_readonly

from pandas.core.dtypes.cast import (
    ensure_dtype_can_hold_na,
    find_common_type,
)
from pandas.core.dtypes.common import (
    is_1d_only_ea_dtype,
    is_dtype_equal,
    is_scalar,
    needs_i8_conversion,
)
from pandas.core.dtypes.concat import (
    cast_to_common_type,
    concat_compat,
)
from pandas.core.dtypes.dtypes import (
    DatetimeTZDtype,
    ExtensionDtype,
)
from pandas.core.dtypes.missing import (
    is_valid_na_for_dtype,
    isna,
    isna_all,
)

import pandas.core.algorithms as algos
from pandas.core.arrays import (
    DatetimeArray,
    ExtensionArray,
)
from pandas.core.arrays.sparse import SparseDtype
from pandas.core.construction import ensure_wrapped_if_datetimelike
from pandas.core.internals.array_manager import (
    ArrayManager,
    NullArrayProxy,
)
from pandas.core.internals.blocks import (
    ensure_block_shape,
    new_block_2d,
)
from pandas.core.internals.managers import BlockManager

if TYPE_CHECKING:
    from pandas import Index
    from pandas.core.internals.blocks import Block



def _concatenate_array_managers(
    mgrs_indexers, axes: list[Index], concat_axis: int, copy: bool
) -> Manager:
    """
    Concatenate array managers into one.

    Parameters
    ----------
    mgrs_indexers : list of (ArrayManager, {axis: indexer, ...}) tuples
    axes : list of Index
    concat_axis : int
    copy : bool

    Returns
    -------
    ArrayManager
    """
    # reindex all arrays
    mgrs = []
    for mgr, indexers in mgrs_indexers:
        axis1_made_copy = False
        for ax, indexer in indexers.items():
            mgr = mgr.reindex_indexer(
                axes[ax], indexer, axis=ax, allow_dups=True, use_na_proxy=True
            )
            if ax == 1 and indexer is not None:
                axis1_made_copy = True
        if copy and concat_axis == 0 and not axis1_made_copy:
            # for concat_axis 1 we will always get a copy through concat_arrays
            mgr = mgr.copy()
        mgrs.append(mgr)

    if concat_axis == 1:
        # concatting along the rows -> concat the reindexed arrays
        # TODO(ArrayManager) doesn't yet preserve the correct dtype
        arrays = [
            concat_arrays([mgrs[i].arrays[j] for i in range(len(mgrs))])
            for j in range(len(mgrs[0].arrays))
        ]
    else:
        # concatting along the columns -> combine reindexed arrays in a single manager
        assert concat_axis == 0
        arrays = list(itertools.chain.from_iterable([mgr.arrays for mgr in mgrs]))

    new_mgr = ArrayManager(arrays, [axes[1], axes[0]], verify_integrity=False)
    return new_mgr
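

# Illustrative sketch (not part of pandas; assumes a standard install): the
# public-API effect served by the two concat_axis branches above. Note that
# manager axes are transposed relative to the DataFrame, so the internal
# concat_axis does not map one-to-one onto the user-facing `axis` argument.
def _demo_concat_axes() -> None:
    import pandas as pd

    left = pd.DataFrame({"a": [1, 2]})
    right = pd.DataFrame({"a": [3, 4]})
    stacked = pd.concat([left, right], axis=0)  # rows appended
    paired = pd.concat([left, right], axis=1)  # columns combined side by side
    assert stacked.shape == (4, 1)
    assert paired.shape == (2, 2)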



def concat_arrays(to_concat: list) -> ArrayLike:
    """
    Alternative for concat_compat but specialized for use in the ArrayManager.

    Differences: only deals with 1D arrays (no axis keyword); assumes the
    inputs have already been passed through ensure_wrapped_if_datetimelike;
    and, unlike concat_compat, does not skip empty arrays when determining
    the result dtype.
    In addition, ensures that all NullArrayProxies get replaced with actual
    arrays.

    Parameters
    ----------
    to_concat : list of arrays

    Returns
    -------
    np.ndarray or ExtensionArray
    """
    # ignore the all-NA proxies to determine the resulting dtype
    to_concat_no_proxy = [x for x in to_concat if not isinstance(x, NullArrayProxy)]

    dtypes = {x.dtype for x in to_concat_no_proxy}
    single_dtype = len(dtypes) == 1

    if single_dtype:
        target_dtype = to_concat_no_proxy[0].dtype
    elif all(x.kind in ["i", "u", "b"] and isinstance(x, np.dtype) for x in dtypes):
        # GH#42092
        target_dtype = np.find_common_type(list(dtypes), [])
    else:
        target_dtype = find_common_type([arr.dtype for arr in to_concat_no_proxy])

    to_concat = [
        arr.to_array(target_dtype)
        if isinstance(arr, NullArrayProxy)
        else cast_to_common_type(arr, target_dtype)
        for arr in to_concat
    ]

    if isinstance(to_concat[0], ExtensionArray):
        cls = type(to_concat[0])
        return cls._concat_same_type(to_concat)

    result = np.concatenate(to_concat)

    # TODO decide on exact behaviour (we shouldn't do this only for empty result)
    # see https://github.com/pandas-dev/pandas/issues/39817
    if len(result) == 0:
        # all empties -> check for bool to not coerce to float
        kinds = {obj.dtype.kind for obj in to_concat_no_proxy}
        if len(kinds) != 1:
            if "b" in kinds:
                result = result.astype(object)
    return result
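

# Illustrative sketch (not part of pandas): the dtype resolution used in
# concat_arrays above. All-numpy integer/bool dtypes go through
# np.find_common_type (GH#42092; deprecated in newer NumPy), while other
# mixtures fall back to pandas' find_common_type.
def _demo_concat_arrays_dtype() -> None:
    int_dtypes = [np.dtype("int32"), np.dtype("uint8")]
    # int32 can hold every uint8 value, so it is the common integer dtype
    assert np.find_common_type(int_dtypes, []) == np.dtype("int32")
    # mixed int/float resolution happens at the pandas level instead
    assert find_common_type([np.dtype("int64"), np.dtype("float32")]) == np.dtype(
        "float64"
    )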



def concatenate_managers(
    mgrs_indexers, axes: list[Index], concat_axis: int, copy: bool
) -> Manager:
    """
    Concatenate block managers into one.

    Parameters
    ----------
    mgrs_indexers : list of (BlockManager, {axis: indexer, ...}) tuples
    axes : list of Index
    concat_axis : int
    copy : bool

    Returns
    -------
    BlockManager
    """
    # TODO(ArrayManager) this assumes that all managers are of the same type
    if isinstance(mgrs_indexers[0][0], ArrayManager):
        return _concatenate_array_managers(mgrs_indexers, axes, concat_axis, copy)

    mgrs_indexers = _maybe_reindex_columns_na_proxy(axes, mgrs_indexers)

    concat_plans = [
        _get_mgr_concatenation_plan(mgr, indexers) for mgr, indexers in mgrs_indexers
    ]
    concat_plan = _combine_concat_plans(concat_plans, concat_axis)
    blocks = []

    for placement, join_units in concat_plan:
        unit = join_units[0]
        blk = unit.block

        if len(join_units) == 1 and not join_units[0].indexers:
            values = blk.values
            if copy:
                values = values.copy()
            else:
                values = values.view()
            fastpath = True
        elif _is_uniform_join_units(join_units):
            vals = [ju.block.values for ju in join_units]

            if not blk.is_extension:
                # _is_uniform_join_units ensures a single dtype, so
                # we can use np.concatenate, which is more performant
                # than concat_compat
                values = np.concatenate(vals, axis=1)
            else:
                # TODO(EA2D): special-casing not needed with 2D EAs
                values = concat_compat(vals, axis=1)
                values = ensure_block_shape(values, ndim=2)

            values = ensure_wrapped_if_datetimelike(values)

            fastpath = blk.values.dtype == values.dtype
        else:
            values = _concatenate_join_units(join_units, concat_axis, copy=copy)
            fastpath = False

        if fastpath:
            b = blk.make_block_same_class(values, placement=placement)
        else:
            b = new_block_2d(values, placement=placement)

        blocks.append(b)

    return BlockManager(tuple(blocks), axes)
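

# Illustrative sketch (not part of pandas; assumes a standard install): the
# observable effect of the fastpath above -- a uniform dtype survives the
# concat unchanged, while mixed dtypes take the upcasting path.
def _demo_concat_dtype_preservation() -> None:
    import pandas as pd

    same = pd.concat([pd.DataFrame({"a": [1]}), pd.DataFrame({"a": [2]})])
    assert same["a"].dtype == np.dtype("int64")  # uniform int64 preserved
    mixed = pd.concat([pd.DataFrame({"a": [1]}), pd.DataFrame({"a": [2.5]})])
    assert mixed["a"].dtype == np.dtype("float64")  # int64 + float64 -> float64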



def _maybe_reindex_columns_na_proxy(
    axes: list[Index], mgrs_indexers: list[tuple[BlockManager, dict[int, np.ndarray]]]
) -> list[tuple[BlockManager, dict[int, np.ndarray]]]:
    """
    Reindex along columns so that all of the BlockManagers being concatenated
    have matching columns.

    Columns added in this reindexing have dtype=np.void, indicating they
    should be ignored when choosing a column's final dtype.
    """
    new_mgrs_indexers = []
    for mgr, indexers in mgrs_indexers:
        # We only reindex for axis=0 (i.e. columns), as this can be done cheaply
        if 0 in indexers:
            new_mgr = mgr.reindex_indexer(
                axes[0],
                indexers[0],
                axis=0,
                copy=False,
                only_slice=True,
                allow_dups=True,
                use_na_proxy=True,
            )
            new_indexers = indexers.copy()
            del new_indexers[0]
            new_mgrs_indexers.append((new_mgr, new_indexers))
        else:
            new_mgrs_indexers.append((mgr, indexers))

    return new_mgrs_indexers
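

# Illustrative sketch (not part of pandas; assumes a standard install): the
# np.void proxy columns added above are ignored when choosing the final
# dtype, so a column missing from one frame upcasts only as far as needed
# to hold the fill value (int64 -> float64 for NaN, not object).
def _demo_na_proxy_dtype() -> None:
    import pandas as pd

    result = pd.concat([pd.DataFrame({"a": [1]}), pd.DataFrame({"b": [2]})])
    assert result["a"].dtype == np.dtype("float64")
    assert result["b"].dtype == np.dtype("float64")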



def _get_mgr_concatenation_plan(mgr: BlockManager, indexers: dict[int, np.ndarray]):
    """
    Construct concatenation plan for given block manager and indexers.

    Parameters
    ----------
    mgr : BlockManager
    indexers : dict of {axis: indexer}

    Returns
    -------
    plan : list of (BlockPlacement, JoinUnit) tuples
    """
    # Calculate post-reindex shape, save for item axis which will be separate
    # for each block anyway.
    mgr_shape_list = list(mgr.shape)
    for ax, indexer in indexers.items():
        mgr_shape_list[ax] = len(indexer)
    mgr_shape = tuple(mgr_shape_list)

    assert 0 not in indexers

    if mgr.is_single_block:
        blk = mgr.blocks[0]
        return [(blk.mgr_locs, JoinUnit(blk, mgr_shape, indexers))]

    blknos = mgr.blknos
    blklocs = mgr.blklocs

    plan = []
    for blkno, placements in libinternals.get_blkno_placements(blknos, group=False):

        assert placements.is_slice_like
        assert blkno != -1

        join_unit_indexers = indexers.copy()

        shape_list = list(mgr_shape)
        shape_list[0] = len(placements)
        shape = tuple(shape_list)

        blk = mgr.blocks[blkno]
        ax0_blk_indexer = blklocs[placements.indexer]

        unit_no_ax0_reindexing = (
            len(placements) == len(blk.mgr_locs)
            and
            # Fastpath detection of join unit not
            # needing to reindex its block: no ax0
            # reindexing took place and block
            # placement was sequential before.
            (
                (blk.mgr_locs.is_slice_like and blk.mgr_locs.as_slice.step == 1)
                or
                # Slow-ish detection: all indexer locs
                # are sequential (and length match is
                # checked above).
                (np.diff(ax0_blk_indexer) == 1).all()
            )
        )

        # Omit indexer if no item reindexing is required.
        if unit_no_ax0_reindexing:
            join_unit_indexers.pop(0, None)
        else:
            join_unit_indexers[0] = ax0_blk_indexer

        unit = JoinUnit(blk, shape, join_unit_indexers)

        plan.append((placements, unit))

    return plan
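

# Illustrative sketch (not part of pandas): the "no ax0 reindexing" fastpath
# above boils down to checking that block locations form a unit-step
# sequence, mirroring (np.diff(ax0_blk_indexer) == 1).all().
def _demo_is_sequential(locs: np.ndarray) -> bool:
    # an empty or length-1 diff is trivially all-True, as in the plan above;
    # e.g. _demo_is_sequential(np.array([2, 3, 4])) -> True
    return bool((np.diff(locs) == 1).all())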



class JoinUnit:
    def __init__(self, block: Block, shape: Shape, indexers=None):
        # Passing shape explicitly is required for cases when block is None.
        # Note: block is None implies indexers is None, but not vice-versa
        if indexers is None:
            indexers = {}
        self.block = block
        self.indexers = indexers
        self.shape = shape

    def __repr__(self) -> str:
        return f"{type(self).__name__}({repr(self.block)}, {self.indexers})"

    @cache_readonly
    def needs_filling(self) -> bool:
        for indexer in self.indexers.values():
            # FIXME: cache results of indexer == -1 checks.
            if (indexer == -1).any():
                return True

        return False

    @cache_readonly
    def dtype(self) -> DtypeObj:
        blk = self.block
        if blk.values.dtype.kind == "V":
            raise AssertionError("Block is None, no dtype")

        if not self.needs_filling:
            return blk.dtype
        return ensure_dtype_can_hold_na(blk.dtype)

    def _is_valid_na_for(self, dtype: DtypeObj) -> bool:
        """
        Check that we are all-NA of a type/dtype that is compatible with this dtype.
        Augments `self.is_na` with an additional check of the type of NA values.
        """
        if not self.is_na:
            return False
        if self.block.dtype.kind == "V":
            return True

        if self.dtype == object:
            values = self.block.values
            return all(is_valid_na_for_dtype(x, dtype) for x in values.ravel(order="K"))

        na_value = self.block.fill_value
        if na_value is NaT and not is_dtype_equal(self.dtype, dtype):
            # e.g. we are dt64 and other is td64
            # fill_values match but we should not cast self.block.values to dtype
            # TODO: this will need updating if we ever have non-nano dt64/td64
            return False

        if na_value is NA and needs_i8_conversion(dtype):
            # FIXME: kludge; test_append_empty_frame_with_timedelta64ns_nat
            # e.g. self.dtype == "Int64" and dtype is td64, we don't want
            # to consider these as matching
            return False

        # TODO: better to use can_hold_element?
        return is_valid_na_for_dtype(na_value, dtype)

    @cache_readonly
    def is_na(self) -> bool:
        blk = self.block
        if blk.dtype.kind == "V":
            return True

        if not blk._can_hold_na:
            return False

        values = blk.values
        if values.size == 0:
            return True
        if isinstance(values.dtype, SparseDtype):
            return False

        if values.ndim == 1:
            # TODO(EA2D): no need for special case with 2D EAs
            val = values[0]
            if not is_scalar(val) or not isna(val):
                # ideally isna_all would do this short-circuiting
                return False
            return isna_all(values)
        else:
            val = values[0][0]
            if not is_scalar(val) or not isna(val):
                # ideally isna_all would do this short-circuiting
                return False
            return all(isna_all(row) for row in values)

    def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike:
        values: ArrayLike

        if upcasted_na is None and self.block.dtype.kind != "V":
            # No upcasting is necessary
            fill_value = self.block.fill_value
            values = self.block.get_values()
        else:
            fill_value = upcasted_na

            if self._is_valid_na_for(empty_dtype):
                # note: always holds when self.block.dtype.kind == "V"
                blk_dtype = self.block.dtype

                if blk_dtype == np.dtype("object"):
                    # we want to avoid filling with np.nan if we are
                    # using None; we already know that we are all
                    # nulls
                    values = self.block.values.ravel(order="K")
                    if len(values) and values[0] is None:
                        fill_value = None

                if isinstance(empty_dtype, DatetimeTZDtype):
                    # NB: exclude e.g. pyarrow[dt64tz] dtypes
                    i8values = np.full(self.shape, fill_value.value)
                    return DatetimeArray(i8values, dtype=empty_dtype)

                elif is_1d_only_ea_dtype(empty_dtype):
                    if is_dtype_equal(blk_dtype, empty_dtype) and self.indexers:
                        # avoid creating new empty array if we already have an array
                        # with correct dtype that can be reindexed
                        pass
                    else:
                        empty_dtype = cast(ExtensionDtype, empty_dtype)
                        cls = empty_dtype.construct_array_type()

                        missing_arr = cls._from_sequence([], dtype=empty_dtype)
                        ncols, nrows = self.shape
                        assert ncols == 1, ncols
                        empty_arr = -1 * np.ones((nrows,), dtype=np.intp)
                        return missing_arr.take(
                            empty_arr, allow_fill=True, fill_value=fill_value
                        )
                elif isinstance(empty_dtype, ExtensionDtype):
                    # TODO: no tests get here, a handful would if we disabled
                    # the dt64tz special-case above (which is faster)
                    cls = empty_dtype.construct_array_type()
                    missing_arr = cls._empty(shape=self.shape, dtype=empty_dtype)
                    missing_arr[:] = fill_value
                    return missing_arr
                else:
                    # NB: we should never get here with empty_dtype integer or bool;
                    # if we did, the missing_arr.fill would cast to gibberish
                    missing_arr = np.empty(self.shape, dtype=empty_dtype)
                    missing_arr.fill(fill_value)
                    return missing_arr

            if (not self.indexers) and (not self.block._can_consolidate):
                # preserve these for validation in concat_compat
                return self.block.values

            if self.block.is_bool:
                # External code requested filling/upcasting, bool values must
                # be upcasted to object to avoid being upcasted to numeric.
                values = self.block.astype(np.dtype("object")).values
            else:
                # No dtype upcasting is done here, it will be performed during
                # concatenation itself.
                values = self.block.values

        if not self.indexers:
            # If there's no indexing to be done, we want to signal outside
            # code that this array must be copied explicitly. This is done
            # by returning a view and checking `retval.base`.
            values = values.view()

        else:
            for ax, indexer in self.indexers.items():
                values = algos.take_nd(values, indexer, axis=ax)

        return values
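

# Illustrative sketch (not part of pandas): the short-circuit used by
# JoinUnit.is_na above -- peek at a single element before paying for a
# full all-NA scan.
def _demo_is_na_short_circuit(values: np.ndarray) -> bool:
    if values.size == 0:
        return True
    first = values.ravel(order="K")[0]
    if not isna(first):
        # one non-NA element disproves "all NA" without scanning everything
        return False
    return bool(isna(values).all())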



def _concatenate_join_units(
    join_units: list[JoinUnit], concat_axis: int, copy: bool
) -> ArrayLike:
    """
    Concatenate values from several join units along selected axis.
    """
    if concat_axis == 0 and len(join_units) > 1:
        # Concatenating join units along ax0 is handled in _merge_blocks.
        raise AssertionError("Concatenating join units along axis0")

    empty_dtype = _get_empty_dtype(join_units)

    has_none_blocks = any(unit.block.dtype.kind == "V" for unit in join_units)
    upcasted_na = _dtype_to_na_value(empty_dtype, has_none_blocks)

    to_concat = [
        ju.get_reindexed_values(empty_dtype=empty_dtype, upcasted_na=upcasted_na)
        for ju in join_units
    ]

    if len(to_concat) == 1:
        # Only one block, nothing to concatenate.
        concat_values = to_concat[0]
        if copy:
            if isinstance(concat_values, np.ndarray):
                # non-reindexed (=not yet copied) arrays are made into a view
                # in JoinUnit.get_reindexed_values
                if concat_values.base is not None:
                    concat_values = concat_values.copy()
            else:
                concat_values = concat_values.copy()

    elif any(is_1d_only_ea_dtype(t.dtype) for t in to_concat):
        # TODO(EA2D): special case not needed if all EAs used HybridBlocks
        # NB: we are still assuming here that Hybrid blocks have shape (1, N)
        # concatting with at least one EA means we are concatting a single column
        # the non-EA values are 2D arrays with shape (1, n)

        # error: No overload variant of "__getitem__" of "ExtensionArray" matches
        # argument type "Tuple[int, slice]"
        to_concat = [
            t
            if is_1d_only_ea_dtype(t.dtype)
            else t[0, :]  # type: ignore[call-overload]
            for t in to_concat
        ]
        concat_values = concat_compat(to_concat, axis=0, ea_compat_axis=True)
        concat_values = ensure_block_shape(concat_values, 2)

    else:
        concat_values = concat_compat(to_concat, axis=concat_axis)

    return concat_values
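

# Illustrative sketch (not part of pandas; assumes a standard install): the
# 1d-only extension-array path above is what lets a masked dtype survive a
# concat with a plain numpy column of the same kind.
def _demo_ea_concat() -> None:
    import pandas as pd

    left = pd.DataFrame({"a": pd.array([1], dtype="Int64")})
    right = pd.DataFrame({"a": [2]})  # plain numpy int64
    out = pd.concat([left, right])
    assert str(out["a"].dtype) == "Int64"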



def _dtype_to_na_value(dtype: DtypeObj, has_none_blocks: bool):
    """
    Find the NA value to go with this dtype.
    """
    if isinstance(dtype, ExtensionDtype):
        return dtype.na_value
    elif dtype.kind in ["m", "M"]:
        return dtype.type("NaT")
    elif dtype.kind in ["f", "c"]:
        return dtype.type("NaN")
    elif dtype.kind == "b":
        # different from missing.na_value_for_dtype
        return None
    elif dtype.kind in ["i", "u"]:
        if not has_none_blocks:
            # different from missing.na_value_for_dtype
            return None
        return np.nan
    elif dtype.kind == "O":
        return np.nan
    raise NotImplementedError
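

# Illustrative sketch (not part of pandas): concrete values produced by the
# kind-based dispatch above.
def _demo_na_values() -> None:
    assert np.isnat(_dtype_to_na_value(np.dtype("M8[ns]"), False))  # NaT
    assert np.isnan(_dtype_to_na_value(np.dtype("float64"), False))  # NaN
    # integer blocks only need an NA value when all-NA (void) blocks exist
    assert _dtype_to_na_value(np.dtype("int64"), False) is None
    assert np.isnan(_dtype_to_na_value(np.dtype("int64"), True))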



def _get_empty_dtype(join_units: Sequence[JoinUnit]) -> DtypeObj:
    """
    Return the dtype to use when concatenating the specified units.

    Returns
    -------
    dtype
    """
    if len(join_units) == 1:
        blk = join_units[0].block
        return blk.dtype

    if _is_uniform_reindex(join_units):
        empty_dtype = join_units[0].block.dtype
        return empty_dtype

    has_none_blocks = any(unit.block.dtype.kind == "V" for unit in join_units)

    dtypes = [unit.dtype for unit in join_units if not unit.is_na]
    if not len(dtypes):
        dtypes = [unit.dtype for unit in join_units if unit.block.dtype.kind != "V"]

    dtype = find_common_type(dtypes)
    if has_none_blocks:
        dtype = ensure_dtype_can_hold_na(dtype)
    return dtype
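

# Illustrative sketch (not part of pandas): the two helpers combined above.
# find_common_type resolves a dtype across units; ensure_dtype_can_hold_na
# widens it when all-NA (void) blocks must be filled with missing values.
def _demo_empty_dtype() -> None:
    assert find_common_type([np.dtype("int64"), np.dtype("int32")]) == np.dtype(
        "int64"
    )
    # an integer dtype cannot hold NaN, so it is widened to float64
    assert ensure_dtype_can_hold_na(np.dtype("int64")) == np.dtype("float64")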



def _is_uniform_join_units(join_units: list[JoinUnit]) -> bool:
    """
    Check if the join units consist of blocks of uniform type that can
    be concatenated using Block.concat_same_type instead of the generic
    _concatenate_join_units (which uses `concat_compat`).
    """
    first = join_units[0].block
    if first.dtype.kind == "V":
        return False
    return (
        # exclude cases where a) ju.block is None or b) we have e.g. Int64+int64
        all(type(ju.block) is type(first) for ju in join_units)
        and
        # e.g. DatetimeLikeBlock can be dt64 or td64, but these are not uniform
        all(
            is_dtype_equal(ju.block.dtype, first.dtype)
            # GH#42092 we only want the dtype_equal check for non-numeric blocks
            # (for now, may change but that would need a deprecation)
            or ju.block.dtype.kind in ["b", "i", "u"]
            for ju in join_units
        )
        and
        # no blocks that would get missing values (can lead to type upcasts)
        # unless we're an extension dtype.
        all(not ju.is_na or ju.block.is_extension for ju in join_units)
        and
        # no blocks with indexers (as then the dimensions do not fit)
        all(not ju.indexers for ju in join_units)
        and
        # only use this path when there is something to concatenate
        len(join_units) > 1
    )



def _is_uniform_reindex(join_units) -> bool:
    return (
        # TODO: should this be ju.block._can_hold_na?
        all(ju.block.is_extension for ju in join_units)
        and len({ju.block.dtype.name for ju in join_units}) == 1
    )



def _trim_join_unit(join_unit: JoinUnit, length: int) -> JoinUnit:
    """
    Reduce join_unit's shape along item axis to length.

    Extra items that didn't fit are returned as a separate block.
    """
    if 0 not in join_unit.indexers:
        extra_indexers = join_unit.indexers

        if join_unit.block is None:
            extra_block = None
        else:
            extra_block = join_unit.block.getitem_block(slice(length, None))
            join_unit.block = join_unit.block.getitem_block(slice(length))
    else:
        extra_block = join_unit.block

        extra_indexers = copy.copy(join_unit.indexers)
        extra_indexers[0] = extra_indexers[0][length:]
        join_unit.indexers[0] = join_unit.indexers[0][:length]

    extra_shape = (join_unit.shape[0] - length,) + join_unit.shape[1:]
    join_unit.shape = (length,) + join_unit.shape[1:]

    return JoinUnit(block=extra_block, indexers=extra_indexers, shape=extra_shape)



def _combine_concat_plans(plans, concat_axis: int):
    """
    Combine multiple concatenation plans into one.

    The input plans are consumed as iterators; their join units may be
    trimmed in place via _trim_join_unit.
    """
    if len(plans) == 1:
        for p in plans[0]:
            yield p[0], [p[1]]

    elif concat_axis == 0:
        offset = 0
        for plan in plans:
            last_plc = None

            for plc, unit in plan:
                yield plc.add(offset), [unit]
                last_plc = plc

            if last_plc is not None:
                offset += last_plc.as_slice.stop

    else:
        # singleton list so we can modify it as a side-effect within _next_or_none
        num_ended = [0]

        def _next_or_none(seq):
            retval = next(seq, None)
            if retval is None:
                num_ended[0] += 1
            return retval

        plans = list(map(iter, plans))
        next_items = list(map(_next_or_none, plans))

        while num_ended[0] != len(next_items):
            if num_ended[0] > 0:
                raise ValueError("Plan shapes are not aligned")

            placements, units = zip(*next_items)

            lengths = list(map(len, placements))
            min_len, max_len = min(lengths), max(lengths)

            if min_len == max_len:
                yield placements[0], units
                next_items[:] = map(_next_or_none, plans)
            else:
                yielded_placement = None
                yielded_units = [None] * len(next_items)
                for i, (plc, unit) in enumerate(next_items):
                    yielded_units[i] = unit
                    if len(plc) > min_len:
                        # _trim_join_unit updates unit in place, so only
                        # placement needs to be sliced to skip min_len.
                        next_items[i] = (plc[min_len:], _trim_join_unit(unit, min_len))
                    else:
                        yielded_placement = plc
                        next_items[i] = _next_or_none(plans[i])

                yield yielded_placement, yielded_units