Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/numpy/core/defchararray.py: 38%
439 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1"""
2This module contains a set of functions for vectorized string
3operations and methods.
5.. note::
6 The `chararray` class exists for backwards compatibility with
7 Numarray, it is not recommended for new development. Starting from numpy
8 1.4, if one needs arrays of strings, it is recommended to use arrays of
9 `dtype` `object_`, `string_` or `unicode_`, and use the free functions
10 in the `numpy.char` module for fast vectorized string operations.
12Some methods will only be available if the corresponding string method is
13available in your version of Python.
15The preferred alias for `defchararray` is `numpy.char`.
17"""
18import functools
19from .numerictypes import (
20 string_, unicode_, integer, int_, object_, bool_, character)
21from .numeric import ndarray, compare_chararrays
22from .numeric import array as narray
23from numpy.core.multiarray import _vec_string
24from numpy.core.overrides import set_module
25from numpy.core import overrides
26from numpy.compat import asbytes
27import numpy
# Names exported by ``from numpy.char import *``.
__all__ = [
    # element-wise comparisons
    'equal', 'not_equal', 'greater_equal', 'less_equal', 'greater', 'less',
    # length, concatenation, repetition, interpolation
    'str_len', 'add', 'multiply', 'mod',
    # vectorized string methods
    'capitalize', 'center', 'count', 'decode', 'encode', 'endswith',
    'expandtabs', 'find', 'index',
    'isalnum', 'isalpha', 'isdigit', 'islower', 'isspace', 'istitle',
    'isupper',
    'join', 'ljust', 'lower', 'lstrip', 'partition', 'replace',
    'rfind', 'rindex', 'rjust', 'rpartition', 'rsplit', 'rstrip',
    'split', 'splitlines', 'startswith', 'strip', 'swapcase', 'title',
    'translate', 'upper', 'zfill', 'isnumeric', 'isdecimal',
    # array constructors
    'array', 'asarray',
]
# Module-level integer; nothing in the visible part of this file reads it.
_globalvar = 0

# Decorator factory used by the public functions below: the generic
# ``overrides.array_function_dispatch`` with ``module`` pre-bound to
# 'numpy.char'.
array_function_dispatch = functools.partial(
    overrides.array_function_dispatch,
    module='numpy.char',
)
48def _use_unicode(*args):
49 """
50 Helper function for determining the output type of some string
51 operations.
53 For an operation on two ndarrays, if at least one is unicode, the
54 result should be unicode.
55 """
56 for x in args:
57 if (isinstance(x, str) or
58 issubclass(numpy.asarray(x).dtype.type, unicode_)):
59 return unicode_
60 return string_
62def _to_string_or_unicode_array(result):
63 """
64 Helper function to cast a result back into a string or unicode array
65 if an object array must be used as an intermediary.
66 """
67 return numpy.asarray(result.tolist())
69def _clean_args(*args):
70 """
71 Helper function for delegating arguments to Python string
72 functions.
74 Many of the Python string operations that have optional arguments
75 do not use 'None' to indicate a default value. In these cases,
76 we need to remove all None arguments, and those following them.
77 """
78 newargs = []
79 for chk in args:
80 if chk is None:
81 break
82 newargs.append(chk)
83 return newargs
85def _get_num_chars(a):
86 """
87 Helper function that returns the number of characters per field in
88 a string or unicode array. This is to abstract out the fact that
89 for a unicode array this is itemsize / 4.
90 """
91 if issubclass(a.dtype.type, unicode_):
92 return a.itemsize // 4
93 return a.itemsize
96def _binary_op_dispatcher(x1, x2):
97 return (x1, x2)
@array_function_dispatch(_binary_op_dispatcher)
def equal(x1, x2):
    """
    Test ``x1 == x2`` element-wise.

    In contrast to `numpy.equal`, trailing whitespace is stripped from
    the strings before they are compared; this mirrors numarray's
    behaviour and is kept for backward compatibility.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray
        Boolean array holding the comparison result.

    See Also
    --------
    not_equal, greater_equal, less_equal, greater, less
    """
    strip_trailing = True
    return compare_chararrays(x1, x2, '==', strip_trailing)
@array_function_dispatch(_binary_op_dispatcher)
def not_equal(x1, x2):
    """
    Test ``x1 != x2`` element-wise.

    In contrast to `numpy.not_equal`, trailing whitespace is stripped
    from the strings before they are compared; this mirrors numarray's
    behaviour and is kept for backward compatibility.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray
        Boolean array holding the comparison result.

    See Also
    --------
    equal, greater_equal, less_equal, greater, less
    """
    strip_trailing = True
    return compare_chararrays(x1, x2, '!=', strip_trailing)
@array_function_dispatch(_binary_op_dispatcher)
def greater_equal(x1, x2):
    """
    Test ``x1 >= x2`` element-wise.

    In contrast to `numpy.greater_equal`, trailing whitespace is
    stripped from the strings before they are compared; this mirrors
    numarray's behaviour and is kept for backward compatibility.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray
        Boolean array holding the comparison result.

    See Also
    --------
    equal, not_equal, less_equal, greater, less
    """
    strip_trailing = True
    return compare_chararrays(x1, x2, '>=', strip_trailing)
@array_function_dispatch(_binary_op_dispatcher)
def less_equal(x1, x2):
    """
    Test ``x1 <= x2`` element-wise.

    In contrast to `numpy.less_equal`, trailing whitespace is stripped
    from the strings before they are compared; this mirrors numarray's
    behaviour and is kept for backward compatibility.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray
        Boolean array holding the comparison result.

    See Also
    --------
    equal, not_equal, greater_equal, greater, less
    """
    strip_trailing = True
    return compare_chararrays(x1, x2, '<=', strip_trailing)
@array_function_dispatch(_binary_op_dispatcher)
def greater(x1, x2):
    """
    Test ``x1 > x2`` element-wise.

    In contrast to `numpy.greater`, trailing whitespace is stripped
    from the strings before they are compared; this mirrors numarray's
    behaviour and is kept for backward compatibility.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray
        Boolean array holding the comparison result.

    See Also
    --------
    equal, not_equal, greater_equal, less_equal, less
    """
    strip_trailing = True
    return compare_chararrays(x1, x2, '>', strip_trailing)
@array_function_dispatch(_binary_op_dispatcher)
def less(x1, x2):
    """
    Test ``x1 < x2`` element-wise.

    In contrast to `numpy.less`, trailing whitespace is stripped from
    the strings before they are compared; this mirrors numarray's
    behaviour and is kept for backward compatibility.

    Parameters
    ----------
    x1, x2 : array_like of str or unicode
        Input arrays of the same shape.

    Returns
    -------
    out : ndarray
        Boolean array holding the comparison result.

    See Also
    --------
    equal, not_equal, greater_equal, less_equal, greater
    """
    strip_trailing = True
    return compare_chararrays(x1, x2, '<', strip_trailing)
257def _unary_op_dispatcher(a):
258 return (a,)
@array_function_dispatch(_unary_op_dispatcher)
def str_len(a):
    """
    Compute ``len(a)`` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Integer array with the length of every element.

    See Also
    --------
    builtins.len
    """
    # ``__len__`` yields Python ints rather than C integers.  ``intp``
    # would be the usual choice for lengths, but ``int_`` suffices
    # because of the dtype itemsize limitation.
    length_method = '__len__'
    return _vec_string(a, int_, length_method)
@array_function_dispatch(_binary_op_dispatcher)
def add(x1, x2):
    """
    Concatenate two arrays of str or unicode element-wise.

    `x1` and `x2` must have the same shape.

    Parameters
    ----------
    x1 : array_like of str or unicode
        Left-hand operands.
    x2 : array_like of str or unicode
        Right-hand operands.

    Returns
    -------
    add : ndarray
        Array of `string_` or `unicode_` (depending on the input types)
        with the same shape as `x1` and `x2`.
    """
    left = numpy.asarray(x1)
    right = numpy.asarray(x2)
    result_type = _use_unicode(left, right)
    # The output must be wide enough for the widest element of each side.
    total_chars = _get_num_chars(left) + _get_num_chars(right)
    return _vec_string(left, (result_type, total_chars), '__add__', (right,))
313def _multiply_dispatcher(a, i):
314 return (a,)
@array_function_dispatch(_multiply_dispatcher)
def multiply(a, i):
    """
    Return ``a * i`` (repeated concatenation of each string),
    element-wise.

    Repeat counts below 0 are treated as 0, which yields an empty
    string.

    Parameters
    ----------
    a : array_like of str or unicode
        Strings to repeat.
    i : array_like of ints
        Repeat counts.  May be empty.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input types.

    Raises
    ------
    ValueError
        If `i` does not have an integer dtype.
    """
    a_arr = numpy.asarray(a)
    i_arr = numpy.asarray(i)
    if not issubclass(i_arr.dtype.type, integer):
        raise ValueError("Can only multiply by integers")
    # ``initial=0`` clamps negative counts to zero and also gives the
    # reduction an identity, so a zero-size `i` no longer raises
    # "zero-size array to reduction operation maximum".
    largest_repeat = int(i_arr.max(initial=0))
    out_size = _get_num_chars(a_arr) * largest_repeat
    return _vec_string(
        a_arr, (a_arr.dtype.type, out_size), '__mul__', (i_arr,))
347def _mod_dispatcher(a, values):
348 return (a, values)
@array_function_dispatch(_mod_dispatcher)
def mod(a, values):
    """
    Return ``a % values`` (printf-style string interpolation),
    element-wise for a pair of array_likes of str or unicode.

    Parameters
    ----------
    a : array_like of str or unicode
        Format strings.
    values : array_like of values
        Values interpolated element-wise into the format strings.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input types.

    See Also
    --------
    str.__mod__
    """
    # Format through an object array, then convert back to a string array.
    formatted = _vec_string(a, object_, '__mod__', (values,))
    return _to_string_or_unicode_array(formatted)
@array_function_dispatch(_unary_op_dispatcher)
def capitalize(a):
    """
    Return a copy of `a` in which only the first character of each
    element is capitalized.

    Calls `str.capitalize` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode
        Input array of strings to capitalize.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input types.

    See Also
    --------
    str.capitalize

    Examples
    --------
    >>> c = np.array(['a1b2', '1b2a', 'b2a1', '2a1b'], 'S4'); c
    array([b'a1b2', b'1b2a', b'b2a1', b'2a1b'], dtype='|S4')
    >>> np.char.capitalize(c)
    array([b'A1b2', b'1b2a', b'B2a1', b'2a1b'], dtype='|S4')
    """
    strings = numpy.asarray(a)
    # Capitalizing never changes the length, so the input dtype is reused.
    return _vec_string(strings, strings.dtype, 'capitalize')
418def _center_dispatcher(a, width, fillchar=None):
419 return (a,)
@array_function_dispatch(_center_dispatcher)
def center(a, width, fillchar=' '):
    """
    Return a copy of `a` with every element centered in a string of
    length `width`.

    Calls `str.center` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode
    width : int
        The length of the resulting strings.
    fillchar : str or unicode, optional
        The padding character to use (default is space).

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input types.

    See Also
    --------
    str.center
    """
    strings = numpy.asarray(a)
    widths = numpy.asarray(width)
    # The output itemsize is taken from the largest requested width.
    widest = int(numpy.max(widths.flat))
    if numpy.issubdtype(strings.dtype, numpy.string_):
        # Byte-string input needs a bytes fill character.
        fillchar = asbytes(fillchar)
    result_dtype = (strings.dtype.type, widest)
    return _vec_string(strings, result_dtype, 'center', (widths, fillchar))
459def _count_dispatcher(a, sub, start=None, end=None):
460 return (a,)
@array_function_dispatch(_count_dispatcher)
def count(a, sub, start=0, end=None):
    """
    Return an array with the number of non-overlapping occurrences of
    substring `sub` in the range [`start`, `end`].

    Calls `str.count` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode
    sub : str or unicode
        The substring to search for.
    start, end : int, optional
        Interpreted as in slice notation to delimit the range in which
        to count.

    Returns
    -------
    out : ndarray
        Output array of ints.

    See Also
    --------
    str.count

    Examples
    --------
    >>> c = np.array(['aAaAaA', '  aA  ', 'abBABba'])
    >>> c
    array(['aAaAaA', '  aA  ', 'abBABba'], dtype='<U7')
    >>> np.char.count(c, 'A')
    array([3, 1, 1])
    >>> np.char.count(c, 'aA')
    array([3, 1, 0])
    >>> np.char.count(c, 'A', start=1, end=4)
    array([2, 1, 1])
    >>> np.char.count(c, 'A', start=1, end=3)
    array([1, 0, 0])
    """
    # `end` is forwarded only when the caller supplied it.
    call_args = [sub, start]
    call_args += _clean_args(end)
    return _vec_string(a, int_, 'count', call_args)
509def _code_dispatcher(a, encoding=None, errors=None):
510 return (a,)
@array_function_dispatch(_code_dispatcher)
def decode(a, encoding=None, errors=None):
    r"""
    Call ``bytes.decode`` element-wise.

    The set of available codecs comes from the Python standard library,
    and may be extended at runtime.  For more information, see the
    :mod:`codecs` module.

    Parameters
    ----------
    a : array_like of bytes
    encoding : str, optional
        The name of an encoding.
    errors : str, optional
        Specifies how to handle encoding errors.

    Returns
    -------
    out : ndarray

    See Also
    --------
    bytes.decode

    Notes
    -----
    The type of the result will depend on the encoding specified.

    Examples
    --------
    >>> c = np.array([b'\x81\xc1\x81\xc1\x81\xc1', b'@@\x81\xc1@@',
    ...               b'\x81\x82\xc2\xc1\xc2\x82\x81'])
    >>> np.char.decode(c, encoding='cp037')
    array(['aAaAaA', '  aA  ', 'abBABba'], dtype='<U7')
    """
    # `encoding` and `errors` are forwarded only up to the first None.
    codec_args = _clean_args(encoding, errors)
    decoded = _vec_string(a, object_, 'decode', codec_args)
    return _to_string_or_unicode_array(decoded)
@array_function_dispatch(_code_dispatcher)
def encode(a, encoding=None, errors=None):
    """
    Call `str.encode` element-wise.

    The set of available codecs comes from the Python standard library,
    and may be extended at runtime.  For more information, see the
    codecs module.

    Parameters
    ----------
    a : array_like of str or unicode
    encoding : str, optional
        The name of an encoding.
    errors : str, optional
        Specifies how to handle encoding errors.

    Returns
    -------
    out : ndarray

    See Also
    --------
    str.encode

    Notes
    -----
    The type of the result will depend on the encoding specified.
    """
    # `encoding` and `errors` are forwarded only up to the first None.
    codec_args = _clean_args(encoding, errors)
    encoded = _vec_string(a, object_, 'encode', codec_args)
    return _to_string_or_unicode_array(encoded)
595def _endswith_dispatcher(a, suffix, start=None, end=None):
596 return (a,)
@array_function_dispatch(_endswith_dispatcher)
def endswith(a, suffix, start=0, end=None):
    """
    Return a boolean array that is `True` where the string element in
    `a` ends with `suffix`, and `False` elsewhere.

    Calls `str.endswith` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode
    suffix : str
    start, end : int, optional
        With `start`, test beginning at that position.  With `end`,
        stop comparing at that position.

    Returns
    -------
    out : ndarray
        Output array of bools.

    See Also
    --------
    str.endswith

    Examples
    --------
    >>> s = np.array(['foo', 'bar'])
    >>> s
    array(['foo', 'bar'], dtype='<U3')
    >>> np.char.endswith(s, 'ar')
    array([False,  True])
    >>> np.char.endswith(s, 'a', start=1, end=2)
    array([False,  True])
    """
    # `end` is forwarded only when the caller supplied it.
    call_args = [suffix, start]
    call_args += _clean_args(end)
    return _vec_string(a, bool_, 'endswith', call_args)
643def _expandtabs_dispatcher(a, tabsize=None):
644 return (a,)
@array_function_dispatch(_expandtabs_dispatcher)
def expandtabs(a, tabsize=8):
    """
    Return a copy of each string element with every tab character
    replaced by one or more spaces.

    Calls `str.expandtabs` element-wise.

    The number of spaces depends on the current column and on
    `tabsize`.  The column number is reset to zero after each newline
    in the string.  Other non-printing characters and escape sequences
    are not interpreted.

    Parameters
    ----------
    a : array_like of str or unicode
        Input array.
    tabsize : int, optional
        Tab stop spacing; defaults to 8 when not given.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type.

    See Also
    --------
    str.expandtabs
    """
    # Result lengths vary per element, so go through an object array.
    expanded = _vec_string(a, object_, 'expandtabs', (tabsize,))
    return _to_string_or_unicode_array(expanded)
@array_function_dispatch(_count_dispatcher)
def find(a, sub, start=0, end=None):
    """
    For each element, return the lowest index in the string at which
    substring `sub` is found, such that `sub` is contained in the range
    [`start`, `end`].

    Calls `str.find` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode
    sub : str or unicode
    start, end : int, optional
        Interpreted as in slice notation.

    Returns
    -------
    out : ndarray or int
        Output array of ints; -1 where `sub` is not found.

    See Also
    --------
    str.find
    """
    # `end` is forwarded only when the caller supplied it.
    call_args = [sub, start]
    call_args += _clean_args(end)
    return _vec_string(a, int_, 'find', call_args)
@array_function_dispatch(_count_dispatcher)
def index(a, sub, start=0, end=None):
    """
    Like `find`, but raise `ValueError` when the substring is not
    found.

    Calls `str.index` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode
    sub : str or unicode
    start, end : int, optional

    Returns
    -------
    out : ndarray
        Output array of ints.

    See Also
    --------
    find, str.find
    """
    # `end` is forwarded only when the caller supplied it.
    call_args = [sub, start]
    call_args += _clean_args(end)
    return _vec_string(a, int_, 'index', call_args)
@array_function_dispatch(_unary_op_dispatcher)
def isalnum(a):
    """
    Return true for each element whose characters are all alphanumeric
    and which has at least one character, false otherwise.

    Calls `str.isalnum` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of bools.

    See Also
    --------
    str.isalnum
    """
    predicate = 'isalnum'
    return _vec_string(a, bool_, predicate)
@array_function_dispatch(_unary_op_dispatcher)
def isalpha(a):
    """
    Return true for each element whose characters are all alphabetic
    and which has at least one character, false otherwise.

    Calls `str.isalpha` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of bools.

    See Also
    --------
    str.isalpha
    """
    predicate = 'isalpha'
    return _vec_string(a, bool_, predicate)
@array_function_dispatch(_unary_op_dispatcher)
def isdigit(a):
    """
    Return true for each element whose characters are all digits and
    which has at least one character, false otherwise.

    Calls `str.isdigit` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of bools.

    See Also
    --------
    str.isdigit
    """
    predicate = 'isdigit'
    return _vec_string(a, bool_, predicate)
@array_function_dispatch(_unary_op_dispatcher)
def islower(a):
    """
    Return true for each element whose cased characters are all
    lowercase and which has at least one cased character, false
    otherwise.

    Calls `str.islower` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of bools.

    See Also
    --------
    str.islower
    """
    predicate = 'islower'
    return _vec_string(a, bool_, predicate)
@array_function_dispatch(_unary_op_dispatcher)
def isspace(a):
    """
    Return true for each element that consists only of whitespace
    characters and has at least one character, false otherwise.

    Calls `str.isspace` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of bools.

    See Also
    --------
    str.isspace
    """
    predicate = 'isspace'
    return _vec_string(a, bool_, predicate)
@array_function_dispatch(_unary_op_dispatcher)
def istitle(a):
    """
    Return true for each element that is a titlecased string with at
    least one character, false otherwise.

    Calls `str.istitle` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of bools.

    See Also
    --------
    str.istitle
    """
    predicate = 'istitle'
    return _vec_string(a, bool_, predicate)
@array_function_dispatch(_unary_op_dispatcher)
def isupper(a):
    """
    Return true for each element whose cased characters are all
    uppercase and which has at least one character, false otherwise.

    Calls `str.isupper` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of bools.

    See Also
    --------
    str.isupper
    """
    predicate = 'isupper'
    return _vec_string(a, bool_, predicate)
933def _join_dispatcher(sep, seq):
934 return (sep, seq)
@array_function_dispatch(_join_dispatcher)
def join(sep, seq):
    """
    Return a string which is the concatenation of the strings in the
    sequence `seq`, separated by `sep`.

    Calls `str.join` element-wise.

    Parameters
    ----------
    sep : array_like of str or unicode
    seq : array_like of str or unicode

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input types.

    See Also
    --------
    str.join
    """
    # Result lengths vary per element, so go through an object array.
    joined = _vec_string(sep, object_, 'join', (seq,))
    return _to_string_or_unicode_array(joined)
964def _just_dispatcher(a, width, fillchar=None):
965 return (a,)
@array_function_dispatch(_just_dispatcher)
def ljust(a, width, fillchar=' '):
    """
    Return an array with the elements of `a` left-justified in a
    string of length `width`.

    Calls `str.ljust` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode
    width : int
        The length of the resulting strings.
    fillchar : str or unicode, optional
        The character to use for padding.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type.

    See Also
    --------
    str.ljust
    """
    strings = numpy.asarray(a)
    widths = numpy.asarray(width)
    # The output itemsize is taken from the largest requested width.
    widest = int(numpy.max(widths.flat))
    if numpy.issubdtype(strings.dtype, numpy.string_):
        # Byte-string input needs a bytes fill character.
        fillchar = asbytes(fillchar)
    result_dtype = (strings.dtype.type, widest)
    return _vec_string(strings, result_dtype, 'ljust', (widths, fillchar))
@array_function_dispatch(_unary_op_dispatcher)
def lower(a):
    """
    Return an array with the elements converted to lowercase.

    Calls `str.lower` element-wise.

    Parameters
    ----------
    a : array_like, {str, unicode}
        Input array.

    Returns
    -------
    out : ndarray, {str, unicode}
        Output array of str or unicode, depending on input type.

    See Also
    --------
    str.lower

    Examples
    --------
    >>> c = np.array(['A1B C', '1BCA', 'BCA1']); c
    array(['A1B C', '1BCA', 'BCA1'], dtype='<U5')
    >>> np.char.lower(c)
    array(['a1b c', '1bca', 'bca1'], dtype='<U5')
    """
    strings = numpy.asarray(a)
    # The input dtype is reused for the result.
    return _vec_string(strings, strings.dtype, 'lower')
1039def _strip_dispatcher(a, chars=None):
1040 return (a,)
@array_function_dispatch(_strip_dispatcher)
def lstrip(a, chars=None):
    """
    For each element in `a`, return a copy with the leading characters
    removed.

    Calls `str.lstrip` element-wise.

    Parameters
    ----------
    a : array-like, {str, unicode}
        Input array.
    chars : {str, unicode}, optional
        The set of characters to remove.  If omitted or None,
        whitespace is removed.  `chars` is not a prefix; every
        combination of its characters is stripped.

    Returns
    -------
    out : ndarray, {str, unicode}
        Output array of str or unicode, depending on input type.

    See Also
    --------
    str.lstrip

    Examples
    --------
    >>> c = np.array(['aAaAaA', '  aA  ', 'abBABba'])
    >>> c
    array(['aAaAaA', '  aA  ', 'abBABba'], dtype='<U7')

    The 'a' in c[1] is not stripped because whitespace precedes it.

    >>> np.char.lstrip(c, 'a')
    array(['AaAaA', '  aA  ', 'bBABba'], dtype='<U7')
    >>> np.char.lstrip(c, 'A') # leaves c unchanged
    array(['aAaAaA', '  aA  ', 'abBABba'], dtype='<U7')
    >>> (np.char.lstrip(c, ' ') == np.char.lstrip(c, None)).all()
    True
    """
    strings = numpy.asarray(a)
    # Stripping can only shorten elements, so the input dtype is reused.
    return _vec_string(strings, strings.dtype, 'lstrip', (chars,))
1098def _partition_dispatcher(a, sep):
1099 return (a,)
@array_function_dispatch(_partition_dispatcher)
def partition(a, sep):
    """
    Partition each element in `a` around `sep`.

    Calls `str.partition` element-wise.

    Each element is split at the first occurrence of `sep` into three
    strings: the part before the separator, the separator itself, and
    the part after it.  If the separator is not found, the three
    strings are the element itself followed by two empty strings.

    Parameters
    ----------
    a : array_like, {str, unicode}
        Input array.
    sep : {str, unicode}
        Separator to split each string element in `a`.

    Returns
    -------
    out : ndarray, {str, unicode}
        Output array of str or unicode, depending on input type.
        The output array has an extra dimension with 3 elements per
        input element.

    See Also
    --------
    str.partition
    """
    # Each element becomes a 3-tuple, so go through an object array.
    pieces = _vec_string(a, object_, 'partition', (sep,))
    return _to_string_or_unicode_array(pieces)
1138def _replace_dispatcher(a, old, new, count=None):
1139 return (a,)
@array_function_dispatch(_replace_dispatcher)
def replace(a, old, new, count=None):
    """
    For each element in `a`, return a copy of the string with all
    occurrences of substring `old` replaced by `new`.

    Calls `str.replace` element-wise.

    Parameters
    ----------
    a : array-like of str or unicode
    old, new : str or unicode
    count : int, optional
        When given, only the first `count` occurrences are replaced.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type.

    See Also
    --------
    str.replace
    """
    # `count` is forwarded only when the caller supplied it.
    call_args = [old, new]
    call_args += _clean_args(count)
    replaced = _vec_string(a, object_, 'replace', call_args)
    return _to_string_or_unicode_array(replaced)
@array_function_dispatch(_count_dispatcher)
def rfind(a, sub, start=0, end=None):
    """
    For each element in `a`, return the highest index in the string at
    which substring `sub` is found, such that `sub` is contained within
    [`start`, `end`].

    Calls `str.rfind` element-wise.

    Parameters
    ----------
    a : array-like of str or unicode
    sub : str or unicode
    start, end : int, optional
        Interpreted as in slice notation.

    Returns
    -------
    out : ndarray
        Output array of ints; -1 on failure.

    See Also
    --------
    str.rfind
    """
    # `end` is forwarded only when the caller supplied it.
    call_args = [sub, start]
    call_args += _clean_args(end)
    return _vec_string(a, int_, 'rfind', call_args)
@array_function_dispatch(_count_dispatcher)
def rindex(a, sub, start=0, end=None):
    """
    Like `rfind`, but raise `ValueError` when the substring `sub` is
    not found.

    Calls `str.rindex` element-wise.

    Parameters
    ----------
    a : array-like of str or unicode
    sub : str or unicode
    start, end : int, optional

    Returns
    -------
    out : ndarray
        Output array of ints.

    See Also
    --------
    rfind, str.rindex
    """
    # `end` is forwarded only when the caller supplied it.
    call_args = [sub, start]
    call_args += _clean_args(end)
    return _vec_string(a, int_, 'rindex', call_args)
@array_function_dispatch(_just_dispatcher)
def rjust(a, width, fillchar=' '):
    """
    Return an array with the elements of `a` right-justified in a
    string of length `width`.

    Calls `str.rjust` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode
    width : int
        The length of the resulting strings.
    fillchar : str or unicode, optional
        The character to use for padding.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type.

    See Also
    --------
    str.rjust
    """
    strings = numpy.asarray(a)
    widths = numpy.asarray(width)
    # The output itemsize is taken from the largest requested width.
    widest = int(numpy.max(widths.flat))
    if numpy.issubdtype(strings.dtype, numpy.string_):
        # Byte-string input needs a bytes fill character.
        fillchar = asbytes(fillchar)
    result_dtype = (strings.dtype.type, widest)
    return _vec_string(strings, result_dtype, 'rjust', (widths, fillchar))
@array_function_dispatch(_partition_dispatcher)
def rpartition(a, sep):
    """
    Partition (split) each element around the right-most separator.

    Calls `str.rpartition` element-wise.

    Each element is split at the last occurrence of `sep` into three
    strings: the part before the separator, the separator itself, and
    the part after it.  If the separator is not found, the three
    strings are two empty strings followed by the element itself.

    Parameters
    ----------
    a : array_like of str or unicode
        Input array.
    sep : str or unicode
        Right-most separator to split each element in array.

    Returns
    -------
    out : ndarray
        Output array of string or unicode, depending on input type.
        The output array has an extra dimension with 3 elements per
        input element.

    See Also
    --------
    str.rpartition
    """
    # Each element becomes a 3-tuple, so go through an object array.
    pieces = _vec_string(a, object_, 'rpartition', (sep,))
    return _to_string_or_unicode_array(pieces)
1310def _split_dispatcher(a, sep=None, maxsplit=None):
1311 return (a,)
@array_function_dispatch(_split_dispatcher)
def rsplit(a, sep=None, maxsplit=None):
    """
    For each element in `a`, return a list of the words in the string,
    using `sep` as the delimiter string.

    Calls `str.rsplit` element-wise.

    Apart from splitting from the right, `rsplit` behaves like `split`.

    Parameters
    ----------
    a : array_like of str or unicode
    sep : str or unicode, optional
        If `sep` is not specified or None, any whitespace string is a
        separator.
    maxsplit : int, optional
        When given, at most `maxsplit` splits are done, the rightmost
        ones.

    Returns
    -------
    out : ndarray
        Array of list objects.

    See Also
    --------
    str.rsplit, split
    """
    # `sep` is always forwarded (None included); `maxsplit` only when
    # the caller supplied it.
    call_args = [sep]
    call_args += _clean_args(maxsplit)
    # The per-element lists differ in length, so the result is left as
    # an object array.
    return _vec_string(a, object_, 'rsplit', call_args)
def _strip_dispatcher(a, chars=None):
    # Dispatcher used by `rstrip` and `strip`: only `a` is reported to
    # `array_function_dispatch`; `chars` is ignored here.
    dispatched = (a,)
    return dispatched
@array_function_dispatch(_strip_dispatcher)
def rstrip(a, chars=None):
    """
    Return a copy of each element of `a` with the trailing characters
    removed.

    Calls `str.rstrip` element-wise.

    Parameters
    ----------
    a : array-like of str or unicode

    chars : str or unicode, optional
        A string naming the set of characters to be removed.  If
        omitted or None, whitespace is removed.  `chars` is not a
        suffix; rather, all combinations of its values are stripped.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.rstrip

    Examples
    --------
    >>> c = np.array(['aAaAaA', 'abBABba'], dtype='S7'); c
    array(['aAaAaA', 'abBABba'],
        dtype='|S7')
    >>> np.char.rstrip(c, b'a')
    array(['aAaAaA', 'abBABb'],
        dtype='|S7')
    >>> np.char.rstrip(c, b'A')
    array(['aAaAa', 'abBABba'],
        dtype='|S7')

    """
    arr = numpy.asarray(a)
    # `chars` is forwarded as-is (including None); the result keeps the
    # dtype of the input array.
    call_args = (chars,)
    return _vec_string(arr, arr.dtype, 'rstrip', call_args)
@array_function_dispatch(_split_dispatcher)
def split(a, sep=None, maxsplit=None):
    """
    Split every element of `a` into a list of words, using `sep` as
    the delimiter string.

    Calls `str.split` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    sep : str or unicode, optional
        If `sep` is not specified or None, any whitespace string is a
        separator.

    maxsplit : int, optional
        If `maxsplit` is given, at most `maxsplit` splits are done.

    Returns
    -------
    out : ndarray
        Array of list objects

    See Also
    --------
    str.split, rsplit

    """
    # The per-element lists can have different lengths, so the result
    # stays an object array.
    call_args = [sep] + _clean_args(maxsplit)
    return _vec_string(a, object_, 'split', call_args)
def _splitlines_dispatcher(a, keepends=None):
    # Dispatcher for `splitlines`: only `a` is reported to
    # `array_function_dispatch`; `keepends` is ignored here.
    dispatched = (a,)
    return dispatched
@array_function_dispatch(_splitlines_dispatcher)
def splitlines(a, keepends=None):
    """
    Break every element of `a` into a list of its lines, splitting at
    line boundaries.

    Calls `str.splitlines` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    keepends : bool, optional
        Line breaks are not included in the resulting list unless
        keepends is given and true.

    Returns
    -------
    out : ndarray
        Array of list objects

    See Also
    --------
    str.splitlines

    """
    call_args = _clean_args(keepends)
    return _vec_string(a, object_, 'splitlines', call_args)
def _startswith_dispatcher(a, prefix, start=None, end=None):
    # Dispatcher for `startswith`: only `a` is reported to
    # `array_function_dispatch`; the other arguments are ignored here.
    dispatched = (a,)
    return dispatched
@array_function_dispatch(_startswith_dispatcher)
def startswith(a, prefix, start=0, end=None):
    """
    Return a boolean array that is `True` where the string element in
    `a` starts with `prefix`, and `False` elsewhere.

    Calls `str.startswith` element-wise.

    Parameters
    ----------
    a : array_like of str or unicode

    prefix : str

    start, end : int, optional
        With optional `start`, test beginning at that position. With
        optional `end`, stop comparing at that position.

    Returns
    -------
    out : ndarray
        Array of booleans

    See Also
    --------
    str.startswith

    """
    # `prefix` and `start` are always forwarded; `end` goes through
    # `_clean_args` before being appended.
    call_args = [prefix, start] + _clean_args(end)
    return _vec_string(a, bool_, 'startswith', call_args)
@array_function_dispatch(_strip_dispatcher)
def strip(a, chars=None):
    """
    Return a copy of each element of `a` with the leading and trailing
    characters removed.

    Calls `str.strip` element-wise.

    Parameters
    ----------
    a : array-like of str or unicode

    chars : str or unicode, optional
        A string naming the set of characters to be removed.  If
        omitted or None, whitespace is removed.  `chars` is not a
        prefix or suffix; rather, all combinations of its values are
        stripped.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.strip

    Examples
    --------
    >>> c = np.array(['aAaAaA', '  aA  ', 'abBABba'])
    >>> c
    array(['aAaAaA', '  aA  ', 'abBABba'], dtype='<U7')
    >>> np.char.strip(c)
    array(['aAaAaA', 'aA', 'abBABba'], dtype='<U7')
    >>> np.char.strip(c, 'a') # 'a' unstripped from c[1] because whitespace leads
    array(['AaAaA', '  aA  ', 'bBABb'], dtype='<U7')
    >>> np.char.strip(c, 'A') # 'A' unstripped from c[1] because (unprinted) ws trails
    array(['aAaAa', '  aA  ', 'abBABba'], dtype='<U7')

    """
    arr = numpy.asarray(a)
    call_args = _clean_args(chars)
    # The result keeps the dtype of the input array.
    return _vec_string(arr, arr.dtype, 'strip', call_args)
@array_function_dispatch(_unary_op_dispatcher)
def swapcase(a):
    """
    Return, element-wise, a copy of the string with uppercase
    characters converted to lowercase and vice versa.

    Calls `str.swapcase` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like, {str, unicode}
        Input array.

    Returns
    -------
    out : ndarray, {str, unicode}
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.swapcase

    Examples
    --------
    >>> c=np.array(['a1B c','1b Ca','b Ca1','cA1b'],'S5'); c
    array(['a1B c', '1b Ca', 'b Ca1', 'cA1b'],
        dtype='|S5')
    >>> np.char.swapcase(c)
    array(['A1b C', '1B cA', 'B cA1', 'Ca1B'],
        dtype='|S5')

    """
    arr = numpy.asarray(a)
    out_dtype = arr.dtype  # output keeps the dtype of the input
    return _vec_string(arr, out_dtype, 'swapcase')
@array_function_dispatch(_unary_op_dispatcher)
def title(a):
    """
    Return, element-wise, the title cased version of a string or
    unicode.

    In title case, words start with uppercase characters and all
    remaining cased characters are lowercase.

    Calls `str.title` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like, {str, unicode}
        Input array.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.title

    Examples
    --------
    >>> c=np.array(['a1b c','1b ca','b ca1','ca1b'],'S5'); c
    array(['a1b c', '1b ca', 'b ca1', 'ca1b'],
        dtype='|S5')
    >>> np.char.title(c)
    array(['A1B C', '1B Ca', 'B Ca1', 'Ca1B'],
        dtype='|S5')

    """
    arr = numpy.asarray(a)
    out_dtype = arr.dtype  # output keeps the dtype of the input
    return _vec_string(arr, out_dtype, 'title')
def _translate_dispatcher(a, table, deletechars=None):
    # Dispatcher for `translate`: only `a` is reported to
    # `array_function_dispatch`; `table` and `deletechars` are ignored.
    dispatched = (a,)
    return dispatched
@array_function_dispatch(_translate_dispatcher)
def translate(a, table, deletechars=None):
    """
    Return, for each element of `a`, a copy of the string in which
    every character listed in the optional `deletechars` has been
    removed and the remaining characters have been mapped through the
    given translation table.

    Calls `str.translate` element-wise.

    Parameters
    ----------
    a : array-like of str or unicode

    table : str of length 256

    deletechars : str
        Ignored when `a` has a unicode dtype.

    Returns
    -------
    out : ndarray
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.translate

    """
    arr = numpy.asarray(a)
    if not issubclass(arr.dtype.type, unicode_):
        # Non-unicode input: `deletechars` is forwarded when supplied.
        call_args = [table] + _clean_args(deletechars)
        return _vec_string(arr, arr.dtype, 'translate', call_args)
    # Unicode input: only the table is passed on.
    return _vec_string(arr, arr.dtype, 'translate', (table,))
@array_function_dispatch(_unary_op_dispatcher)
def upper(a):
    """
    Return an array whose elements are those of `a` converted to
    uppercase.

    Calls `str.upper` element-wise.

    For 8-bit strings, this method is locale-dependent.

    Parameters
    ----------
    a : array_like, {str, unicode}
        Input array.

    Returns
    -------
    out : ndarray, {str, unicode}
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.upper

    Examples
    --------
    >>> c = np.array(['a1b c', '1bca', 'bca1']); c
    array(['a1b c', '1bca', 'bca1'], dtype='<U5')
    >>> np.char.upper(c)
    array(['A1B C', '1BCA', 'BCA1'], dtype='<U5')

    """
    arr = numpy.asarray(a)
    out_dtype = arr.dtype  # output keeps the dtype of the input
    return _vec_string(arr, out_dtype, 'upper')
def _zfill_dispatcher(a, width):
    # Dispatcher for `zfill`: only `a` is reported to
    # `array_function_dispatch`; `width` is ignored here.
    dispatched = (a,)
    return dispatched
@array_function_dispatch(_zfill_dispatcher)
def zfill(a, width):
    """
    Return the numeric string left-filled with zeros.

    Calls `str.zfill` element-wise.

    Parameters
    ----------
    a : array_like, {str, unicode}
        Input array.
    width : int
        Width of string to left-fill elements in `a`.

    Returns
    -------
    out : ndarray, {str, unicode}
        Output array of str or unicode, depending on input type

    See Also
    --------
    str.zfill

    """
    arr = numpy.asarray(a)
    widths = numpy.asarray(width)
    # The output item size is the largest requested width, so every
    # padded element fits.
    out_size = int(numpy.max(widths.flat))
    out_type = (arr.dtype.type, out_size)
    return _vec_string(arr, out_type, 'zfill', (widths,))
@array_function_dispatch(_unary_op_dispatcher)
def isnumeric(a):
    """
    Return, for each element, True if the element contains only
    numeric characters.

    Calls `unicode.isnumeric` element-wise.

    Numeric characters include digit characters, and all characters
    that have the Unicode numeric value property, e.g. ``U+2155,
    VULGAR FRACTION ONE FIFTH``.

    Parameters
    ----------
    a : array_like, unicode
        Input array.

    Returns
    -------
    out : ndarray, bool
        Array of booleans of same shape as `a`.

    Raises
    ------
    TypeError
        If `_use_unicode(a)` does not report a unicode type.

    See Also
    --------
    unicode.isnumeric

    """
    detected = _use_unicode(a)
    if detected != unicode_:
        raise TypeError("isnumeric is only available for Unicode strings and arrays")
    return _vec_string(a, bool_, 'isnumeric')
@array_function_dispatch(_unary_op_dispatcher)
def isdecimal(a):
    """
    For each element, return True if there are only decimal
    characters in the element.

    Calls `unicode.isdecimal` element-wise.

    Decimal characters include digit characters, and all characters
    that can be used to form decimal-radix numbers,
    e.g. ``U+0660, ARABIC-INDIC DIGIT ZERO``.

    Parameters
    ----------
    a : array_like, unicode
        Input array.

    Returns
    -------
    out : ndarray, bool
        Array of booleans identical in shape to `a`.

    Raises
    ------
    TypeError
        If `_use_unicode(a)` does not report a unicode type.

    See Also
    --------
    unicode.isdecimal

    """
    if _use_unicode(a) != unicode_:
        # The message names this function; it previously said
        # "isnumeric", pointing users at the wrong call.
        raise TypeError(
            "isdecimal is only available for Unicode strings and arrays")
    return _vec_string(a, bool_, 'isdecimal')
@set_module('numpy')
class chararray(ndarray):
    """
    chararray(shape, itemsize=1, unicode=False, buffer=None, offset=0,
              strides=None, order=None)

    Provides a convenient view on arrays of string and unicode values.

    .. note::
       The `chararray` class exists for backwards compatibility with
       Numarray, it is not recommended for new development. Starting from numpy
       1.4, if one needs arrays of strings, it is recommended to use arrays of
       `dtype` `object_`, `string_` or `unicode_`, and use the free functions
       in the `numpy.char` module for fast vectorized string operations.

    Versus a regular NumPy array of type `str` or `unicode`, this
    class adds the following functionality:

      1) values automatically have whitespace removed from the end
         when indexed

      2) comparison operators automatically remove whitespace from the
         end when comparing values

      3) vectorized string operations are provided as methods
         (e.g. `.endswith`) and infix operators (e.g. ``"+", "*", "%"``)

    chararrays should be created using `numpy.char.array` or
    `numpy.char.asarray`, rather than this constructor directly.

    This constructor creates the array, using `buffer` (with `offset`
    and `strides`) if it is not ``None``. If `buffer` is ``None``, then
    constructs a new array with `strides` in "C order", unless both
    ``len(shape) >= 2`` and ``order='F'``, in which case `strides`
    is in "Fortran order".

    Methods
    -------
    astype
    argsort
    copy
    count
    decode
    dump
    dumps
    encode
    endswith
    expandtabs
    fill
    find
    flatten
    getfield
    index
    isalnum
    isalpha
    isdecimal
    isdigit
    islower
    isnumeric
    isspace
    istitle
    isupper
    item
    join
    ljust
    lower
    lstrip
    nonzero
    put
    ravel
    repeat
    replace
    reshape
    resize
    rfind
    rindex
    rjust
    rsplit
    rstrip
    searchsorted
    setfield
    setflags
    sort
    split
    splitlines
    squeeze
    startswith
    strip
    swapaxes
    swapcase
    take
    title
    tofile
    tolist
    tostring
    translate
    transpose
    upper
    view
    zfill

    Parameters
    ----------
    shape : tuple
        Shape of the array.
    itemsize : int, optional
        Length of each array element, in number of characters. Default is 1.
    unicode : bool, optional
        Are the array elements of type unicode (True) or string (False).
        Default is False.
    buffer : object exposing the buffer interface or str, optional
        Memory address of the start of the array data.  Default is None,
        in which case a new array is created.  A `str` is not used as
        memory; instead a new array is created and the string is
        assigned to every element.
    offset : int, optional
        Offset of the array data within `buffer`; only used when
        `buffer` is given.  Default is 0. Needs to be >=0.
    strides : array_like of ints, optional
        Strides for the array (see `ndarray.strides` for full description).
        Only used when `buffer` is given.  Default is None.
    order : {'C', 'F'}, optional
        The order in which the array data is stored in memory: 'C' ->
        "row major" order (the default), 'F' -> "column major"
        (Fortran) order.

    Examples
    --------
    >>> charar = np.chararray((3, 3))
    >>> charar[:] = 'a'
    >>> charar
    chararray([[b'a', b'a', b'a'],
               [b'a', b'a', b'a'],
               [b'a', b'a', b'a']], dtype='|S1')

    >>> charar = np.chararray(charar.shape, itemsize=5)
    >>> charar[:] = 'abc'
    >>> charar
    chararray([[b'abc', b'abc', b'abc'],
               [b'abc', b'abc', b'abc'],
               [b'abc', b'abc', b'abc']], dtype='|S5')

    """
    def __new__(subtype, shape, itemsize=1, unicode=False, buffer=None,
                offset=0, strides=None, order='C'):
        global _globalvar

        if unicode:
            dtype = unicode_
        else:
            dtype = string_

        # force itemsize to be a Python int, since using NumPy integer
        # types results in itemsize.itemsize being used as the size of
        # strings in the new array.
        itemsize = int(itemsize)

        if isinstance(buffer, str):
            # unicode objects do not have the buffer interface
            filler = buffer
            buffer = None
        else:
            filler = None

        # While the flag is set, __array_finalize__ skips its dtype check.
        # It must be cleared even when construction or filling raises,
        # otherwise the check would stay disabled for every later array.
        _globalvar = 1
        try:
            if buffer is None:
                self = ndarray.__new__(subtype, shape, (dtype, itemsize),
                                       order=order)
            else:
                self = ndarray.__new__(subtype, shape, (dtype, itemsize),
                                       buffer=buffer,
                                       offset=offset, strides=strides,
                                       order=order)
            if filler is not None:
                self[...] = filler
        finally:
            _globalvar = 0
        return self

    def __array_finalize__(self, obj):
        # Reject non-string dtypes unless the check is suspended by
        # __new__ (via the module-level `_globalvar` flag).
        # The b is a special case because it is used for reconstructing.
        if not _globalvar and self.dtype.char not in 'SUbc':
            raise ValueError("Can only create a chararray from string data.")

    def __getitem__(self, obj):
        """
        Index like `ndarray`, stripping trailing whitespace from scalars.

        Sub-arrays are returned unchanged.  A scalar that is empty after
        stripping is returned as the `str` literal ``''``.
        """
        val = ndarray.__getitem__(self, obj)

        if isinstance(val, character):
            temp = val.rstrip()
            if len(temp) == 0:
                val = ''
            else:
                val = temp

        return val

    # IMPLEMENTATION NOTE: Most of the methods of this class are
    # direct delegations to the free functions in this module.
    # However, those that return an array of strings should instead
    # return a chararray, so some extra wrapping is required.

    def __eq__(self, other):
        """
        Return (self == other) element-wise.

        See Also
        --------
        equal
        """
        return equal(self, other)

    def __ne__(self, other):
        """
        Return (self != other) element-wise.

        See Also
        --------
        not_equal
        """
        return not_equal(self, other)

    def __ge__(self, other):
        """
        Return (self >= other) element-wise.

        See Also
        --------
        greater_equal
        """
        return greater_equal(self, other)

    def __le__(self, other):
        """
        Return (self <= other) element-wise.

        See Also
        --------
        less_equal
        """
        return less_equal(self, other)

    def __gt__(self, other):
        """
        Return (self > other) element-wise.

        See Also
        --------
        greater
        """
        return greater(self, other)

    def __lt__(self, other):
        """
        Return (self < other) element-wise.

        See Also
        --------
        less
        """
        return less(self, other)

    def __add__(self, other):
        """
        Return (self + other), that is string concatenation,
        element-wise for a pair of array_likes of str or unicode.

        See Also
        --------
        add
        """
        return asarray(add(self, other))

    def __radd__(self, other):
        """
        Return (other + self), that is string concatenation,
        element-wise for a pair of array_likes of `string_` or `unicode_`.

        See Also
        --------
        add
        """
        return asarray(add(numpy.asarray(other), self))

    def __mul__(self, i):
        """
        Return (self * i), that is string multiple concatenation,
        element-wise.

        See Also
        --------
        multiply
        """
        return asarray(multiply(self, i))

    def __rmul__(self, i):
        """
        Return (i * self), that is string multiple concatenation,
        element-wise.  Computed exactly like ``self * i``.

        See Also
        --------
        multiply
        """
        return asarray(multiply(self, i))

    def __mod__(self, i):
        """
        Return (self % i), that is pre-Python 2.6 string formatting
        (interpolation), element-wise for a pair of array_likes of `string_`
        or `unicode_`.

        See Also
        --------
        mod
        """
        return asarray(mod(self, i))

    def __rmod__(self, other):
        # Reflected ``%`` is not supported.
        return NotImplemented

    def argsort(self, axis=-1, kind=None, order=None):
        """
        Return the indices that sort the array lexicographically.

        For full documentation see `numpy.argsort`, for which this method is
        in fact merely a "thin wrapper."

        Examples
        --------
        >>> c = np.array(['a1b c', '1b ca', 'b ca1', 'Ca1b'], 'S5')
        >>> c = c.view(np.chararray); c
        chararray(['a1b c', '1b ca', 'b ca1', 'Ca1b'],
              dtype='|S5')
        >>> c[c.argsort()]
        chararray(['1b ca', 'Ca1b', 'a1b c', 'b ca1'],
              dtype='|S5')

        """
        return self.__array__().argsort(axis, kind, order)
    # The docstring above is replaced at class-creation time by the
    # one from `ndarray.argsort`.
    argsort.__doc__ = ndarray.argsort.__doc__

    def capitalize(self):
        """
        Return a copy of `self` with only the first character of each element
        capitalized.

        See Also
        --------
        char.capitalize

        """
        return asarray(capitalize(self))

    def center(self, width, fillchar=' '):
        """
        Return a copy of `self` with its elements centered in a
        string of length `width`.

        See Also
        --------
        center
        """
        return asarray(center(self, width, fillchar))

    def count(self, sub, start=0, end=None):
        """
        Returns an array with the number of non-overlapping occurrences of
        substring `sub` in the range [`start`, `end`].

        See Also
        --------
        char.count

        """
        return count(self, sub, start, end)

    def decode(self, encoding=None, errors=None):
        """
        Calls `str.decode` element-wise.

        See Also
        --------
        char.decode

        """
        return decode(self, encoding, errors)

    def encode(self, encoding=None, errors=None):
        """
        Calls `str.encode` element-wise.

        See Also
        --------
        char.encode

        """
        return encode(self, encoding, errors)

    def endswith(self, suffix, start=0, end=None):
        """
        Returns a boolean array which is `True` where the string element
        in `self` ends with `suffix`, otherwise `False`.

        See Also
        --------
        char.endswith

        """
        return endswith(self, suffix, start, end)

    def expandtabs(self, tabsize=8):
        """
        Return a copy of each string element where all tab characters are
        replaced by one or more spaces.

        See Also
        --------
        char.expandtabs

        """
        return asarray(expandtabs(self, tabsize))

    def find(self, sub, start=0, end=None):
        """
        For each element, return the lowest index in the string where
        substring `sub` is found.

        See Also
        --------
        char.find

        """
        return find(self, sub, start, end)

    def index(self, sub, start=0, end=None):
        """
        Like `find`, but raises `ValueError` when the substring is not found.

        See Also
        --------
        char.index

        """
        return index(self, sub, start, end)

    def isalnum(self):
        """
        Returns true for each element if all characters in the string
        are alphanumeric and there is at least one character, false
        otherwise.

        See Also
        --------
        char.isalnum

        """
        return isalnum(self)

    def isalpha(self):
        """
        Returns true for each element if all characters in the string
        are alphabetic and there is at least one character, false
        otherwise.

        See Also
        --------
        char.isalpha

        """
        return isalpha(self)

    def isdigit(self):
        """
        Returns true for each element if all characters in the string are
        digits and there is at least one character, false otherwise.

        See Also
        --------
        char.isdigit

        """
        return isdigit(self)

    def islower(self):
        """
        Returns true for each element if all cased characters in the
        string are lowercase and there is at least one cased character,
        false otherwise.

        See Also
        --------
        char.islower

        """
        return islower(self)

    def isspace(self):
        """
        Returns true for each element if there are only whitespace
        characters in the string and there is at least one character,
        false otherwise.

        See Also
        --------
        char.isspace

        """
        return isspace(self)

    def istitle(self):
        """
        Returns true for each element if the element is a titlecased
        string and there is at least one character, false otherwise.

        See Also
        --------
        char.istitle

        """
        return istitle(self)

    def isupper(self):
        """
        Returns true for each element if all cased characters in the
        string are uppercase and there is at least one character, false
        otherwise.

        See Also
        --------
        char.isupper

        """
        return isupper(self)

    def join(self, seq):
        """
        Return a string which is the concatenation of the strings in the
        sequence `seq`.

        See Also
        --------
        char.join

        """
        return join(self, seq)

    def ljust(self, width, fillchar=' '):
        """
        Return an array with the elements of `self` left-justified in a
        string of length `width`.

        See Also
        --------
        char.ljust

        """
        return asarray(ljust(self, width, fillchar))

    def lower(self):
        """
        Return an array with the elements of `self` converted to
        lowercase.

        See Also
        --------
        char.lower

        """
        return asarray(lower(self))

    def lstrip(self, chars=None):
        """
        For each element in `self`, return a copy with the leading characters
        removed.

        See Also
        --------
        char.lstrip

        """
        return asarray(lstrip(self, chars))

    def partition(self, sep):
        """
        Partition each element in `self` around `sep`.

        See Also
        --------
        partition
        """
        return asarray(partition(self, sep))

    def replace(self, old, new, count=None):
        """
        For each element in `self`, return a copy of the string with all
        occurrences of substring `old` replaced by `new`.

        See Also
        --------
        char.replace

        """
        return asarray(replace(self, old, new, count))

    def rfind(self, sub, start=0, end=None):
        """
        For each element in `self`, return the highest index in the string
        where substring `sub` is found, such that `sub` is contained
        within [`start`, `end`].

        See Also
        --------
        char.rfind

        """
        return rfind(self, sub, start, end)

    def rindex(self, sub, start=0, end=None):
        """
        Like `rfind`, but raises `ValueError` when the substring `sub` is
        not found.

        See Also
        --------
        char.rindex

        """
        return rindex(self, sub, start, end)

    def rjust(self, width, fillchar=' '):
        """
        Return an array with the elements of `self`
        right-justified in a string of length `width`.

        See Also
        --------
        char.rjust

        """
        return asarray(rjust(self, width, fillchar))

    def rpartition(self, sep):
        """
        Partition each element in `self` around the right-most `sep`.

        See Also
        --------
        rpartition
        """
        return asarray(rpartition(self, sep))

    def rsplit(self, sep=None, maxsplit=None):
        """
        For each element in `self`, return a list of the words in
        the string, using `sep` as the delimiter string.

        See Also
        --------
        char.rsplit

        """
        return rsplit(self, sep, maxsplit)

    def rstrip(self, chars=None):
        """
        For each element in `self`, return a copy with the trailing
        characters removed.

        See Also
        --------
        char.rstrip

        """
        return asarray(rstrip(self, chars))

    def split(self, sep=None, maxsplit=None):
        """
        For each element in `self`, return a list of the words in the
        string, using `sep` as the delimiter string.

        See Also
        --------
        char.split

        """
        return split(self, sep, maxsplit)

    def splitlines(self, keepends=None):
        """
        For each element in `self`, return a list of the lines in the
        element, breaking at line boundaries.

        See Also
        --------
        char.splitlines

        """
        return splitlines(self, keepends)

    def startswith(self, prefix, start=0, end=None):
        """
        Returns a boolean array which is `True` where the string element
        in `self` starts with `prefix`, otherwise `False`.

        See Also
        --------
        char.startswith

        """
        return startswith(self, prefix, start, end)

    def strip(self, chars=None):
        """
        For each element in `self`, return a copy with the leading and
        trailing characters removed.

        See Also
        --------
        char.strip

        """
        return asarray(strip(self, chars))

    def swapcase(self):
        """
        For each element in `self`, return a copy of the string with
        uppercase characters converted to lowercase and vice versa.

        See Also
        --------
        char.swapcase

        """
        return asarray(swapcase(self))

    def title(self):
        """
        For each element in `self`, return a titlecased version of the
        string: words start with uppercase characters, all remaining cased
        characters are lowercase.

        See Also
        --------
        char.title

        """
        return asarray(title(self))

    def translate(self, table, deletechars=None):
        """
        For each element in `self`, return a copy of the string where
        all characters occurring in the optional argument
        `deletechars` are removed, and the remaining characters have
        been mapped through the given translation table.

        See Also
        --------
        char.translate

        """
        return asarray(translate(self, table, deletechars))

    def upper(self):
        """
        Return an array with the elements of `self` converted to
        uppercase.

        See Also
        --------
        char.upper

        """
        return asarray(upper(self))

    def zfill(self, width):
        """
        Return the numeric string left-filled with zeros in a string of
        length `width`.

        See Also
        --------
        char.zfill

        """
        return asarray(zfill(self, width))

    def isnumeric(self):
        """
        For each element in `self`, return True if there are only
        numeric characters in the element.

        See Also
        --------
        char.isnumeric

        """
        return isnumeric(self)

    def isdecimal(self):
        """
        For each element in `self`, return True if there are only
        decimal characters in the element.

        See Also
        --------
        char.isdecimal

        """
        return isdecimal(self)
@set_module("numpy.char")
def array(obj, itemsize=None, copy=True, unicode=None, order=None):
    """
    Create a `chararray`.

    .. note::
       This class is provided for numarray backward-compatibility.
       New code (not concerned with numarray compatibility) should use
       arrays of type `string_` or `unicode_` and use the free functions
       in :mod:`numpy.char <numpy.core.defchararray>` for fast
       vectorized string operations instead.

    Versus a regular NumPy array of type `str` or `unicode`, this
    class adds the following functionality:

      1) values automatically have whitespace removed from the end
         when indexed

      2) comparison operators automatically remove whitespace from the
         end when comparing values

      3) vectorized string operations are provided as methods
         (e.g. `str.endswith`) and infix operators (e.g. ``+, *, %``)

    Parameters
    ----------
    obj : array of str or unicode-like

    itemsize : int, optional
        `itemsize` is the number of characters per scalar in the
        resulting array.  If `itemsize` is None, and `obj` is an
        object array or a Python list, the `itemsize` will be
        automatically determined.  If `itemsize` is provided and `obj`
        is of type str or unicode, then the `obj` string will be
        chunked into `itemsize` pieces.

    copy : bool, optional
        If true (default), then the object is copied.  Otherwise, a copy
        will only be made if __array__ returns a copy, if obj is a
        nested sequence, or if a copy is needed to satisfy any of the other
        requirements (`itemsize`, unicode, `order`, etc.).

    unicode : bool, optional
        When true, the resulting `chararray` can contain Unicode
        characters, when false only 8-bit characters.  If unicode is
        None and `obj` is one of the following:

          - a `chararray`,
          - an ndarray of type `str` or `unicode`
          - a Python str or unicode object,

        then the unicode setting of the output array will be
        automatically determined.

    order : {'C', 'F', 'A'}, optional
        Specify the order of the array.  If order is 'C' (default), then the
        array will be in C-contiguous order (last-index varies the
        fastest).  If order is 'F', then the returned array
        will be in Fortran-contiguous order (first-index varies the
        fastest).  If order is 'A', then the returned array may
        be in any order (either C-, Fortran-contiguous, or even
        discontiguous).

    Returns
    -------
    out : chararray
        The input wrapped in, or converted to, a `chararray`.
    """
    if isinstance(obj, (bytes, str)):
        # A single Python string: chunk it into `itemsize`-long pieces.
        if unicode is None:
            unicode = isinstance(obj, str)

        if itemsize is None:
            itemsize = len(obj)
        shape = len(obj) // itemsize

        return chararray(shape, itemsize=itemsize, unicode=unicode,
                         buffer=obj, order=order)

    if isinstance(obj, (list, tuple)):
        obj = numpy.asarray(obj)

    if isinstance(obj, ndarray) and issubclass(obj.dtype.type, character):
        # If we just have a vanilla chararray, create a chararray
        # view around it.
        if not isinstance(obj, chararray):
            obj = obj.view(chararray)

        # Kind of the *existing* data; the checks below must look at the
        # dtype, because `obj` itself is an array and never an instance
        # of the `unicode_` / `string_` scalar types.
        holds_unicode = issubclass(obj.dtype.type, unicode_)
        holds_bytes = issubclass(obj.dtype.type, string_)

        if itemsize is None:
            itemsize = obj.itemsize
            # itemsize is in 8-bit chars, so for Unicode, we need
            # to divide by the size of a single Unicode character,
            # which for NumPy is always 4
            if holds_unicode:
                itemsize //= 4

        if unicode is None:
            unicode = holds_unicode

        if unicode:
            dtype = unicode_
        else:
            dtype = string_

        if order is not None:
            # asanyarray keeps the chararray subclass; plain asarray
            # would hand back a base-class ndarray.
            obj = numpy.asanyarray(obj, order=order)
        # NOTE(review): `itemsize` is in characters while `obj.itemsize`
        # is in bytes, so for unicode data this comparison differs for
        # any non-zero size and the array is re-cast even with
        # copy=False -- confirm whether that is intended.
        if (copy or
                (itemsize != obj.itemsize) or
                (not unicode and holds_unicode) or
                (unicode and holds_bytes)):
            obj = obj.astype((dtype, int(itemsize)))
        return obj

    # NOTE(review): this tests against the builtin `object`, which every
    # dtype type subclasses, so it matches any remaining ndarray and not
    # only object arrays -- left unchanged to preserve behavior.
    if isinstance(obj, ndarray) and issubclass(obj.dtype.type, object):
        if itemsize is None:
            # Since no itemsize was specified, convert the input array to
            # a list so the ndarray constructor will automatically
            # determine the itemsize for us.
            obj = obj.tolist()
        # Fall through to the default case

    if unicode:
        dtype = unicode_
    else:
        dtype = string_

    if itemsize is None:
        val = narray(obj, dtype=dtype, order=order, subok=True)
    else:
        val = narray(obj, dtype=(dtype, itemsize), order=order, subok=True)
    return val.view(chararray)
@set_module("numpy.char")
def asarray(obj, itemsize=None, unicode=None, order=None):
    """
    Return `obj` as a `chararray`, avoiding a copy of the data where
    possible.

    This is a thin wrapper that forwards its arguments to `array` with
    ``copy=False``.

    Compared with a plain NumPy array of type `str` or `unicode`, a
    `chararray` offers the following extras:

      1) trailing whitespace is stripped from values when they are
         indexed

      2) trailing whitespace is ignored by the comparison operators

      3) vectorized string operations are available as methods
         (e.g. `str.endswith`) and as infix operators
         (e.g. ``+``, ``*``, ``%``)

    Parameters
    ----------
    obj : array of str or unicode-like

    itemsize : int, optional
        Number of characters per scalar in the resulting array.  When
        `itemsize` is None and `obj` is an object array or a Python
        list, the value is determined automatically.  When `itemsize`
        is given and `obj` is of type str or unicode, the `obj` string
        is chunked into `itemsize` pieces.

    unicode : bool, optional
        If true, the resulting `chararray` can hold Unicode characters;
        if false, only 8-bit characters.  If `unicode` is None and
        `obj` is one of the following:

          - a `chararray`,
          - an ndarray of type `str` or `unicode`
          - a Python str or unicode object,

        then the unicode setting of the output array is determined
        automatically.

    order : {'C', 'F'}, optional
        Memory layout of the array.  With 'C' (default) the array is
        C-contiguous (last index varies the fastest); with 'F' the
        returned array is Fortran-contiguous (first index varies the
        fastest).  The value is passed through to `array` unchanged.

    Returns
    -------
    out : chararray
        Whatever `array` returns for the given arguments with
        ``copy=False``.
    """
    # copy=False asks `array` to copy only when it cannot avoid it.
    converted = array(obj, itemsize=itemsize, copy=False,
                      unicode=unicode, order=order)
    return converted