Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pkg_resources/

1# module pyparsing.py

5# Permission is hereby granted, free of charge, to any person obtaining

6# a copy of this software and associated documentation files (the

7# "Software"), to deal in the Software without restriction, including

8# without limitation the rights to use, copy, modify, merge, publish,

9# distribute, sublicense, and/or sell copies of the Software, and to

10# permit persons to whom the Software is furnished to do so, subject to

11# the following conditions:

12#

13# The above copyright notice and this permission notice shall be

14# included in all copies or substantial portions of the Software.

15#

16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,

17# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF

18# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.

19# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY

20# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,

21# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE

22# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

23#

25__doc__ = \

26"""

27pyparsing module - Classes and methods to define and execute parsing grammars

28=============================================================================

30The pyparsing module is an alternative approach to creating and executing simple grammars,

31vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you

32don't need to learn a new syntax for defining grammars or matching expressions - the parsing module

33provides a library of classes that you use to construct the grammar directly in Python.

35Here is a program to parse "Hello, World!" (or any greeting of the form

36C{"<salutation>, <addressee>!"}), built up using L{Word}, L{Literal}, and L{And} elements

37(L{'+'<ParserElement.__add__>} operator gives L{And} expressions, strings are auto-converted to

38L{Literal} expressions)::

40 from pyparsing import Word, alphas

42 # define grammar of a greeting

43 greet = Word(alphas) + "," + Word(alphas) + "!"

45 hello = "Hello, World!"

46 print (hello, "->", greet.parseString(hello))

48The program outputs the following::

50 Hello, World! -> ['Hello', ',', 'World', '!']

52The Python representation of the grammar is quite readable, owing to the self-explanatory

53class names, and the use of '+', '|' and '^' operators.

55The L{ParseResults} object returned from L{ParserElement.parseString<ParserElement.parseString>} can be accessed as a nested list, a dictionary, or an

56object with named attributes.

58The pyparsing module handles some of the problems that are typically vexing when writing text parsers:

59 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)

60 - quoted strings

61 - embedded comments

64Getting Started -

65-----------------

66Visit the classes L{ParserElement} and L{ParseResults} to see the base classes that most other pyparsing

67classes inherit from. Use the docstrings for examples of how to:

68 - construct literal match expressions from L{Literal} and L{CaselessLiteral} classes

69 - construct character word-group expressions using the L{Word} class

70 - see how to create repetitive expressions using L{ZeroOrMore} and L{OneOrMore} classes

71 - use L{'+'<And>}, L{'|'<MatchFirst>}, L{'^'<Or>}, and L{'&'<Each>} operators to combine simple expressions into more complex ones

72 - associate names with your parsed results using L{ParserElement.setResultsName}

73 - find some helpful expression short-cuts like L{delimitedList} and L{oneOf}

74 - find more useful common expressions in the L{pyparsing_common} namespace class

75"""

77__version__ = "2.2.1"

78__versionTime__ = "18 Sep 2018 00:49 UTC"

79__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"

81import string

82from weakref import ref as wkref

83import copy

84import sys

85import warnings

86import re

87import sre_constants

88import collections

89import pprint

90import traceback

91import types

92from datetime import datetime

94try:

95 from _thread import RLock

96except ImportError:

97 from threading import RLock

99try:

100 # Python 3

101 from collections.abc import Iterable

102 from collections.abc import MutableMapping

103except ImportError:

104 # Python 2.7

105 from collections import Iterable

106 from collections import MutableMapping

107

108try:

109 from collections import OrderedDict as _OrderedDict

110except ImportError:

111 try:

112 from ordereddict import OrderedDict as _OrderedDict

113 except ImportError:

114 _OrderedDict = None

115

116#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )

117

# Names exported by ``from <module> import *``.
__all__ = [
'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',
'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno',
'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass',
'CloseMatch', 'tokenMap', 'pyparsing_common',
]

138

# (major, minor, micro) of the running interpreter.
system_version = tuple(sys.version_info)[:3]
PY_3 = system_version[0] == 3
if PY_3:
    # Python 3: provide the Python 2 names that the rest of the module uses.
    _MAX_INT = sys.maxsize
    basestring = str
    unichr = chr
    _ustr = str

    # build list of single arg builtins, that can be used as parse actions
    singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max]

else:
    _MAX_INT = sys.maxint
    range = xrange

    def _ustr(obj):
        """Drop-in replacement for str(obj) that tries to be Unicode friendly.

        Unicode objects are returned unchanged. Otherwise str(obj) is tried
        first; if that raises UnicodeEncodeError, unicode(obj) is encoded with
        the default encoding using 'xmlcharrefreplace', and every resulting
        ``&#NNN;`` reference is rewritten as a ``\\u`` followed by the hex
        code point.
        """
        if isinstance(obj,unicode):
            return obj

        try:
            # If this works, then _ustr(obj) has the same behaviour as str(obj), so
            # it won't break any existing code.
            return str(obj)

        except UnicodeEncodeError:
            # Else encode it
            ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')
            xmlcharref = Regex(r'&#\d+;')
            # t[0] is the matched '&#NNN;' text; [2:-1] strips '&#' and ';'
            xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])
            return xmlcharref.transformString(ret)

    # build list of single arg builtins, tolerant of Python version, that can be used as parse actions
    singleArgBuiltins = []
    import __builtin__
    for fname in "sum len sorted reversed list tuple set any all min max".split():
        try:
            singleArgBuiltins.append(getattr(__builtin__,fname))
        except AttributeError:
            continue

# The type of a generator-expression object, used for isinstance checks.
_generatorType = type((y for y in range(1)))

184

def _xml_escape(data):
    """Return *data* with the five XML special characters replaced by entities.

    ``&``, ``>``, ``<``, ``"`` and ``'`` become ``&amp;``, ``&gt;``, ``&lt;``,
    ``&quot;`` and ``&apos;`` respectively.
    """
    # The ampersand must be handled first so that the '&' introduced by the
    # later replacements is not escaped a second time.
    replacements = (
        ('&', '&amp;'),
        ('>', '&gt;'),
        ('<', '&lt;'),
        ('"', '&quot;'),
        ("'", '&apos;'),
    )
    for raw, entity in replacements:
        data = data.replace(raw, entity)
    return data

194

# Empty class; it defines no attributes or behaviour of its own.
# NOTE(review): presumably instantiated elsewhere as a plain attribute
# namespace - confirm against its users.
class _Constants(object):
    pass

197

# Character-set constants used to build Word expressions.
nums = string.digits
alphas = string.ascii_uppercase + string.ascii_lowercase
alphanums = alphas + nums
hexnums = nums + "ABCDEFabcdef"
_bslash = "\\"
# every printable ASCII character except whitespace
printables = "".join(filter(lambda ch: ch not in string.whitespace, string.printable))

204

class ParseBaseException(Exception):
    """Base exception class for all parsing runtime exceptions."""

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__(self, pstr, loc=0, msg=None, elem=None):
        """Record the input string, failure location, message and element.

        When only one string is given (``msg`` is None) it is taken to be the
        message and the stored input string is left empty.
        """
        self.loc = loc
        if msg is not None:
            self.msg, self.pstr = msg, pstr
        else:
            self.msg, self.pstr = pstr, ""
        self.parserElement = elem
        self.args = (pstr, loc, msg)

    @classmethod
    def _from_exception(cls, pe):
        """
        Internal factory that builds an exception of this class from another
        parse exception, avoiding __init__ signature conflicts among subclasses.
        """
        return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement)

    def __getattr__(self, aname):
        """Compute the derived attributes on demand:
         - lineno - returns the line number of the exception text
         - col / column - returns the column number of the exception text
         - line - returns the line containing the exception text

        Any other missing attribute raises AttributeError.
        """
        if aname == "lineno":
            return lineno(self.loc, self.pstr)
        if aname in ("col", "column"):
            return col(self.loc, self.pstr)
        if aname == "line":
            return line(self.loc, self.pstr)
        raise AttributeError(aname)

    def __str__(self):
        details = (self.msg, self.loc, self.lineno, self.column)
        return "%s (at char %d), (line:%d, col:%d)" % details

    def __repr__(self):
        return _ustr(self)

    def markInputline(self, markerString=">!<"):
        """Return the stripped exception line with *markerString* inserted
        at the exception column; an empty marker leaves the line unmarked.
        """
        text = self.line
        split_at = self.column - 1
        if markerString:
            text = text[:split_at] + markerString + text[split_at:]
        return text.strip()

    def __dir__(self):
        # advertise the computed attributes alongside the regular ones
        computed = ["lineno", "col", "line"]
        return computed + dir(type(self))

260

class ParseException(ParseBaseException):
    """
    Exception thrown when a parse expression doesn't match the input;
    supported attributes by name are:
     - lineno - returns the line number of the exception text
     - col - returns the column number of the exception text
     - line - returns the line containing the exception text

    Example::
        try:
            Word(nums).setName("integer").parseString("ABC")
        except ParseException as pe:
            print(pe)
            print("column: {}".format(pe.col))

    prints::
       Expected integer (at char 0), (line:1, col:1)
        column: 1
    """
    pass

281

class ParseFatalException(ParseBaseException):
    """User-throwable exception thrown when inconsistent parse content
       is found; stops all parsing immediately."""
    pass

286

class ParseSyntaxException(ParseFatalException):
    """Just like L{ParseFatalException}, but thrown internally when an
       L{ErrorStop<And._ErrorStop>} ('-' operator) indicates that parsing is to stop
       immediately because an unbacktrackable syntax error has been found."""
    pass

292

293#~ class ReparseException(ParseBaseException):

294 #~ """Experimental class - parse actions can raise this exception to cause

295 #~ pyparsing to reparse the input string:

296 #~ - with a modified input string, and/or

297 #~ - with a modified start location

298 #~ Set the values of the ReparseException in the constructor, and raise the

299 #~ exception in a parse action to cause pyparsing to use the new string/location.

300 #~ Setting the values as None causes no change to be made.

301 #~ """

302 #~ def __init_( self, newstring, restartLoc ):

303 #~ self.newParseText = newstring

304 #~ self.reparseLoc = restartLoc

305

class RecursiveGrammarException(Exception):
    """Exception thrown by L{ParserElement.validate} if the grammar could be improperly recursive."""

    def __init__(self, parseElementList):
        """Store the trace of parse elements that exposed the recursion."""
        self.parseElementTrace = parseElementList

    def __str__(self):
        # Wrap the trace in a 1-tuple: a bare tuple on the right of '%' would
        # be unpacked as the format arguments and raise TypeError (or print
        # only its single element) instead of being shown as the trace.
        return "RecursiveGrammarException: %s" % (self.parseElementTrace,)

313

class _ParseResultsWithOffset(object):
    """A two-item ``(value, offset)`` record kept in the ``tup`` attribute."""

    def __init__(self, p1, p2):
        self.tup = (p1, p2)

    def __getitem__(self, i):
        # delegating to the tuple also makes ``value, offset = obj`` work
        pair = self.tup
        return pair[i]

    def __repr__(self):
        value = self.tup[0]
        return repr(value)

    def setOffset(self, i):
        """Replace the stored offset, keeping the value."""
        value = self.tup[0]
        self.tup = (value, i)

323

324class ParseResults(object):

325 """

326 Structured parse results, to provide multiple means of access to the parsed data:

327 - as a list (C{len(results)})

328 - by list index (C{results[0], results[1]}, etc.)

329 - by attribute (C{results.<resultsName>} - see L{ParserElement.setResultsName})

330

331 Example::

332 integer = Word(nums)

333 date_str = (integer.setResultsName("year") + '/'

334 + integer.setResultsName("month") + '/'

335 + integer.setResultsName("day"))

336 # equivalent form:

337 # date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

338

339 # parseString returns a ParseResults object

340 result = date_str.parseString("1999/12/31")

341

342 def test(s, fn=repr):

343 print("%s -> %s" % (s, fn(eval(s))))

344 test("list(result)")

345 test("result[0]")

346 test("result['month']")

347 test("result.day")

348 test("'month' in result")

349 test("'minutes' in result")

350 test("result.dump()", str)

351 prints::

352 list(result) -> ['1999', '/', '12', '/', '31']

353 result[0] -> '1999'

354 result['month'] -> '12'

355 result.day -> '31'

356 'month' in result -> True

357 'minutes' in result -> False

358 result.dump() -> ['1999', '/', '12', '/', '31']

359 - day: 31

360 - month: 12

361 - year: 1999

362 """

363 def __new__(cls, toklist=None, name=None, asList=True, modal=True ):

364 if isinstance(toklist, cls):

365 return toklist

366 retobj = object.__new__(cls)

367 retobj.__doinit = True

368 return retobj

369

370 # Performance tuning: we construct a *lot* of these, so keep this

371 # constructor as small and fast as possible

def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance ):
    """Initialise the token list and, if *name* is given, the named result.

    The ``isinstance=isinstance`` default binds the builtin as a local name.
    """
    # __new__ sets __doinit only on freshly allocated objects, so an existing
    # instance passed through __new__ is not reset here.
    if self.__doinit:
        self.__doinit = False
        self.__name = None
        self.__parent = None
        self.__accumNames = {}
        self.__asList = asList
        self.__modal = modal
        if toklist is None:
            toklist = []
        if isinstance(toklist, list):
            self.__toklist = toklist[:]
        elif isinstance(toklist, _generatorType):
            self.__toklist = list(toklist)
        else:
            # any other single object becomes a one-token list
            self.__toklist = [toklist]
        self.__tokdict = dict()

    if name is not None and name:
        if not modal:
            # non-modal names are registered in __accumNames; __getitem__
            # returns every occurrence for such names instead of the last one
            self.__accumNames[name] = 0
        if isinstance(name,int):
            name = _ustr(name) # will always return a str, but use _ustr for consistency
        self.__name = name
        # skip the named entry when toklist is None, '' or []
        if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None,'',[])):
            if isinstance(toklist,basestring):
                toklist = [ toklist ]
            if asList:
                if isinstance(toklist,ParseResults):
                    self[name] = _ParseResultsWithOffset(toklist.copy(),0)
                else:
                    self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
                self[name].__name = name
            else:
                try:
                    self[name] = toklist[0]
                except (KeyError,TypeError,IndexError):
                    # toklist is not indexable (or is empty): store it whole
                    self[name] = toklist

410

411 def __getitem__( self, i ):

412 if isinstance( i, (int,slice) ):

413 return self.__toklist[i]

414 else:

415 if i not in self.__accumNames: 415 ↛ 418line 415 didn't jump to line 418, because the condition on line 415 was never false

416 return self.__tokdict[i][-1][0]

417 else:

418 return ParseResults([ v[0] for v in self.__tokdict[i] ])

419

def __setitem__(self, k, v, isinstance=isinstance):
    """Store *v* by position (int/slice key) or append it as a named occurrence.

    A value that is already a _ParseResultsWithOffset is appended to the
    name's occurrences as-is; any other value stored under a non-positional
    key is wrapped with offset 0. A ParseResults value gets a weak reference
    back to this object as its parent.
    """
    if isinstance(v, _ParseResultsWithOffset):
        entry, sub = v, v[0]
    elif isinstance(k, (int, slice)):
        self.__toklist[k] = v
        entry, sub = None, v
    else:
        entry, sub = _ParseResultsWithOffset(v, 0), v

    if entry is not None:
        # build a new list rather than appending in place
        self.__tokdict[k] = self.__tokdict.get(k, list()) + [entry]
    if isinstance(sub, ParseResults):
        sub.__parent = wkref(self)

432

433 def __delitem__( self, i ):

434 if isinstance(i,(int,slice)): 434 ↛ 452line 434 didn't jump to line 452, because the condition on line 434 was never false

435 mylen = len( self.__toklist )

436 del self.__toklist[i]

437

438 # convert int to slice

439 if isinstance(i, int): 439 ↛ 440line 439 didn't jump to line 440, because the condition on line 439 was never true

440 if i < 0:

441 i += mylen

442 i = slice(i, i+1)

443 # get removed indices

444 removed = list(range(*i.indices(mylen)))

445 removed.reverse()

446 # fixup indices in token dictionary

447 for name,occurrences in self.__tokdict.items():

448 for j in removed:

449 for k, (value, position) in enumerate(occurrences):

450 occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))

451 else:

452 del self.__tokdict[i]

453

def __contains__( self, k ):
    """Report whether *k* is a defined results name (tokens are not searched)."""
    return k in self.__tokdict

456

# Sequence protocol: all of these operate on the positional token list only.
def __len__( self ): return len( self.__toklist )
# Truthiness follows the token list: results with no tokens are falsy.
def __bool__(self): return ( not not self.__toklist )
__nonzero__ = __bool__  # Python 2 name for __bool__
def __iter__( self ): return iter( self.__toklist )
# Iterates over a reversed copy of the token list.
def __reversed__( self ): return iter( self.__toklist[::-1] )

462 def _iterkeys( self ):

463 if hasattr(self.__tokdict, "iterkeys"):

464 return self.__tokdict.iterkeys()

465 else:

466 return iter(self.__tokdict)

467

def _itervalues( self ):
    """Return a generator of the value of each results name, looked up via ``self[k]``."""
    return (self[k] for k in self._iterkeys())

def _iteritems( self ):
    """Return a generator of ``(name, value)`` pairs, with values looked up via ``self[k]``."""
    return ((k, self[k]) for k in self._iterkeys())

473

# Publish the dict-style accessors under the names appropriate to the running
# Python: iterator-returning keys/values/items on Python 3; iter* iterators
# plus list-returning keys/values/items on Python 2.
if PY_3:
    keys = _iterkeys
    """Returns an iterator of all named result keys (Python 3.x only)."""

    values = _itervalues
    """Returns an iterator of all named result values (Python 3.x only)."""

    items = _iteritems
    """Returns an iterator of all named result key-value tuples (Python 3.x only)."""

else:
    iterkeys = _iterkeys
    """Returns an iterator of all named result keys (Python 2.x only)."""

    itervalues = _itervalues
    """Returns an iterator of all named result values (Python 2.x only)."""

    iteritems = _iteritems
    """Returns an iterator of all named result key-value tuples (Python 2.x only)."""

    def keys( self ):
        """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x)."""
        return list(self.iterkeys())

    def values( self ):
        """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x)."""
        return list(self.itervalues())

    def items( self ):
        """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x)."""
        return list(self.iteritems())

505

def haskeys( self ):
    """Return True if any results names are defined, False otherwise.

    Since keys() returns an iterator, this method is helpful in bypassing
    code that looks for the existence of any defined results names."""
    return bool(self.__tokdict)

510

511 def pop( self, *args, **kwargs):

512 """

513 Removes and returns item at specified index (default=C{last}).

514 Supports both C{list} and C{dict} semantics for C{pop()}. If passed no

515 argument or an integer argument, it will use C{list} semantics

516 and pop tokens from the list of parsed tokens. If passed a

517 non-integer argument (most likely a string), it will use C{dict}

518 semantics and pop the corresponding value from any defined

519 results names. A second default return value argument is

520 supported, just as in C{dict.pop()}.

521

522 Example::

523 def remove_first(tokens):

524 tokens.pop(0)

525 print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']

526 print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321']

527

528 label = Word(alphas)

529 patt = label("LABEL") + OneOrMore(Word(nums))

530 print(patt.parseString("AAB 123 321").dump())

531

532 # Use pop() in a parse action to remove named result (note that corresponding value is not

533 # removed from list form of results)

534 def remove_LABEL(tokens):

535 tokens.pop("LABEL")

536 return tokens

537 patt.addParseAction(remove_LABEL)

538 print(patt.parseString("AAB 123 321").dump())

539 prints::

540 ['AAB', '123', '321']

541 - LABEL: AAB

542

543 ['AAB', '123', '321']

544 """

545 if not args:

546 args = [-1]

547 for k,v in kwargs.items():

548 if k == 'default':

549 args = (args[0], v)

550 else:

551 raise TypeError("pop() got an unexpected keyword argument '%s'" % k)

552 if (isinstance(args[0], int) or

553 len(args) == 1 or

554 args[0] in self):

555 index = args[0]

556 ret = self[index]

557 del self[index]

558 return ret

559 else:

560 defaultvalue = args[1]

561 return defaultvalue

562

563 def get(self, key, defaultValue=None):

564 """

565 Returns named result matching the given key, or if there is no

566 such name, then returns the given C{defaultValue} or C{None} if no

567 C{defaultValue} is specified.

568

569 Similar to C{dict.get()}.

570

571 Example::

572 integer = Word(nums)

573 date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

574

575 result = date_str.parseString("1999/12/31")

576 print(result.get("year")) # -> '1999'

577 print(result.get("hour", "not specified")) # -> 'not specified'

578 print(result.get("hour")) # -> None

579 """

580 if key in self:

581 return self[key]

582 else:

583 return defaultValue

584

585 def insert( self, index, insStr ):

586 """

587 Inserts new element at location index in the list of parsed tokens.

588

589 Similar to C{list.insert()}.

590

591 Example::

592 print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']

593

594 # use a parse action to insert the parse location in the front of the parsed results

595 def insert_locn(locn, tokens):

596 tokens.insert(0, locn)

597 print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321']

598 """

599 self.__toklist.insert(index, insStr)

600 # fixup indices in token dictionary

601 for name,occurrences in self.__tokdict.items():

602 for k, (value, position) in enumerate(occurrences):

603 occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))

604

def append( self, item ):
    """
    Add single element to end of ParseResults list of elements.
    Results names are not affected.

    Example::
        print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']

        # use a parse action to compute the sum of the parsed integers, and add it to the end
        def append_sum(tokens):
            tokens.append(sum(map(int, tokens)))
        print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444]
    """
    self.__toklist.append(item)

618

619 def extend( self, itemseq ):

620 """

621 Add sequence of elements to end of ParseResults list of elements.

622

623 Example::

624 patt = OneOrMore(Word(alphas))

625

626 # use a parse action to append the reverse of the matched strings, to make a palindrome

627 def make_palindrome(tokens):

628 tokens.extend(reversed([t[::-1] for t in tokens]))

629 return ''.join(tokens)

630 print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'

631 """

632 if isinstance(itemseq, ParseResults):

633 self += itemseq

634 else:

635 self.__toklist.extend(itemseq)

636

637 def clear( self ):

638 """

639 Clear all elements and results names.

640 """

641 del self.__toklist[:]

642 self.__tokdict.clear()

643

644 def __getattr__( self, name ):

645 try:

646 return self[name]

647 except KeyError:

648 return ""

649

650 if name in self.__tokdict:

651 if name not in self.__accumNames:

652 return self.__tokdict[name][-1][0]

653 else:

654 return ParseResults([ v[0] for v in self.__tokdict[name] ])

655 else:

656 return ""

657

658 def __add__( self, other ):

659 ret = self.copy()

660 ret += other

661 return ret

662

663 def __iadd__( self, other ):

664 if other.__tokdict:

665 offset = len(self.__toklist)

666 addoffset = lambda a: offset if a<0 else a+offset

667 otheritems = other.__tokdict.items()

668 otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )

669 for (k,vlist) in otheritems for v in vlist]

670 for k,v in otherdictitems:

671 self[k] = v

672 if isinstance(v[0],ParseResults):

673 v[0].__parent = wkref(self)

674

675 self.__toklist += other.__toklist

676 self.__accumNames.update( other.__accumNames )

677 return self

678

679 def __radd__(self, other):

680 if isinstance(other,int) and other == 0:

681 # useful for merging many ParseResults using sum() builtin

682 return self.copy()

683 else:

684 # this may raise a TypeError - so be it

685 return other + self

686

687 def __repr__( self ):

688 return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )

689

690 def __str__( self ):

691 return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'

692

693 def _asStringList( self, sep='' ):

694 out = []

695 for item in self.__toklist:

696 if out and sep:

697 out.append(sep)

698 if isinstance( item, ParseResults ): 698 ↛ 699line 698 didn't jump to line 699, because the condition on line 698 was never true

699 out += item._asStringList()

700 else:

701 out.append( _ustr(item) )

702 return out

703

704 def asList( self ):

705 """

706 Returns the parse results as a nested list of matching tokens, all converted to strings.

707

708 Example::

709 patt = OneOrMore(Word(alphas))

710 result = patt.parseString("sldkj lsdkj sldkj")

711 # even though the result prints in string-like form, it is actually a pyparsing ParseResults

712 print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj']

713

714 # Use asList() to create an actual list

715 result_list = result.asList()

716 print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']

717 """

718 return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist]

719

def asDict(self):
    """
    Returns the named parse results as a nested dictionary.

    Example::
        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        result = date_str.parseString('12/31/1999')
        print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})

        result_dict = result.asDict()
        print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'}

        # even though a ParseResults supports dict-like access, sometime you just need to have a dict
        import json
        print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
        print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"}
    """
    def toItem(obj):
        # plain values pass through; nested results become a dict when they
        # carry names, otherwise a list of converted items
        if not isinstance(obj, ParseResults):
            return obj
        if obj.haskeys():
            return obj.asDict()
        return [toItem(member) for member in obj]

    item_fn = self.items if PY_3 else self.iteritems
    converted = {}
    for key, value in item_fn():
        converted[key] = toItem(value)
    return converted

754

755 def copy( self ):

756 """

757 Returns a new copy of a C{ParseResults} object.

758 """

759 ret = ParseResults( self.__toklist )

760 ret.__tokdict = self.__tokdict.copy()

761 ret.__parent = self.__parent

762 ret.__accumNames.update( self.__accumNames )

763 ret.__name = self.__name

764 return ret

765

766 def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):

767 """

768 (Deprecated) Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.

769 """

770 nl = "\n"

771 out = []

772 namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items()

773 for v in vlist)

774 nextLevelIndent = indent + " "

775

776 # collapse out indents if formatting is not desired

777 if not formatted:

778 indent = ""

779 nextLevelIndent = ""

780 nl = ""

781

782 selfTag = None

783 if doctag is not None:

784 selfTag = doctag

785 else:

786 if self.__name:

787 selfTag = self.__name

788

789 if not selfTag:

790 if namedItemsOnly:

791 return ""

792 else:

793 selfTag = "ITEM"

794

795 out += [ nl, indent, "<", selfTag, ">" ]

796

797 for i,res in enumerate(self.__toklist):

798 if isinstance(res,ParseResults):

799 if i in namedItems:

800 out += [ res.asXML(namedItems[i],

801 namedItemsOnly and doctag is None,

802 nextLevelIndent,

803 formatted)]

804 else:

805 out += [ res.asXML(None,

806 namedItemsOnly and doctag is None,

807 nextLevelIndent,

808 formatted)]

809 else:

810 # individual token, see if there is a name for it

811 resTag = None

812 if i in namedItems:

813 resTag = namedItems[i]

814 if not resTag:

815 if namedItemsOnly:

816 continue

817 else:

818 resTag = "ITEM"

819 xmlBodyText = _xml_escape(_ustr(res))

820 out += [ nl, nextLevelIndent, "<", resTag, ">",

821 xmlBodyText,

822 "</", resTag, ">" ]

823

824 out += [ nl, indent, "</", selfTag, ">" ]

825 return "".join(out)

826

827 def __lookup(self,sub):

828 for k,vlist in self.__tokdict.items():

829 for v,loc in vlist:

830 if sub is v:

831 return k

832 return None

833

def getName(self):
    r"""
    Return the results name attached to this token expression, if any.
    Handy when several alternative expressions could match at one location.

    Lookup order: this object's own name; otherwise the name the parent
    holds for it; otherwise, for a single-token result with exactly one
    named entry located at position 0 or -1, that entry's name.
    Returns C{None} when no name can be determined.

    Example::
        integer = Word(nums)
        ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
        house_number_expr = Suppress('#') + Word(nums, alphanums)
        user_data = (Group(house_number_expr)("house_number")
                    | Group(ssn_expr)("ssn")
                    | Group(integer)("age"))
        user_info = OneOrMore(user_data)

        result = user_info.parseString("22 111-22-3333 #221B")
        for item in result:
            print(item.getName(), ':', item[0])
    prints::
        age : 22
        ssn : 111-22-3333
        house_number : 221B
    """
    if self.__name:
        return self.__name

    if self.__parent:
        # __parent is a callable reference; calling it yields the owner (or a falsy value)
        owner = self.__parent()
        return owner.__lookup(self) if owner else None

    if len(self) == 1 and len(self.__tokdict) == 1:
        first_entry = next(iter(self.__tokdict.values()))[0]
        if first_entry[1] in (0, -1):
            return next(iter(self.__tokdict.keys()))
    return None

870

def dump(self, indent='', depth=0, full=True):
    """
    Diagnostic listing of the contents of a C{ParseResults}.

    Parameters:
     - indent - prefix placed in front of every emitted line, so the text
       can be embedded in a larger nested display
     - depth - current nesting level (used for per-level indentation)
     - full - when false, only the token list line is produced

    Returns the listing as a single string.

    Example::
        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        result = date_str.parseString('12/31/1999')
        print(result.dump())
    prints::
        ['12', '/', '31', '/', '1999']
        - day: 1999
        - month: 31
        - year: 12
    """
    # NOTE(review): this listing looks whitespace-collapsed; confirm the width of
    # the per-level indent literal against the pristine module before relying on it.
    unit = ' '

    pieces = [indent + _ustr(self.asList())]
    if not full:
        return "".join(pieces)

    if self.haskeys():
        for key, value in sorted((str(k), v) for k,v in self.items()):
            # the list line is already in `pieces`, so each key starts a new line
            pieces.append('\n')
            pieces.append("%s%s- %s: " % (indent, unit*depth, key))
            if not isinstance(value, ParseResults):
                pieces.append(repr(value))
            elif value:
                pieces.append(value.dump(indent, depth+1))
            else:
                pieces.append(_ustr(value))
    elif any(isinstance(item, ParseResults) for item in self):
        for i, item in enumerate(self):
            if isinstance(item, ParseResults):
                rendered = item.dump(indent, depth+1)
            else:
                rendered = _ustr(item)
            pieces.append("\n%s%s[%d]:\n%s%s%s" % (indent, unit*depth, i, indent, unit*(depth+1), rendered))

    return "".join(pieces)

915

def pprint(self, *args, **kwargs):
    """
    Pretty-print the parsed results as a nested list using the C{pprint} module.
    Any extra positional or keyword arguments are forwarded unchanged to
    C{pprint.pprint}. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})

    Example::
        ident = Word(alphas, alphanums)
        num = Word(nums)
        func = Forward()
        term = ident | num | Group('(' + func + ')')
        func <<= ident + Group(Optional(delimitedList(term)))
        result = func.parseString("fna a,b,(fnb c,d,200),100")
        result.pprint(width=40)
    prints::
        ['fna',
         ['a',
          'b',
          ['(', 'fnb', ['c', 'd', '200'], ')'],
          '100']]
    """
    as_nested_list = self.asList()
    pprint.pprint(as_nested_list, *args, **kwargs)

938

939 # add support for pickle protocol

def __getstate__(self):
    """
    Build the pickle state as C{(toklist, (tokdict copy, parent, accumNames, name))}.

    The parent is stored as the object obtained by calling the parent
    reference; a missing, dead or falsy parent is stored as C{None}.
    """
    parent_ref = self.__parent
    parent = parent_ref() if parent_ref is not None else None
    if not parent:
        parent = None
    details = (self.__tokdict.copy(), parent, self.__accumNames, self.__name)
    return (self.__toklist, details)

946

def __setstate__(self,state):
    """
    Restore the object from the two-element state produced by C{__getstate__}.

    C{state[0]} is the token list; C{state[1]} is the tuple
    C{(tokdict, parent, accumNames, name)}.  The accumulated-names mapping is
    copied into a fresh dict, and a non-None parent is re-wrapped with C{wkref}.
    """
    self.__toklist = state[0]
    tokdict, parent, accum_names, name = state[1]
    self.__tokdict = tokdict
    self.__name = name

    fresh_names = {}
    fresh_names.update(accum_names)
    self.__accumNames = fresh_names

    self.__parent = wkref(parent) if parent is not None else None

959

def __getnewargs__(self):
    """Return the positional arguments for C{__new__} on unpickling: (toklist, name, asList, modal)."""
    new_args = (self.__toklist, self.__name, self.__asList, self.__modal)
    return new_args

962

def __dir__(self):
    """List the attribute names of the class followed by this object's own keys."""
    names = dir(type(self))
    names.extend(list(self.keys()))
    return names

965

966MutableMapping.register(ParseResults)

967

def col (loc,strg):
    """Return the 1-based column of position C{loc} within C{strg}, treating
    newlines as line separators.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>}
    for more information on parsing strings containing C{<TAB>}s, and suggested
    methods to maintain a consistent view of the parsed string, the parse
    location, and line and column positions within the parsed string.
    """
    # A position directly after a newline (and still inside the string) is column 1.
    if 0 < loc < len(strg) and strg[loc-1] == '\n':
        return 1
    # rfind gives -1 when no newline precedes loc, which makes the result loc + 1.
    return loc - strg.rfind("\n", 0, loc)

980

def lineno(loc,strg):
    """Return the 1-based line number of position C{loc} within C{strg},
    treating newlines as line separators.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>}
    for more information on parsing strings containing C{<TAB>}s, and suggested
    methods to maintain a consistent view of the parsed string, the parse
    location, and line and column positions within the parsed string.
    """
    newlines_before = strg.count("\n", 0, loc)
    return newlines_before + 1

992

def line( loc, strg ):
    """Return the line of text containing position C{loc} within C{strg},
    treating newlines as line separators.  The newline itself is not included.
    """
    start = strg.rfind("\n", 0, loc) + 1   # 0 when no newline precedes loc
    end = strg.find("\n", loc)
    return strg[start:end] if end >= 0 else strg[start:]

1002

def _defaultStartDebugAction( instring, loc, expr ):
    """Default debug hook for the start of a match attempt: print the
    expression, the location, and its (line, column)."""
    parts = [
        "Match ",
        _ustr(expr),
        " at loc ",
        _ustr(loc),
        "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ),
    ]
    print ("".join(parts))

1005

def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Default debug hook for a successful match: print the expression and
    the matched tokens as a list."""
    parts = ["Matched ", _ustr(expr), " -> ", str(toks.asList())]
    print ("".join(parts))

1008

def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Default debug hook for a failed match: print the raised exception."""
    message = "Exception raised:" + _ustr(exc)
    print (message)

1011

def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing.

    Accepts any positional arguments and ignores them.
    """
    return None

1015

1016# Only works on Python 3.x - nonlocal is toxic to Python 2 installs

1017#~ 'decorator to trim function calls to match the arity of the target'

1018#~ def _trim_arity(func, maxargs=3):

1019 #~ if func in singleArgBuiltins:

1020 #~ return lambda s,l,t: func(t)

1021 #~ limit = 0

1022 #~ foundArity = False

1023 #~ def wrapper(*args):

1024 #~ nonlocal limit,foundArity

1025 #~ while 1:

1026 #~ try:

1027 #~ ret = func(*args[limit:])

1028 #~ foundArity = True

1029 #~ return ret

1030 #~ except TypeError:

1031 #~ if limit == maxargs or foundArity:

1032 #~ raise

1033 #~ limit += 1

1034 #~ continue

1035 #~ return wrapper

1036

1037# this version is Python 2.x-3.x cross-compatible

1038'decorator to trim function calls to match the arity of the target'

def _trim_arity(func, maxargs=2):
    """
    Wrap a parse action so it can always be called as C{wrapper(s, loc, toks)}.

    The wrapper calls C{func} with progressively fewer leading arguments until
    a call succeeds, then remembers how many to drop.  A C{TypeError} is only
    treated as an arity mismatch when its traceback ends at the wrapper's own
    call line; C{TypeError}s raised from inside C{func} propagate unchanged.

    Parameters:
     - func - the user's parse action
     - maxargs - highest number of leading arguments that may be dropped

    Members of C{singleArgBuiltins} are wrapped to receive only the tokens.
    """
    if func in singleArgBuiltins:
        return lambda s,l,t: func(t)

    # one-element lists act as mutable cells shared with the closure below
    skip = [0]
    arity_found = [False]

    # traceback return data structure changed in Py3.5 - normalize back to plain tuples
    if system_version[:2] >= (3,5):
        def extract_stack(limit=0):
            # special handling for Python 3.5.0 - extra deep call stack by 1
            offset = -3 if system_version == (3,5,0) else -2
            frame_summary = traceback.extract_stack(limit=-offset+limit-1)[offset]
            return [frame_summary[:2]]
        def extract_tb(tb, limit=0):
            frames = traceback.extract_tb(tb, limit=limit)
            frame_summary = frames[-1]
            return [frame_summary[:2]]
    else:
        extract_stack = traceback.extract_stack
        extract_tb = traceback.extract_tb

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    LINE_DIFF = 6
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    this_line = extract_stack(limit=2)[-1]
    pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF)

    def wrapper(*args):
        while 1:
            try:
                ret = func(*args[skip[0]:])
                arity_found[0] = True
                return ret
            except TypeError:
                # re-raise TypeErrors if they did not come from our arity testing
                if arity_found[0]:
                    raise
                try:
                    tb = sys.exc_info()[-1]
                    if extract_tb(tb, limit=2)[-1][:2] != pa_call_line_synth:
                        raise
                finally:
                    del tb

                if skip[0] <= maxargs:
                    skip[0] += 1
                    continue
                raise

    # copy func name to wrapper for sensible debug output
    try:
        func_name = getattr(func, '__name__',
                            getattr(func, '__class__').__name__)
    except Exception:
        func_name = str(func)
    wrapper.__name__ = func_name

    return wrapper

1102

1103class ParserElement(object):

1104 """Abstract base level parser element class."""

1105 DEFAULT_WHITE_CHARS = " \n\t\r"

1106 verbose_stacktrace = False

1107

@staticmethod
def setDefaultWhitespaceChars( chars ):
    r"""
    Replace the class-wide default set of characters skipped as whitespace.

    The new value is stored on C{ParserElement.DEFAULT_WHITE_CHARS}, from
    which elements take their whitespace characters when they are created.

    Example::
        # default whitespace chars are space, <TAB>, newline and carriage return
        OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def', 'ghi', 'jkl']

        # change to just treat newline as significant
        ParserElement.setDefaultWhitespaceChars(" \t")
        OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def']
    """
    ParserElement.DEFAULT_WHITE_CHARS = chars

1122

@staticmethod
def inlineLiteralsUsing(cls):
    """
    Choose the class used to wrap plain string literals that appear inside
    a parser expression.  The class is stored on
    C{ParserElement._literalStringClass}.

    Example::
        # default literal class used is Literal
        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']


        # change to Suppress
        ParserElement.inlineLiteralsUsing(Suppress)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parseString("1999/12/31")  # -> ['1999', '12', '31']
    """
    ParserElement._literalStringClass = cls

1143

def __init__( self, savelist=False ):
    """Set up the state shared by every parser element.

    Parameters:
     - savelist - initial value of C{saveAsList}
    """
    # actions and naming
    self.parseAction = []
    self.failAction = None
    #~ self.name = "<unknown>"  # don't define self.name, let subclasses try/except upcall
    self.strRepr = None
    self.resultsName = None
    self.saveAsList = savelist

    # whitespace / ignorable handling
    self.skipWhitespace = True
    self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    self.copyDefaultWhiteChars = True
    self.mayReturnEmpty = False # used when checking for left-recursion
    self.keepTabs = False
    self.ignoreExprs = []

    # parsing behaviour flags
    self.debug = False
    self.streamlined = False
    self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index
    self.errmsg = ""
    self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)
    self.debugActions = ( None, None, None ) #custom debug actions
    self.re = None
    self.callPreparse = True # used to avoid redundant calls to preParse
    self.callDuringTry = False

1166

def copy( self ):
    """
    Return a shallow copy of this C{ParserElement} with its own parse-action
    and ignore-expression lists.  Useful for attaching different parse
    actions to the same parsing pattern.

    If C{copyDefaultWhiteChars} is set, the copy picks up the current
    C{ParserElement.DEFAULT_WHITE_CHARS}.

    Example::
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K")
        integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")

        print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M"))
    prints::
        [5120, 100, 655360, 268435456]
    Equivalent form of C{expr.copy()} is just C{expr()}::
        integerM = integer().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
    """
    duplicate = copy.copy( self )
    # give the duplicate independent lists so later additions don't leak back
    duplicate.parseAction = self.parseAction[:]
    duplicate.ignoreExprs = self.ignoreExprs[:]
    if self.copyDefaultWhiteChars:
        duplicate.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    return duplicate

1189

def setName( self, name ):
    """
    Give this expression a name, which makes debugging output and exception
    messages clearer.  Also updates C{errmsg} (and the message of a
    pre-built C{exception} attribute, when one exists).  Returns C{self}.

    Example::
        Word(nums).parseString("ABC")  # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1)
        Word(nums).setName("integer").parseString("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)
    """
    self.name = name
    message = "Expected " + self.name
    self.errmsg = message
    if hasattr(self,"exception"):
        self.exception.msg = message
    return self

1203

def setResultsName( self, name, listAllMatches=False ):
    """
    Define a name under which the matching tokens can be referenced as a
    nested attribute of the returned parse results.
    NOTE: this returns a *copy* of the original C{ParserElement} object;
    this is so that the client can define a basic element, such as an
    integer, and reference it in multiple places with different names.

    A trailing C{"*"} on C{name} is stripped and has the same effect as
    passing C{listAllMatches=True}.

    You can also set results names using the abbreviated syntax,
    C{expr("name")} in place of C{expr.setResultsName("name")} -
    see L{I{__call__}<__call__>}.

    Example::
        date_str = (integer.setResultsName("year") + '/'
                    + integer.setResultsName("month") + '/'
                    + integer.setResultsName("day"))

        # equivalent form:
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
    """
    clone = self.copy()
    accumulate = listAllMatches
    if name.endswith("*"):
        name, accumulate = name[:-1], True
    clone.resultsName = name
    clone.modalResults = not accumulate
    return clone

1231

def setBreak(self,breakFlag = True):
    """Invoke the Python pdb debugger when this element is about to be parsed.

    Set C{breakFlag} to True to wrap the element's C{_parse} in a function
    that calls C{pdb.set_trace()} first; set it to False to restore the
    wrapped parse method (if one was installed).  Returns C{self}.
    """
    if not breakFlag:
        current = self._parse
        if hasattr(current,"_originalParseMethod"):
            self._parse = current._originalParseMethod
        return self

    _parseMethod = self._parse
    def breaker(instring, loc, doActions=True, callPreParse=True):
        import pdb
        pdb.set_trace()
        return _parseMethod( instring, loc, doActions, callPreParse )
    # remember the wrapped method so that setBreak(False) can undo the wrapping
    breaker._originalParseMethod = _parseMethod
    self._parse = breaker
    return self

1249

def setParseAction( self, *fns, **kwargs ):
    """
    Replace this element's parse actions with the given callables, which run
    when the element matches successfully.  Returns C{self}.

    Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
    C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
     - s   = the original string being parsed (see note below)
     - loc = the location of the matching substring
     - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
    If the functions in fns modify the tokens, they can return them as the return
    value from fn, and the modified list of tokens will replace the original.
    Otherwise, fn does not need to return any value.

    Optional keyword arguments:
     - callDuringTry = (default=C{False}) indicate if parse action should be run during lookaheads and alternate testing

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{parseString}<parseString>} for more information
    on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
    consistent view of the parsed string, the parse location, and line and column
    positions within the parsed string.

    Example::
        integer = Word(nums)
        date_str = integer + '/' + integer + '/' + integer

        date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']

        # use parse action to convert to ints at parse time
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        date_str = integer + '/' + integer + '/' + integer

        # note that integer fields are now ints, not strings
        date_str.parseString("1999/12/31")  # -> [1999, '/', 12, '/', 31]
    """
    adapt = _trim_arity
    self.parseAction = [adapt(fn) for fn in fns]
    self.callDuringTry = kwargs.get("callDuringTry", False)
    return self

1287

def addParseAction( self, *fns, **kwargs ):
    """
    Append one or more parse actions to this expression's existing list.
    See L{I{setParseAction}<setParseAction>} for the accepted call signatures.
    C{callDuringTry} is switched on if requested here and is never switched
    off by this method.  Returns C{self}.

    See examples in L{I{copy}<copy>}.
    """
    adapt = _trim_arity
    self.parseAction += [adapt(fn) for fn in fns]
    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self

1297

def addCondition(self, *fns, **kwargs):
    """Add a boolean predicate function to expression's list of parse actions. See
    L{I{setParseAction}<setParseAction>} for function call signatures. Unlike C{setParseAction},
    functions passed to C{addCondition} need to return boolean success/fail of the condition.

    Each predicate is bound to its own parse action, so when several
    predicates are given every one of them is evaluated.

    Optional keyword arguments:
     - message = define a custom message to be used in the raised exception
     - fatal   = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
     - callDuringTry = if True, the conditions are also run during lookaheads and alternate testing

    Returns C{self}.

    Example::
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        year_int = integer.copy()
        year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
        date_str = year_int + '/' + integer + '/' + integer

        result = date_str.parseString("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
    """
    msg = kwargs.get("message", "failed user-defined condition")
    exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException

    def make_condition(predicate):
        # A factory gives every predicate its own closure; defining `pa` directly
        # in the loop would make all of them see only the last loop value.
        trimmed = _trim_arity(predicate)
        def pa(s,l,t):
            if not bool(trimmed(s,l,t)):
                raise exc_type(s,l,msg)
        return pa

    for fn in fns:
        self.parseAction.append(make_condition(fn))
    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self

1324

def setFailAction( self, fn ):
    """Register the action to perform when parsing fails at this expression.
    Returns C{self}.

    Fail action fn is a callable function that takes the arguments
    C{fn(s,loc,expr,err)} where:
     - s = string being parsed
     - loc = location where expression match was attempted and failed
     - expr = the parse expression that failed
     - err = the exception thrown
    The function returns no value.  It may throw C{L{ParseFatalException}}
    if it is desired to stop parsing immediately."""
    self.failAction = fn
    return self

1337

def _skipIgnorables( self, instring, loc ):
    """Advance C{loc} past every run of ignorable expressions and return the
    new location.

    The full list of ignore expressions is retried until a complete pass
    finds no further match.
    """
    progressed = True
    while progressed:
        progressed = False
        for ignorable in self.ignoreExprs:
            try:
                # consume as many consecutive matches of this expression as possible
                while True:
                    loc, _tokens = ignorable._parse( instring, loc )
                    progressed = True
            except ParseException:
                pass
    return loc

1350

def preParse( self, instring, loc ):
    """Return the location at which actual matching should start: C{loc}
    moved past any ignorable expressions and then past leading whitespace
    (when C{skipWhitespace} is set)."""
    if self.ignoreExprs:
        loc = self._skipIgnorables( instring, loc )

    if not self.skipWhitespace:
        return loc

    white = self.whiteChars
    end = len(instring)
    while loc < end and instring[loc] in white:
        loc += 1
    return loc

1362

def parseImpl( self, instring, loc, doActions=True ):
    """Base implementation: match nothing, returning C{loc} unchanged and an
    empty token list."""
    no_tokens = []
    return loc, no_tokens

1365

def postParse( self, instring, loc, tokenlist ):
    """Base implementation of the post-match hook: hand back C{tokenlist}
    unchanged."""
    return tokenlist

1368

1369 #~ @profile

def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """
    Core (uncached) parse step for one element at one location.

    Sequence: optional debug "start" hook, pre-parse skipping, C{parseImpl},
    C{postParse}, packaging into C{ParseResults}, parse actions, optional
    debug "success" hook.  An C{IndexError} from C{parseImpl} is converted
    into a C{ParseException} positioned at the end of the input.

    Returns C{(loc, ParseResults)}.
    """
    debugging = ( self.debug ) #and doActions )
    instrumented = debugging or self.failAction

    # the "start" hook fires before any whitespace/ignorable skipping
    if instrumented and self.debugActions[0]:
        #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
        self.debugActions[0]( instring, loc, self )

    if callPreParse and self.callPreparse:
        preloc = self.preParse( instring, loc )
    else:
        preloc = loc
    tokensStart = preloc

    if instrumented:
        try:
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        except ParseBaseException as err:
            #~ print ("Exception raised:", err)
            if self.debugActions[2]:
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                self.failAction( instring, tokensStart, self, err )
            raise
    elif self.mayIndexError or preloc >= len(instring):
        try:
            loc,tokens = self.parseImpl( instring, preloc, doActions )
        except IndexError:
            raise ParseException( instring, len(instring), self.errmsg, self )
    else:
        # no IndexError guard needed on this path
        loc,tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for action in self.parseAction:
                    tokens = action( instring, tokensStart, retTokens )
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
            except ParseBaseException as err:
                #~ print "Exception raised in user parse action:", err
                if self.debugActions[2]:
                    self.debugActions[2]( instring, tokensStart, self, err )
                raise
        else:
            for action in self.parseAction:
                tokens = action( instring, tokensStart, retTokens )
                if tokens is not None:
                    retTokens = ParseResults( tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                              modal=self.modalResults )

    if debugging and self.debugActions[1]:
        #~ print ("Matched",self,"->",retTokens.asList())
        self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens

1440

def tryParse( self, instring, loc ):
    """Attempt a match at C{loc} without running parse actions and return
    the location after the match.

    A C{ParseFatalException} is downgraded to an ordinary C{ParseException}
    raised at C{loc}.
    """
    try:
        end_loc, _tokens = self._parse( instring, loc, doActions=False )[:2]
    except ParseFatalException:
        raise ParseException( instring, loc, self.errmsg, self)
    return end_loc

1446

def canParseNext(self, instring, loc):
    """Return True if this element matches at C{loc}, False when the attempt
    raises C{ParseException} or C{IndexError}."""
    try:
        self.tryParse(instring, loc)
    except (ParseException, IndexError):
        return False
    return True

1454

class _UnboundedCache(object):
    """Cache without a size limit, backed by a plain dict.

    The accessors are closures over the dict and are attached to each
    instance as bound methods.  C{get} returns the C{not_in_cache} sentinel
    for a missing key.
    """
    def __init__(self):
        store = {}
        self.not_in_cache = missing = object()

        def get(self, key):
            return store.get(key, missing)

        def set(self, key, value):
            store[key] = value

        def clear(self):
            store.clear()

        def cache_len(self):
            return len(store)

        bind = types.MethodType
        self.get = bind(get, self)
        self.set = bind(set, self)
        self.clear = bind(clear, self)
        self.__len__ = bind(cache_len, self)

1476

if _OrderedDict is not None:
    class _FifoCache(object):
        """Size-limited cache that evicts the oldest inserted key first.

        Backed by an ordered dict.  C{get} returns the C{not_in_cache}
        sentinel for a missing key.
        """
        def __init__(self, size):
            self.not_in_cache = not_in_cache = object()

            cache = _OrderedDict()

            def get(self, key):
                return cache.get(key, not_in_cache)

            def set(self, key, value):
                cache[key] = value
                # drop the oldest entries until the cache fits in `size`
                while len(cache) > size:
                    try:
                        cache.popitem(False)
                    except KeyError:
                        pass

            def clear(self):
                cache.clear()

            def cache_len(self):
                return len(cache)

            self.get = types.MethodType(get, self)
            self.set = types.MethodType(set, self)
            self.clear = types.MethodType(clear, self)
            self.__len__ = types.MethodType(cache_len, self)

else:
    class _FifoCache(object):
        """Size-limited cache that evicts the oldest inserted key first.

        Fallback used when no ordered dict is available: a plain dict plus a
        queue that records key insertion order.  C{get} returns the
        C{not_in_cache} sentinel for a missing key.
        """
        def __init__(self, size):
            self.not_in_cache = not_in_cache = object()

            cache = {}
            # Deliberately created without maxlen: a bounded deque discards old
            # keys by itself on append, so their dict entries would never be
            # removed and the dict would grow without limit.
            key_fifo = collections.deque()

            def get(self, key):
                return cache.get(key, not_in_cache)

            def set(self, key, value):
                # a key that is already cached keeps its original queue position
                if key not in cache:
                    key_fifo.append(key)
                cache[key] = value
                while len(key_fifo) > size:
                    cache.pop(key_fifo.popleft(), None)

            def clear(self):
                cache.clear()
                key_fifo.clear()

            def cache_len(self):
                return len(cache)

            self.get = types.MethodType(get, self)
            self.set = types.MethodType(set, self)
            self.clear = types.MethodType(clear, self)
            self.__len__ = types.MethodType(cache_len, self)

1534

1535 # argument cache for optimizing repeated calls when backtracking through recursive expressions

1536 packrat_cache = {} # this is set later by enabledPackrat(); this is here so that resetCache() doesn't fail

1537 packrat_cache_lock = RLock()

1538 packrat_cache_stats = [0, 0]

1539

1540 # this method gets repeatedly called during backtracking with the same arguments -

1541 # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression

def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
    """
    Memoizing front end for C{_parseNoCache}.

    Results are keyed on C{(self, instring, loc, callPreParse, doActions)}.
    Both successful results and C{ParseBaseException}s are cached; cached
    exceptions are re-raised and cached results are returned as copies.
    The whole lookup runs under C{ParserElement.packrat_cache_lock}, and
    C{ParserElement.packrat_cache_stats} counts hits (index 0) and misses
    (index 1).
    """
    HIT, MISS = 0, 1
    lookup = (self, instring, loc, callPreParse, doActions)
    with ParserElement.packrat_cache_lock:
        cache = ParserElement.packrat_cache
        cached = cache.get(lookup)

        if cached is not cache.not_in_cache:
            ParserElement.packrat_cache_stats[HIT] += 1
            if isinstance(cached, Exception):
                raise cached
            return (cached[0], cached[1].copy())

        ParserElement.packrat_cache_stats[MISS] += 1
        try:
            result = self._parseNoCache(instring, loc, doActions, callPreParse)
        except ParseBaseException as pe:
            # cache a copy of the exception, without the traceback
            cache.set(lookup, pe.__class__(*pe.args))
            raise
        cache.set(lookup, (result[0], result[1].copy()))
        return result

1564

1565 _parse = _parseNoCache

1566

@staticmethod
def resetCache():
    """Empty the packrat cache and reset its hit/miss counters to zero (in place)."""
    ParserElement.packrat_cache.clear()
    stats = ParserElement.packrat_cache_stats
    stats[:] = [0] * len(stats)

1571

1572 _packratEnabled = False

@staticmethod
def enablePackrat(cache_size_limit=128):
    """Enables "packrat" parsing, which adds memoizing to the parsing logic.
    Repeated parse attempts at the same string location (which happens
    often in many complex grammars) can immediately return a cached value,
    instead of re-executing parsing/validating code.  Memoizing is done of
    both valid results and parsing exceptions.

    Parameters:
     - cache_size_limit - (default=C{128}) - if an integer value is provided
       will limit the size of the packrat cache; if None is passed, then
       the cache size will be unbounded; if 0 is passed, the cache will
       be effectively disabled.

    Only the first call has any effect; later calls return without
    changing the cache.

    This speedup may break existing programs that use parse actions that
    have side-effects.  For this reason, packrat parsing is disabled when
    you first import pyparsing.  To activate the packrat feature, your
    program must call the class method C{ParserElement.enablePackrat()}.  If
    your program uses C{psyco} to "compile as you go", you must call
    C{enablePackrat} before calling C{psyco.full()}.  If you do not do this,
    Python will crash.  For best results, call C{enablePackrat()} immediately
    after importing pyparsing.

    Example::
        import pyparsing
        pyparsing.ParserElement.enablePackrat()
    """
    if ParserElement._packratEnabled:
        return

    ParserElement._packratEnabled = True
    if cache_size_limit is None:
        ParserElement.packrat_cache = ParserElement._UnboundedCache()
    else:
        ParserElement.packrat_cache = ParserElement._FifoCache(cache_size_limit)
    # route all parsing through the memoizing implementation from now on
    ParserElement._parse = ParserElement._parseCache

1607

def parseString( self, instring, parseAll=False ):
    """
    Execute the parse expression with the given string.
    This is the main interface to the client code, once the complete
    expression has been built.

    If you want the grammar to require that the entire input string be
    successfully parsed, then set C{parseAll} to True (equivalent to ending
    the grammar with C{L{StringEnd()}}).

    Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
    in order to report proper column numbers in parse actions.
    If the input string contains tabs and
    the grammar uses parse actions that use the C{loc} argument to index into the
    string being parsed, you can ensure you have a consistent view of the input
    string by:
     - calling C{parseWithTabs} on your grammar before calling C{parseString}
       (see L{I{parseWithTabs}<parseWithTabs>})
     - define your parse action using the full C{(s,loc,toks)} signature, and
       reference the input string using the parse action's C{s} argument
     - explicitly expand the tabs in your input string before calling
       C{parseString}

    Example::
        Word('a').parseString('aaaaabaaa')  # -> ['aaaaa']
        Word('a').parseString('aaaaabaaa', parseAll=True)  # -> Exception: Expected end of text
    """
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
        #~ self.saveAsList = True
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()

    try:
        loc, tokens = self._parse( instring, 0 )
        if parseAll:
            # require that nothing but skippable text remains after the match
            loc = self.preParse( instring, loc )
            end_check = Empty() + StringEnd()
            end_check._parse( instring, loc )
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
    return tokens

1657

def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
    """
    Scan the input string for expression matches.  This is a generator: each
    match yields C{(tokens, start, end)}.  May be called with optional
    C{maxMatches} argument, to clip scanning after 'n' matches are found.  If
    C{overlap} is specified, then overlapping matches will be reported.

    Note that the start and end locations are reported relative to the string
    being parsed.  See L{I{parseString}<parseString>} for more information on parsing
    strings with embedded tabs.

    Example::
        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens,start,end in Word(alphas).scanString(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    # bind the bound methods once; they are called on every scan step
    preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.resetCache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                preloc = preparseFn( instring, loc )
                match_end,tokens = parseFn( instring, preloc, callPreParse=False )
            except ParseException:
                loc = preloc+1
                continue

            if match_end <= loc:
                # no forward progress from this match; step past the pre-parsed position
                loc = preloc+1
                continue

            matches += 1
            yield tokens, preloc, match_end
            if not overlap:
                loc = match_end
            else:
                skipped_to = preparseFn( instring, loc )
                if skipped_to > loc:
                    loc = match_end
                else:
                    loc += 1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        else:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc

1728

def transformString( self, instring ):
    """
    Extension to C{L{scanString}} that rewrites the matched portions of
    C{instring}. Attach a parse action that modifies the returned tokens;
    every match found by the scan is replaced by its (possibly modified)
    tokens, unmatched text is copied through unchanged, and the resulting
    string is returned.

    Side effect: sets C{self.keepTabs = True} on this element.

    Example::
        wd = Word(alphas)
        wd.setParseAction(lambda toks: toks[0].title())

        print(wd.transformString("now is the winter of our discontent made glorious summer by this sun of york."))
    Prints::
        Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
    """
    pieces = []
    prev_end = 0
    # tabs must be preserved so that the locations reported by scanString
    # index correctly into the caller's (unexpanded) string
    self.keepTabs = True
    try:
        for toks, start, end in self.scanString( instring ):
            # text between the previous match and this one is kept verbatim
            pieces.append( instring[prev_end:start] )
            if toks:
                if isinstance(toks, ParseResults):
                    pieces.extend(toks.asList())
                elif isinstance(toks, list):
                    pieces.extend(toks)
                else:
                    pieces.append(toks)
            prev_end = end
        pieces.append(instring[prev_end:])
        # drop empty fragments before flattening and joining
        kept = [piece for piece in pieces if piece]
        return "".join(map(_ustr, _flatten(kept)))
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # re-raising from here clears out the pyparsing internal stack trace
        raise exc

1771

def searchString( self, instring, maxMatches=_MAX_INT ):
    """
    Another extension to C{L{scanString}}: collects just the tokens of every
    match found in C{instring} and returns them wrapped in a C{ParseResults}.
    The optional C{maxMatches} argument is passed through to C{scanString}
    to stop searching after 'n' matches.

    Example::
        # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))

        # the sum() builtin can be used to merge results into a single ParseResults object
        print(sum(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity")))
    prints::
        [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
        ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
    """
    try:
        # only the tokens are kept; start/end locations are discarded
        found = [toks for toks, _start, _end in self.scanString( instring, maxMatches )]
        return ParseResults(found)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # re-raising from here clears out the pyparsing internal stack trace
        raise exc

1798

def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
    """
    Generator method to split a string using this expression as a separator.

    Parameters:
     - instring - the string to split
     - maxsplit - (default=C{_MAX_INT}) passed to C{L{scanString}} as C{maxMatches},
       limiting the number of separators that are recognized
     - includeSeparators - (default=C{False}) if true, the first token of each
       separator match is yielded between the surrounding pieces

    Yields the text before each separator match and finally the text after
    the last match (the whole string if nothing matched).

    Example::
        punc = oneOf(list(".,;:/-!?"))
        print(list(punc.split("This, this?, this sentence, is badly punctuated!")))
    prints::
        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    # end location of the previous separator; start of the next piece
    last = 0
    for t,s,e in self.scanString(instring, maxMatches=maxsplit):
        yield instring[last:s]
        if includeSeparators:
            yield t[0]
        last = e
    yield instring[last:]

1820

def __add__(self, other ):
    """
    Implementation of + operator - returns C{L{And}}. A string operand is
    first converted using C{ParserElement._literalStringClass}
    (L{Literal} by default). An operand of any other non-C{ParserElement}
    type produces a C{SyntaxWarning} and the result is C{None}.

    Example::
        greet = Word(alphas) + "," + Word(alphas) + "!"
        hello = "Hello, World!"
        print (hello, "->", greet.parseString(hello))
    Prints::
        Hello, World! -> ['Hello', ',', 'World', '!']
    """
    rhs = other
    if isinstance(rhs, basestring):
        rhs = ParserElement._literalStringClass(rhs)
    if isinstance(rhs, ParserElement):
        return And([self, rhs])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
                  SyntaxWarning, stacklevel=2)
    return None

1840

def __radd__(self, other ):
    """
    Implementation of + operator when left operand is not a C{L{ParserElement}}.
    A string operand is converted using C{ParserElement._literalStringClass};
    any other non-C{ParserElement} operand produces a C{SyntaxWarning} and
    the result is C{None}.
    """
    lhs = other
    if isinstance(lhs, basestring):
        lhs = ParserElement._literalStringClass(lhs)
    if isinstance(lhs, ParserElement):
        # delegate to the converted left operand's own + implementation
        return lhs + self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
                  SyntaxWarning, stacklevel=2)
    return None

1852

def __sub__(self, other):
    """
    Implementation of - operator, returns C{L{And}} with error stop
    (an C{And._ErrorStop()} is inserted between the two operands).
    A string operand is converted using C{ParserElement._literalStringClass};
    any other non-C{ParserElement} operand produces a C{SyntaxWarning} and
    the result is C{None}.
    """
    rhs = other
    if isinstance(rhs, basestring):
        rhs = ParserElement._literalStringClass(rhs)
    if isinstance(rhs, ParserElement):
        return self + And._ErrorStop() + rhs
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
                  SyntaxWarning, stacklevel=2)
    return None

1864

def __rsub__(self, other ):
    """
    Implementation of - operator when left operand is not a C{L{ParserElement}}.
    A string operand is converted using C{ParserElement._literalStringClass};
    any other non-C{ParserElement} operand produces a C{SyntaxWarning} and
    the result is C{None}.
    """
    lhs = other
    if isinstance(lhs, basestring):
        lhs = ParserElement._literalStringClass(lhs)
    if isinstance(lhs, ParserElement):
        # delegate to the converted left operand's own - implementation
        return lhs - self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
                  SyntaxWarning, stacklevel=2)
    return None

1876

def __mul__(self,other):
    """
    Implementation of * operator, allows use of C{expr * 3} in place of
    C{expr + expr + expr}.  Expressions may also be multiplied by a 2-integer
    tuple, similar to C{{min,max}} multipliers in regular expressions.  Tuples
    may also include C{None} as in:
     - C{expr*(n,None)} or C{expr*(n,)} is equivalent
          to C{expr*n + L{ZeroOrMore}(expr)}
          (read as "at least n instances of C{expr}")
     - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
          (read as "0 to n instances of C{expr}")
     - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
     - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}

    Note that C{expr*(None,n)} does not raise an exception if
    more than n exprs exist in the input stream; that is,
    C{expr*(None,n)} does not enforce a maximum number of expr
    occurrences.  If this behavior is desired, then write
    C{expr*(None,n) + ~expr}

    Raises:
     - C{TypeError} if C{other} is neither an int nor a tuple of ints/C{None}
     - C{ValueError} for a negative count, a maximum below the minimum,
       or a multiplier of C{0} / C{(0,0)}
    """
    if isinstance(other,int):
        minElements, optElements = other,0
    elif isinstance(other,tuple):
        # pad short tuples with None so that (n,) behaves like (n,None)
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0],int) and other[1] is None:
            # open-ended repetition: "at least n"
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self*other[0] + ZeroOrMore(self)
        elif isinstance(other[0],int) and isinstance(other[1],int):
            minElements, optElements = other
            # from here on optElements is the number of *optional* repetitions
            optElements -= minElements
        else:
            # interpolate the type names into the message (previously they were
            # passed as extra exception args and never formatted)
            raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects"
                            % (type(other[0]).__name__, type(other[1]).__name__))
    else:
        raise TypeError("cannot multiply 'ParserElement' and '%s' objects"
                        % type(other).__name__)

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError("second tuple value must be greater or equal to first tuple value")
    if minElements == optElements == 0:
        raise ValueError("cannot multiply ParserElement by 0 or (0,0)")

    if (optElements):
        def makeOptionalList(n):
            # nested Optionals: Optional(expr + Optional(expr + ...)), n levels deep
            if n>1:
                return Optional(self + makeOptionalList(n-1))
            else:
                return Optional(self)
        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self]*minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self]*minElements)
    return ret

1944

def __rmul__(self, other):
    """
    Implementation of * operator when the multiplier is the left operand;
    simply forwards to C{__mul__} with the same argument.
    """
    product = self.__mul__(other)
    return product

1947

def __or__(self, other ):
    """
    Implementation of | operator - returns C{L{MatchFirst}}.
    A string operand is converted using C{ParserElement._literalStringClass};
    any other non-C{ParserElement} operand produces a C{SyntaxWarning} and
    the result is C{None}.
    """
    rhs = other
    if isinstance(rhs, basestring):
        rhs = ParserElement._literalStringClass(rhs)
    if isinstance(rhs, ParserElement):
        return MatchFirst([self, rhs])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
                  SyntaxWarning, stacklevel=2)
    return None

1959

def __ror__(self, other ):
    """
    Implementation of | operator when left operand is not a C{L{ParserElement}}.
    A string operand is converted using C{ParserElement._literalStringClass};
    any other non-C{ParserElement} operand produces a C{SyntaxWarning} and
    the result is C{None}.
    """
    lhs = other
    if isinstance(lhs, basestring):
        lhs = ParserElement._literalStringClass(lhs)
    if isinstance(lhs, ParserElement):
        # delegate to the converted left operand's own | implementation
        return lhs | self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
                  SyntaxWarning, stacklevel=2)
    return None

1971

def __xor__(self, other ):
    """
    Implementation of ^ operator - returns C{L{Or}}.
    A string operand is converted using C{ParserElement._literalStringClass};
    any other non-C{ParserElement} operand produces a C{SyntaxWarning} and
    the result is C{None}.
    """
    rhs = other
    if isinstance(rhs, basestring):
        rhs = ParserElement._literalStringClass(rhs)
    if isinstance(rhs, ParserElement):
        return Or([self, rhs])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
                  SyntaxWarning, stacklevel=2)
    return None

1983

def __rxor__(self, other ):
    """
    Implementation of ^ operator when left operand is not a C{L{ParserElement}}.
    A string operand is converted using C{ParserElement._literalStringClass};
    any other non-C{ParserElement} operand produces a C{SyntaxWarning} and
    the result is C{None}.
    """
    lhs = other
    if isinstance(lhs, basestring):
        lhs = ParserElement._literalStringClass(lhs)
    if isinstance(lhs, ParserElement):
        # delegate to the converted left operand's own ^ implementation
        return lhs ^ self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
                  SyntaxWarning, stacklevel=2)
    return None

1995

def __and__(self, other ):
    """
    Implementation of & operator - returns C{L{Each}}.
    A string operand is converted using C{ParserElement._literalStringClass};
    any other non-C{ParserElement} operand produces a C{SyntaxWarning} and
    the result is C{None}.
    """
    rhs = other
    if isinstance(rhs, basestring):
        rhs = ParserElement._literalStringClass(rhs)
    if isinstance(rhs, ParserElement):
        return Each([self, rhs])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
                  SyntaxWarning, stacklevel=2)
    return None

2007

def __rand__(self, other ):
    """
    Implementation of & operator when left operand is not a C{L{ParserElement}}.
    A string operand is converted using C{ParserElement._literalStringClass};
    any other non-C{ParserElement} operand produces a C{SyntaxWarning} and
    the result is C{None}.
    """
    lhs = other
    if isinstance(lhs, basestring):
        lhs = ParserElement._literalStringClass(lhs)
    if isinstance(lhs, ParserElement):
        # delegate to the converted left operand's own & implementation
        return lhs & self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
                  SyntaxWarning, stacklevel=2)
    return None

2019

def __invert__( self ):
    """
    Implementation of ~ operator - wraps this expression in a C{L{NotAny}}
    and returns it.
    """
    negated = NotAny(self)
    return negated

2025

def __call__(self, name=None):
    """
    Shortcut for C{L{setResultsName}}, with C{listAllMatches=False}.

    If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
    passed as C{True}.

    If C{name} is omitted (C{None}), same as calling C{L{copy}}.

    Example::
        # these are equivalent
        userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
        userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
    """
    if name is None:
        # no name supplied: behave like a plain copy
        return self.copy()
    return self.setResultsName(name)

2044

def suppress( self ):
    """
    Wraps this C{ParserElement} in a C{Suppress} and returns the wrapper, so
    that its output is suppressed; useful to keep punctuation from cluttering
    up returned output.
    """
    suppressed = Suppress(self)
    return suppressed

2051

def leaveWhitespace( self ):
    """
    Turns off the skipping of whitespace before matching the characters in
    this C{ParserElement}'s defined pattern, by clearing C{skipWhitespace}.
    This is normally only used internally by the pyparsing module, but may
    be needed in some whitespace-sensitive grammars.

    Returns C{self} so that calls can be chained.
    """
    setattr(self, "skipWhitespace", False)
    return self

2060

def setWhitespaceChars( self, chars ):
    """
    Overrides the default whitespace chars for this element: stores C{chars}
    as C{whiteChars}, enables whitespace skipping and clears
    C{copyDefaultWhiteChars}.

    Returns C{self} so that calls can be chained.
    """
    self.whiteChars = chars
    self.skipWhitespace = True
    self.copyDefaultWhiteChars = False
    return self

2069

def parseWithTabs( self ):
    """
    Overrides the default behavior of expanding C{<TAB>}s to spaces before
    parsing the input string, by setting C{keepTabs}. Must be called before
    C{parseString} when the input grammar contains elements that match
    C{<TAB>} characters.

    Returns C{self} so that calls can be chained.
    """
    setattr(self, "keepTabs", True)
    return self

2078

def ignore( self, other ):
    """
    Define expression to be ignored (e.g., comments) while doing pattern
    matching; may be called repeatedly, to define multiple comment or other
    ignorable patterns.

    A string argument is wrapped in C{Suppress}. A C{Suppress} expression is
    added to C{ignoreExprs} only if it is not already there; any other
    expression is copied, wrapped in C{Suppress} and appended.

    Returns C{self} so that calls can be chained.

    Example::
        patt = OneOrMore(Word(alphas))
        patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj']

        patt.ignore(cStyleComment)
        patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd']
    """
    expr = Suppress(other) if isinstance(other, basestring) else other

    if not isinstance(expr, Suppress):
        self.ignoreExprs.append(Suppress(expr.copy()))
    elif expr not in self.ignoreExprs:
        self.ignoreExprs.append(expr)
    return self

2101

def setDebugActions( self, startAction, successAction, exceptionAction ):
    """
    Enable display of debugging messages while doing pattern matching,
    using the given actions. Any action passed as a false value (e.g.
    C{None}) is replaced by the corresponding module default action.

    Returns C{self} so that calls can be chained.
    """
    on_start = startAction or _defaultStartDebugAction
    on_success = successAction or _defaultSuccessDebugAction
    on_exception = exceptionAction or _defaultExceptionDebugAction
    self.debugActions = (on_start, on_success, on_exception)
    self.debug = True
    return self

2111

def setDebug( self, flag=True ):
    """
    Enable display of debugging messages while doing pattern matching.
    Set C{flag} to True to enable (installs the default debug actions via
    L{setDebugActions}), False to disable. Returns C{self}.

    Example::
        wd = Word(alphas).setName("alphaword")
        integer = Word(nums).setName("numword")
        term = wd | integer

        # turn on debugging for wd
        wd.setDebug()

        OneOrMore(term).parseString("abc 123 xyz 890")

    prints::
        Match alphaword at loc 0(1,1)
        Matched alphaword -> ['abc']
        Match alphaword at loc 3(1,4)
        Exception raised:Expected alphaword (at char 4), (line:1, col:5)
        Match alphaword at loc 7(1,8)
        Matched alphaword -> ['xyz']
        Match alphaword at loc 11(1,12)
        Exception raised:Expected alphaword (at char 12), (line:1, col:13)
        Match alphaword at loc 15(1,16)
        Exception raised:Expected alphaword (at char 15), (line:1, col:16)

    The output shown is that produced by the default debug actions - custom debug actions can be
    specified using L{setDebugActions}. Prior to attempting
    to match the C{wd} expression, the debugging message C{"Match <exprname> at loc <n>(<line>,<col>)"}
    is shown. Then if the parse succeeds, a C{"Matched"} message is shown, or an C{"Exception raised"}
    message is shown. Also note the use of L{setName} to assign a human-readable name to the expression,
    which makes debugging and exception messages easier to understand - for instance, the default
    name created for the C{Word} expression without calling C{setName} is C{"W:(ABCD...)"}.
    """
    if not flag:
        self.debug = False
        return self
    # enabling: install the three default actions (this also sets self.debug)
    self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
    return self

2152

def __str__( self ):
    """Return this element's C{name} attribute."""
    label = self.name
    return label

2155

def __repr__( self ):
    """Return the result of passing this element to C{_ustr}."""
    text = _ustr(self)
    return text

2158

def streamline( self ):
    """
    Mark this element as streamlined and reset its cached C{strRepr} to
    C{None}. Returns C{self}.
    """
    self.strRepr = None
    self.streamlined = True
    return self

2163

def checkRecursion( self, parseElementList ):
    """
    Recursion check hook; this base implementation does nothing and
    returns C{None}.
    """
    return None

2166

def validate( self, validateTrace=[] ):
    """
    Check defined expressions for valid structure, check for infinite recursive definitions.
    This implementation runs C{checkRecursion} with a fresh empty list;
    C{validateTrace} is accepted but not used here.
    """
    seen = []
    self.checkRecursion(seen)

2172

def parseFile( self, file_or_filename, parseAll=False ):
    """
    Execute the parse expression on the given file or filename.
    If the argument has no usable C{read()} (an C{AttributeError} is raised),
    it is treated as a filename: the file is opened in text mode, read
    completely and closed before parsing.

    The contents and C{parseAll} are handed to C{parseString}, whose result
    is returned.
    """
    try:
        text = file_or_filename.read()
    except AttributeError:
        # not a file-like object: treat the argument as a path
        with open(file_or_filename, "r") as handle:
            text = handle.read()
    try:
        return self.parseString(text, parseAll)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # re-raising from here clears out the pyparsing internal stack trace
        raise exc

2192

def __eq__(self,other):
    """
    Equality test with three cases:
     - another C{ParserElement}: equal if it is the same object or both
       have equal instance attributes (C{vars()})
     - a string: the result of C{self.matches(other)}
     - anything else: comparison of the C{super} proxy with C{other}
    """
    if isinstance(other, ParserElement):
        if self is other:
            return True
        return vars(self) == vars(other)
    if isinstance(other, basestring):
        return self.matches(other)
    return super(ParserElement,self)==other

2200

def __ne__(self,other):
    """Negation of C{self == other}."""
    same = (self == other)
    return not same

2203

def __hash__(self):
    """Hash based on object identity (C{id(self)}), not on attribute values."""
    identity = id(self)
    return hash(identity)

2206

def __req__(self,other):
    """Reflected equality helper; returns C{self == other}."""
    same = (self == other)
    return same

2209

def __rne__(self,other):
    """Reflected inequality helper; returns the negation of C{self == other}."""
    same = (self == other)
    return not same

2212

def matches(self, testString, parseAll=True):
    """
    Method for quick testing of a parser against a test string. Good for simple
    inline microtests of sub expressions while building up larger parser.

    Parameters:
     - testString - to test against this expression for a match
     - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests

    Returns C{True} if C{parseString} completes, C{False} if it raises a
    C{ParseBaseException}.

    Example::
        expr = Word(nums)
        assert expr.matches("100")
    """
    try:
        self.parseString(_ustr(testString), parseAll=parseAll)
    except ParseBaseException:
        return False
    else:
        return True

2231

def runTests(self, tests, parseAll=True, comment='#', fullDump=True, printResults=True, failureTests=False):
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:
     - tests - a list of separate test strings, or a multiline string of test strings
     - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests
     - comment - (default=C{'#'}) - expression for indicating embedded comments in the test
          string; pass None to disable comment filtering
     - fullDump - (default=C{True}) - dump results as list followed by results names in nested outline;
          if False, only dump nested list
     - printResults - (default=C{True}) prints test output to stdout
     - failureTests - (default=C{False}) indicates if these tests are expected to fail parsing

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if C{failureTests} is True), and results is a list of
    C{(test string, result)} pairs, the result being either the parsed
    results or the exception that was raised for that test

    Example::
        number_expr = pyparsing_common.number.copy()

        result = number_expr.runTests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            # integer with scientific notation
            1e-12
            ''')
        print("Success" if result[0] else "Failed!")

        result = number_expr.runTests('''
            # stray character
            100Z
            # missing leading digit before '.'
            -.100
            # too many '.'
            3.14.159
            ''', failureTests=True)
        print("Success" if result[0] else "Failed!")
    prints::
        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        # integer with scientific notation
        1e-12
        [1e-12]

        Success

        # stray character
        100Z
           ^
        FAIL: Expected end of text (at char 3), (line:1, col:4)

        # missing leading digit before '.'
        -.100
        ^
        FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

        # too many '.'
        3.14.159
            ^
        FAIL: Expected end of text (at char 4), (line:1, col:5)

        Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this::

        expr.runTests(r"this is a test\\n of strings that spans \\n 3 lines")

    (Note that this is a raw string literal, you must include the leading 'r'.)
    """
    # a multiline string is split into one stripped test per line
    if isinstance(tests, basestring):
        tests = list(map(str.strip, tests.rstrip().splitlines()))
    if isinstance(comment, basestring):
        comment = Literal(comment)
    allResults = []
    comments = []
    success = True
    for t in tests:
        # collect comment lines (and blank lines that follow a comment) so that
        # they are shown together with the next real test
        if comment is not None and comment.matches(t, False) or comments and not t:
            comments.append(t)
            continue
        if not t:
            continue
        out = ['\n'.join(comments), t]
        comments = []
        try:
            # a literal backslash-n in the test text stands for a real newline
            t = t.replace(r'\n','\n')
            result = self.parseString(t, parseAll=parseAll)
            out.append(result.dump(full=fullDump))
            # a successful parse counts as a failure when failures are expected
            success = success and not failureTests
        except ParseBaseException as pe:
            fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else ""
            if '\n' in t:
                # multi-line test: show the offending line, then a caret under the error column
                out.append(line(pe.loc, t))
                out.append(' '*(col(pe.loc,t)-1) + '^' + fatal)
            else:
                out.append(' '*pe.loc + '^' + fatal)
            out.append("FAIL: " + str(pe))
            success = success and failureTests
            result = pe
        except Exception as exc:
            # any non-parse exception is reported and recorded as the result as well
            out.append("FAIL-EXCEPTION: " + str(exc))
            success = success and failureTests
            result = exc

        if printResults:
            if fullDump:
                out.append('')
            print('\n'.join(out))

        allResults.append((t, result))

    return success, allResults

2362

2363

class Token(ParserElement):
    """
    Abstract C{ParserElement} subclass, for defining atomic matching patterns.
    """
    def __init__( self ):
        # tokens never ask the base class to save their results as a list
        super(Token, self).__init__(savelist=False)

2370

2371

class Empty(Token):
    """
    An empty token, will always match.
    """
    def __init__( self ):
        super(Empty, self).__init__()
        self.name = "Empty"
        # flags for the parsing machinery: may match nothing, never raises IndexError
        self.mayIndexError = False
        self.mayReturnEmpty = True

2381

2382

class NoMatch(Token):
    """
    A token that will never match.
    """
    def __init__( self ):
        super(NoMatch, self).__init__()
        self.name = "NoMatch"
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Always fail: raise C{ParseException} at C{loc} with this token's error message."""
        failure = ParseException(instring, loc, self.errmsg, self)
        raise failure

2396

2397

class Literal(Token):
    """
    Token to exactly match a specified string.

    Example::
        Literal('blah').parseString('blah')  # -> ['blah']
        Literal('blah').parseString('blahfooblah')  # -> ['blah']
        Literal('blah').parseString('bla')  # -> Exception: Expected "blah"

    For case-insensitive matching, use L{CaselessLiteral}.

    For keyword matching (force word break before and after the matched string),
    use L{Keyword} or L{CaselessKeyword}.
    """
    def __init__( self, matchString ):
        super(Literal, self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # an empty literal is turned into an Empty token, with a warning
            warnings.warn("null string passed to Literal; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string  and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    #~ @profile
    def parseImpl( self, instring, loc, doActions=True ):
        """
        Match the literal at C{loc}; returns C{(end location, matched string)}
        or raises C{ParseException}.
        """
        if instring[loc] == self.firstMatchChar:
            # single-character literals are done after the first-char test
            if self.matchLen == 1 or instring.startswith(self.match, loc):
                return loc + self.matchLen, self.match
        raise ParseException(instring, loc, self.errmsg, self)
_L = Literal
ParserElement._literalStringClass = Literal

2438

class Keyword(Token):
    """
    Token to exactly match a specified string as a keyword, that is, it must be
    immediately followed by a non-keyword character.  Compare with C{L{Literal}}:
     - C{Literal("if")} will match the leading C{'if'} in C{'ifAndOnlyIf'}.
     - C{Keyword("if")} will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
    Accepts two optional constructor arguments in addition to the keyword string:
     - C{identChars} is a string of characters that would be valid identifier characters,
          defaulting to all alphanumerics + "_" and "$"
     - C{caseless} allows case-insensitive matching, default is C{False}.

    Example::
        Keyword("start").parseString("start")  # -> ['start']
        Keyword("start").parseString("starting")  # -> Exception

    For case-insensitive matching, use L{CaselessKeyword}.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__( self, matchString, identChars=None, caseless=False ):
        super(Keyword,self).__init__()
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                    SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparison is done on upper-cased text throughout
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Match the keyword at C{loc}, requiring that the characters directly
        before and after the match (if any) are not identifier characters.
        Returns C{(end location, keyword string)} or raises C{ParseException}.
        """
        if self.caseless:
            if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
                 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
                 (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
                return loc+self.matchLen, self.match
        else:
            if (instring[loc] == self.firstMatchChar and
                (self.matchLen==1 or instring.startswith(self.match,loc)) and
                (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
                (loc == 0 or instring[loc-1] not in self.identChars) ):
                return loc+self.matchLen, self.match
        raise ParseException(instring, loc, self.errmsg, self)

    def copy(self):
        """
        Return a copy of this keyword. The copy keeps this instance's
        identifier characters (as its own set) instead of being reset to
        C{Keyword.DEFAULT_KEYWORD_CHARS}, so a custom C{identChars} survives
        copying.
        """
        c = super(Keyword,self).copy()
        c.identChars = set(self.identChars)
        return c

    @staticmethod
    def setDefaultKeywordChars( chars ):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars

2503

class CaselessLiteral(Literal):
    """
    Token to match a specified string, ignoring case of letters.
    Note: the matched results will always be in the case of the given
    match string, NOT the case of the input text.

    Example::
        OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for L{CaselessKeyword}.)
    """
    def __init__( self, matchString ):
        # the base class stores the upper-cased form used for comparison
        super(CaselessLiteral, self).__init__(matchString.upper())
        # Preserve the defining literal.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Compare the upper-cased input slice at C{loc} with the stored match
        string; returns C{(end location, defining literal)} or raises
        C{ParseException}.
        """
        end = loc + self.matchLen
        candidate = instring[loc:end].upper()
        if candidate == self.match:
            return end, self.returnString
        raise ParseException(instring, loc, self.errmsg, self)

2526

class CaselessKeyword(Keyword):
    """
    Caseless version of L{Keyword}.

    Example::
        OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD']

    (Contrast with example for L{CaselessLiteral}.)
    """
    def __init__( self, matchString, identChars=None ):
        super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Match the keyword at C{loc}, ignoring case.

        Delegates to C{Keyword.parseImpl}, whose caseless branch checks the
        character before the match as well as the one after it, so that this
        class agrees with C{Keyword(..., caseless=True)}.
        Returns C{(end location, keyword string)} or raises C{ParseException}.
        """
        return super(CaselessKeyword,self).parseImpl( instring, loc, doActions )

2544

class CloseMatch(Token):
    """
    A variation on L{Literal} which matches "close" matches, that is,
    strings with at most 'n' mismatching characters. C{CloseMatch} takes parameters:
     - C{match_string} - string to be matched
     - C{maxMismatches} - (C{default=1}) maximum number of mismatches allowed to count as a match

    The results from a successful parse will contain the matched text from the input string and the following named results:
     - C{mismatches} - a list of the positions within the match_string where mismatches were found
     - C{original} - the original match_string used to compare against the input string

    If C{mismatches} is an empty list, then the match was an exact match.

    Example::
        patt = CloseMatch("ATCATCGAATGGA")
        patt.parseString("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parseString("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parseString("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2)
        patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """
    def __init__(self, match_string, maxMismatches=1):
        super(CloseMatch, self).__init__()
        self.name = match_string
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = "Expected %r (with up to %d mismatches)" % (self.match_string, self.maxMismatches)
        self.mayIndexError = False
        self.mayReturnEmpty = False

    def parseImpl(self, instring, loc, doActions=True):
        """Compare the input at C{loc} with C{match_string}, character by character.

        Returns C{(new_loc, results)}, where C{results} holds the matched
        input text plus the named results C{original} and C{mismatches}.
        Raises L{ParseException} when fewer than C{len(match_string)}
        characters remain or when more than C{maxMismatches} positions differ.
        """
        start = loc
        maxloc = start + len(self.match_string)

        if maxloc <= len(instring):
            mismatches = []
            compared = zip(instring[start:maxloc], self.match_string)
            for offset, (src, mat) in enumerate(compared):
                if src != mat:
                    mismatches.append(offset)
                    if len(mismatches) > self.maxMismatches:
                        break
            else:
                # The loop compared every character, so the match ends at
                # maxloc.  This must be an absolute position in instring:
                # using the loop offset alone is only right when start == 0.
                loc = maxloc
                results = ParseResults([instring[start:loc]])
                results['original'] = self.match_string
                results['mismatches'] = mismatches
                return loc, results

        raise ParseException(instring, loc, self.errmsg, self)

2604

2605

class Word(Token):
    """
    Token that matches a run of characters drawn from allowed character sets.
    C{initChars} lists the characters permitted in the first position;
    C{bodyChars}, when given, lists those permitted in the remaining positions
    (when omitted, the initial set is used for the body as well). Lengths are
    controlled with C{min}, C{max} and C{exact}: C{min} defaults to 1 (a
    minimum value < 1 is not valid), while C{max} and C{exact} default to 0,
    meaning no maximum or exact length restriction. C{excludeChars} names
    characters to remove from the given character sets; useful to define a
    word of all printables except for one or two characters, for instance.

    L{srange} is useful for defining custom character set strings for defining
    C{Word} expressions, using range notation from regular expression character sets.

    A common mistake is to use C{Word} to match a specific literal string, as in
    C{Word("Address")}. Remember that C{Word} uses the string argument to define
    I{sets} of matchable characters. This expression would match "Add", "AAA",
    "dAred", or any other word made up of the characters 'A', 'd', 'r', 'e', and 's'.
    To match an exact literal string, use L{Literal} or L{Keyword}.

    pyparsing includes helper strings for building Words:
     - L{alphas}
     - L{nums}
     - L{alphanums}
     - L{hexnums}
     - L{alphas8bit} (alphabetic characters in ASCII range 128-255 - accented, tilded, umlauted, etc.)
     - L{punc8bit} (non-alphabetic characters in ASCII range 128-255 - currency, symbols, superscripts, diacriticals, etc.)
     - L{printables} (any non-whitespace character)

    Example::
        # a word composed of digits
        integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))

        # a word with a leading capital, and zero or more lowercase
        capital_word = Word(alphas.upper(), alphas.lower())

        # hostnames are alphanumeric, with leading alpha, and '-'
        hostname = Word(alphas, alphanums+'-')

        # roman numeral (not a strict parser, accepts invalid mix of characters)
        roman = Word("IVXLCDM")

        # any string of non-whitespace characters, except for ','
        csv_value = Word(printables, excludeChars=",")
    """
    def __init__(self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None):
        super(Word, self).__init__()
        if excludeChars:
            initChars = ''.join(ch for ch in initChars if ch not in excludeChars)
            if bodyChars:
                bodyChars = ''.join(ch for ch in bodyChars if ch not in excludeChars)

        # an empty or missing body set means "same as the initial set"
        effectiveBody = bodyChars if bodyChars else initChars
        self.initCharsOrig = initChars
        self.initChars = set(initChars)
        self.bodyCharsOrig = effectiveBody
        self.bodyChars = set(effectiveBody)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        # the name is derived from __str__, which needs the *Orig attributes above
        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # With default length limits and no space in either character set,
        # the word is matched through a precompiled regular expression.
        if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
            if self.bodyCharsOrig == self.initCharsOrig:
                reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.initCharsOrig) == 1:
                reString = "%s[%s]*" % (
                    re.escape(self.initCharsOrig),
                    _escapeRegexRangeChars(self.bodyCharsOrig),
                )
            else:
                reString = "[%s][%s]*" % (
                    _escapeRegexRangeChars(self.initCharsOrig),
                    _escapeRegexRangeChars(self.bodyCharsOrig),
                )
            if self.asKeyword:
                reString = r"\b" + reString + r"\b"
            self.reString = reString
            try:
                self.re = re.compile(reString)
            except Exception:
                # fall back to the character-by-character scan in parseImpl
                self.re = None

    def parseImpl(self, instring, loc, doActions=True):
        """Match a word at C{loc}.

        Uses the precompiled regular expression when one is available,
        otherwise scans character by character. Returns
        C{(new_loc, matched_text)}; raises L{ParseException} on failure.
        """
        if self.re:
            found = self.re.match(instring, loc)
            if not found:
                raise ParseException(instring, loc, self.errmsg, self)
            return found.end(), found.group()

        if instring[loc] not in self.initChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        maxloc = min(start + self.maxLen, instrlen)
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        tooShort = loc - start < self.minLen
        # an explicit max was given and the word continues past it
        runsPastMax = self.maxSpecified and loc < instrlen and instring[loc] in bodychars
        # in keyword mode the word may not touch body characters on either side
        touchesBody = self.asKeyword and (
            (start > 0 and instring[start-1] in bodychars)
            or (loc < instrlen and instring[loc] in bodychars)
        )

        if tooShort or runsPastMax or touchesBody:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__(self):
        try:
            return super(Word, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:

            def abbreviate(chars):
                # show at most the first four characters of a set
                return chars[:4] + "..." if len(chars) > 4 else chars

            if self.initCharsOrig == self.bodyCharsOrig:
                self.strRepr = "W:(%s)" % abbreviate(self.initCharsOrig)
            else:
                self.strRepr = "W:(%s,%s)" % (abbreviate(self.initCharsOrig), abbreviate(self.bodyCharsOrig))

        return self.strRepr

2763

2764

class Regex(Token):
    r"""
    Token for matching strings that match a given regular expression.
    Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    If the given regex contains named groups (defined using C{(?P<name>...)}), these will be preserved as
    named parse results.

    Example::
        realnum = Regex(r"[+-]?\d+\.\d*")
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')
        # ref: http://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")
    """
    compiledREtype = type(re.compile("[A-Z]"))
    def __init__(self, pattern, flags=0):
        """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.

        C{pattern} may also be an already compiled pattern object, which is
        then used directly (C{flags} is only recorded, not applied).

        Raises C{re.error} if a pattern string does not compile, and
        C{ValueError} if C{pattern} is neither a string nor a compiled
        pattern object.
        """
        super(Regex, self).__init__()

        if isinstance(pattern, basestring):
            if not pattern:
                warnings.warn("null string passed to Regex; use Empty() instead",
                              SyntaxWarning, stacklevel=2)

            self.pattern = pattern
            self.flags = flags

            try:
                self.re = re.compile(self.pattern, self.flags)
                self.reString = self.pattern
            except re.error:
                # warn with the offending pattern, then let the error propagate
                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                              SyntaxWarning, stacklevel=2)
                raise

        elif isinstance(pattern, Regex.compiledREtype):
            self.re = pattern
            # Record the source text of the expression; str(pattern) would
            # store the "re.compile(...)" repr instead of the pattern itself.
            self.pattern = self.reString = pattern.pattern
            self.flags = flags

        else:
            raise ValueError("Regex may only be constructed with a string or a compiled RE object")

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Apply the regular expression at C{loc}.

        Returns C{(new_loc, results)}, where C{results} holds the matched
        text and one named result per named group of the expression.
        Raises L{ParseException} if the expression does not match at C{loc}.
        """
        result = self.re.match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        ret = ParseResults(result.group())
        for groupName, groupText in result.groupdict().items():
            ret[groupName] = groupText
        return result.end(), ret

    def __str__(self):
        try:
            return super(Regex, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr

2836

2837

class QuotedString(Token):
    r"""
    Token that matches text enclosed in quoting delimiters.

    Constructor parameters:
     - quoteChar - string of one or more characters defining the quote delimiting string
     - escChar - character to escape quotes, typically backslash (default=C{None})
     - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=C{None})
     - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
     - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
     - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
     - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True})

    Example::
        qs = QuotedString('"')
        print(qs.searchString('lsjdf "This is the quote" sldjf'))
        complex_qs = QuotedString('{{', endQuoteChar='}}')
        print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf'))
        sql_qs = QuotedString('"', escQuote='""')
        print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))
    prints::
        [['This is the quote']]
        [['This is the "quote"']]
        [['This is the quote with "embedded" quotes']]
    """
    def __init__(self, quoteChar, escChar=None, escQuote=None, multiline=False,
                 unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
        super(QuotedString, self).__init__()

        # surrounding whitespace is stripped from the delimiters; an empty
        # delimiter is rejected
        quoteChar = quoteChar.strip()
        if not quoteChar:
            warnings.warn("quoteChar cannot be the empty string", SyntaxWarning, stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is not None:
            endQuoteChar = endQuoteChar.strip()
            if not endQuoteChar:
                warnings.warn("endQuoteChar cannot be the empty string", SyntaxWarning, stacklevel=2)
                raise SyntaxError()
        else:
            endQuoteChar = quoteChar

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults
        self.convertWhitespaceEscapes = convertWhitespaceEscapes

        # --- assemble the regular expression, piece by piece ---
        if escChar is None:
            escRange = ''
        else:
            escRange = _escapeRegexRangeChars(escChar) or ''

        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            lineBreaks = ''
        else:
            self.flags = 0
            # single-line strings additionally stop at CR / LF
            lineBreaks = r'\n\r'

        # opening quote, then the start of the group of permitted content:
        # any character that is not the first end-quote character, a line
        # break (single-line mode only) or the escape character
        pieces = [r'%s(?:[^%s%s%s]' % (re.escape(quoteChar),
                                       _escapeRegexRangeChars(endQuoteChar[0]),
                                       lineBreaks,
                                       escRange)]

        if len(endQuoteChar) > 1:
            # a proper prefix of a multi-character end quote is allowed as
            # content when the character after it breaks the end-quote sequence
            partials = ["%s[^%s]" % (re.escape(endQuoteChar[:i]),
                                     _escapeRegexRangeChars(endQuoteChar[i]))
                        for i in range(len(endQuoteChar) - 1, 0, -1)]
            pieces.append('|(?:' + ')|(?:'.join(partials) + ')')
        if escQuote:
            pieces.append(r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            pieces.append(r'|(?:%s.)' % re.escape(escChar))
            self.escCharReplacePattern = re.escape(escChar) + "(.)"
        pieces.append(r')*%s' % re.escape(endQuoteChar))
        self.pattern = ''.join(pieces)

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                          SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Match a quoted string starting at C{loc}.

        Returns C{(new_loc, text)}. When C{unquoteResults} is set the
        delimiters are removed and escapes are resolved; otherwise the raw
        matched text is returned. Raises L{ParseException} on failure.
        """
        # cheap first-character test before running the regular expression
        if instring[loc] == self.firstQuoteChar:
            found = self.re.match(instring, loc)
        else:
            found = None
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = found.end()
        ret = found.group()

        if not self.unquoteResults:
            return loc, ret

        # drop the opening and closing delimiters
        ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

        if not isinstance(ret, basestring):
            return loc, ret

        # turn escaped whitespace literals into the actual characters
        if '\\' in ret and self.convertWhitespaceEscapes:
            for wslit, wschar in ((r'\t', '\t'), (r'\n', '\n'), (r'\f', '\f'), (r'\r', '\r')):
                ret = ret.replace(wslit, wschar)

        # an escaped character stands for itself
        if self.escChar:
            ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret)

        # an escaped quote sequence stands for the end quote
        if self.escQuote:
            ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def __str__(self):
        try:
            return super(QuotedString, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr

2973

2974

class CharsNotIn(Token):
    """
    Token that matches a run of characters I{not} contained in a given set
    (whitespace is part of the match unless it is listed in the exclusion
    set - see example). C{notChars} is the string of disallowed characters;
    C{min}, C{max} and C{exact} optionally constrain the length. C{min}
    defaults to 1 (a minimum value < 1 is not valid); C{max} and C{exact}
    default to 0, meaning no maximum or exact length restriction.

    Example::
        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213"))
    prints::
        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """
    def __init__(self, notChars, min=1, max=0, exact=0):
        super(CharsNotIn, self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = (self.minLen == 0)
        self.mayIndexError = False

    def parseImpl(self, instring, loc, doActions=True):
        """Consume characters from C{loc} until a disallowed one or the
        maximum length is reached.

        Returns C{(new_loc, matched_text)}; raises L{ParseException} if the
        first character is disallowed or the run is shorter than C{minLen}.
        """
        forbidden = self.notChars
        if instring[loc] in forbidden:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        limit = min(start + self.maxLen, len(instring))
        while loc < limit and instring[loc] not in forbidden:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__(self):
        try:
            return super(CharsNotIn, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            # show at most the first four excluded characters
            if len(self.notChars) > 4:
                template, shown = "!W:(%s...)", self.notChars[:4]
            else:
                template, shown = "!W:(%s)", self.notChars
            self.strRepr = template % shown

        return self.strRepr

3045

class White(Token):
    r"""
    Special matching class for matching whitespace. Normally, whitespace is
    ignored by pyparsing grammars; use this class where some whitespace
    structures are significant. Define with a string containing the
    whitespace characters to be matched; default is C{" \t\r\n"}. Also takes
    optional C{min}, C{max}, and C{exact} arguments, as defined for the
    C{L{Word}} class.
    """
    # display names used to build the expression's name
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }
    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        super(White, self).__init__()
        self.matchWhite = ws
        # characters this token must match are removed from the set of
        # whitespace characters configured on the element
        remaining = "".join(ch for ch in self.whiteChars if ch not in self.matchWhite)
        self.setWhitespaceChars(remaining)
        self.name = "".join(White.whiteStrs[ch] for ch in self.matchWhite)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

    def parseImpl(self, instring, loc, doActions=True):
        """Consume a run of the configured whitespace characters at C{loc}.

        Returns C{(new_loc, matched_text)}; raises L{ParseException} if the
        first character is not one of them or the run is shorter than
        C{minLen}.
        """
        if instring[loc] not in self.matchWhite:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        limit = min(start + self.maxLen, len(instring))
        while loc < limit and instring[loc] in self.matchWhite:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

3095

3096

class _PositionToken(Token):
    """Common base for the position tokens defined after it.

    Sets the name to the concrete class's name and marks the expression as
    one that may return an empty match and does not raise C{IndexError}.
    """
    def __init__(self):
        super(_PositionToken, self).__init__()
        self.name = type(self).__name__
        self.mayReturnEmpty = True
        self.mayIndexError = False

3103

class GoToColumn(_PositionToken):
    """
    Token that advances to a given column of the input text; useful for
    tabular report scraping.
    """
    def __init__(self, colno):
        super(GoToColumn, self).__init__()
        self.col = colno

    def preParse(self, instring, loc):
        """Skip ignorable expressions and whitespace until the target column
        (or a non-space character, or the end of input) is reached."""
        if col(loc, instring) == self.col:
            return loc

        instrlen = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)
        while loc < instrlen and instring[loc].isspace() and col(loc, instring) != self.col:
            loc += 1
        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """Return the text between C{loc} and the target column.

        Raises L{ParseException} if C{loc} is already past that column.
        """
        current = col(loc, instring)
        if current > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)
        target = loc + self.col - current
        return target, instring[loc:target]

3128

3129

class LineStart(_PositionToken):
    r"""
    Matches when the current position is the first column of a line in the
    parse string.

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (LineStart() + 'AAA' + restOfLine).searchString(test):
            print(t)

    Prints::
        ['AAA', ' this line']
        ['AAA', ' and this line']

    """
    def __init__(self):
        super(LineStart, self).__init__()
        self.errmsg = "Expected start of line"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with an empty result when C{loc} is in column 1;
        raise L{ParseException} otherwise."""
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3159

class LineEnd(_PositionToken):
    """
    Matches when the current position is at the end of a line in the parse
    string.
    """
    def __init__(self):
        super(LineEnd, self).__init__()
        # newline is taken out of the whitespace set so it is left for
        # parseImpl to see
        self.setWhitespaceChars(ParserElement.DEFAULT_WHITE_CHARS.replace("\n", ""))
        self.errmsg = "Expected end of line"

    def parseImpl(self, instring, loc, doActions=True):
        """Match a newline at C{loc} or the end of the input.

        Returns C{(loc+1, "\\n")} for a newline and C{(loc+1, [])} exactly at
        the end of the input; raises L{ParseException} in every other case.
        """
        end = len(instring)
        if loc == end:
            return loc + 1, []
        if loc < end and instring[loc] == "\n":
            return loc + 1, "\n"
        raise ParseException(instring, loc, self.errmsg, self)

3179

class StringStart(_PositionToken):
    """
    Matches when the current position is at the beginning of the parse string.
    """
    def __init__(self):
        super(StringStart, self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with an empty result at position 0, or at a later position
        that C{preParse} reaches from position 0 (i.e. only whitespace and
        ignorables precede it); raise L{ParseException} otherwise."""
        if loc != 0 and loc != self.preParse(instring, 0):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3194

class StringEnd(_PositionToken):
    """
    Matches if current position is at the end of the parse string
    """
    def __init__(self):
        super(StringEnd, self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed only at or beyond the end of the input.

        Returns C{(loc+1, [])} when C{loc} is exactly the input length and
        C{(loc, [])} when it is already past it; raises L{ParseException}
        while unparsed input remains.
        """
        end = len(instring)
        if loc < end:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == end:
            return loc + 1, []
        # loc > end is the only remaining case; the former trailing
        # "else: raise" could never be reached.
        return loc, []

3212

class WordStart(_PositionToken):
    r"""
    Matches if the current position is at the beginning of a Word, and
    is not preceded by any character in a given set of C{wordChars}
    (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
    use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
    the string being parsed, or at the beginning of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordStart, self).__init__()
        self.wordChars = set(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with an empty result at position 0, or where the previous
        character is not a word character and the current one is; raise
        L{ParseException} otherwise."""
        if loc != 0:
            precededByWord = instring[loc-1] in self.wordChars
            if precededByWord or instring[loc] not in self.wordChars:
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3232

class WordEnd(_PositionToken):
    r"""
    Matches if the current position is at the end of a Word, and
    is not followed by any character in a given set of C{wordChars}
    (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
    use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
    the string being parsed, or at the end of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordEnd, self).__init__()
        self.wordChars = set(wordChars)
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with an empty result at or past the end of the input, or
        where the current character is not a word character and the previous
        one is; raise L{ParseException} otherwise."""
        instrlen = len(instring)
        if instrlen > 0 and loc < instrlen:
            followedByWord = instring[loc] in self.wordChars
            # NOTE(review): at loc == 0, instring[loc-1] is instring[-1],
            # i.e. the last character of the input - confirm this is intended.
            if followedByWord or instring[loc-1] not in self.wordChars:
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3254

3255

3256class ParseExpression(ParserElement):

3257 """

3258 Abstract subclass of ParserElement, for combining and post-processing parsed tokens.

3259 """

3260 def __init__( self, exprs, savelist = False ):

3261 super(ParseExpression,self).__init__(savelist)

3262 if isinstance( exprs, _generatorType ): 3262 ↛ 3263line 3262 didn't jump to line 3263, because the condition on line 3262 was never true

3263 exprs = list(exprs)

3264

3265 if isinstance( exprs, basestring ): 3265 ↛ 3266line 3265 didn't jump to line 3266, because the condition on line 3265 was never true

3266 self.exprs = [ ParserElement._literalStringClass( exprs ) ]

3267 elif isinstance( exprs, Iterable ): 3267 ↛ 3274line 3267 didn't jump to line 3274, because the condition on line 3267 was never false

3268 exprs = list(exprs)

3269 # if sequence of strings provided, wrap with Literal

3270 if all(isinstance(expr, basestring) for expr in exprs): 3270 ↛ exit, 3270 ↛ 32712 missed branches: 1) line 3270 didn't finish the generator expression on line 3270, 2) line 3270 didn't jump to line 3271, because the condition on line 3270 was never true

3271 exprs = map(ParserElement._literalStringClass, exprs)

3272 self.exprs = list(exprs)

3273 else:

3274 try:

3275 self.exprs = list( exprs )

3276 except TypeError:

3277 self.exprs = [ exprs ]

3278 self.callPreparse = False

3279

3280 def __getitem__( self, i ):

3281 return self.exprs[i]

3282

3283 def append( self, other ):

3284 self.exprs.append( other )

3285 self.strRepr = None

3286 return self

3287

3288 def leaveWhitespace( self ):

3289 """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on

3290 all contained expressions."""

3291 self.skipWhitespace = False

3292 self.exprs = [ e.copy() for e in self.exprs ]

3293 for e in self.exprs:

3294 e.leaveWhitespace()

3295 return self

3296

def ignore( self, other ):
    """Register C{other} as an ignorable expression here and on every
    contained expression.

    A C{Suppress} that is already present in C{self.ignoreExprs} is not
    registered a second time. Returns C{self}.
    """
    # an already-known Suppress is a no-op
    if isinstance( other, Suppress ) and other in self.ignoreExprs:
        return self

    super( ParseExpression, self).ignore( other )
    # propagate whatever the base class stored as the newest ignore expression
    newest = self.ignoreExprs[-1]
    for member in self.exprs:
        member.ignore( newest )
    return self

3308

def __str__( self ):
    """Return the base-class string form when that succeeds; otherwise
    build, cache in C{self.strRepr}, and return C{"ClassName:(exprs)"}.
    """
    try:
        return super(ParseExpression,self).__str__()
    except Exception:
        # presumably raised when no name has been set - fall back to the generic form
        pass

    text = self.strRepr
    if text is None:
        text = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
        self.strRepr = text
    return text

3318

def streamline( self ):
    """Streamline this expression and every contained expression, then
    flatten one level of same-class nesting where that is safe.

    Returns C{self}.
    """
    super(ParseExpression,self).streamline()

    for member in self.exprs:
        member.streamline()

    # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
    # but only if there are no parse actions or resultsNames on the nested And's
    # (likewise for Or's and MatchFirst's)
    if len(self.exprs) == 2:
        def mergeable( candidate ):
            # same class, and nothing attached that flattening would lose
            return ( isinstance( candidate, self.__class__ ) and
                     not candidate.parseAction and
                     candidate.resultsName is None and
                     not candidate.debug )

        leading = self.exprs[0]
        if mergeable( leading ):
            self.exprs = leading.exprs[:] + [ self.exprs[1] ]
            self.strRepr = None
            self.mayReturnEmpty |= leading.mayReturnEmpty
            self.mayIndexError |= leading.mayIndexError

        # re-read the last element: the list may just have been rebuilt above
        trailing = self.exprs[-1]
        if mergeable( trailing ):
            self.exprs = self.exprs[:-1] + trailing.exprs[:]
            self.strRepr = None
            self.mayReturnEmpty |= trailing.mayReturnEmpty
            self.mayIndexError |= trailing.mayIndexError

    self.errmsg = "Expected " + _ustr(self)

    return self

3352

def setResultsName( self, name, listAllMatches=False ):
    """Delegate to the base-class C{setResultsName} and return its result unchanged."""
    return super(ParseExpression,self).setResultsName(name,listAllMatches)

3356

def validate( self, validateTrace=None ):
    """Validate every contained expression, then check this expression
    for recursion.

    Parameters:
     - validateTrace - (default=C{None}) list of expressions already being
       validated by the callers; it is copied, never modified. C{None} is
       treated as an empty list.
    """
    # a None sentinel replaces the former shared mutable default list
    inherited = [] if validateTrace is None else validateTrace[:]
    trace = inherited + [self]
    for member in self.exprs:
        member.validate(trace)
    self.checkRecursion( [] )

3362

def copy(self):
    """Return a copy of this expression whose contained expressions are
    themselves copies (each obtained from the member's own C{copy()})."""
    duplicate = super(ParseExpression,self).copy()
    duplicate.exprs = [ member.copy() for member in self.exprs ]
    return duplicate

3367

class And(ParseExpression):
    """
    Requires all given C{ParseExpression}s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the C{'+'} operator.
    May also be constructed using the C{'-'} operator, which will suppress backtracking.

    Example::
        integer = Word(nums)
        name_expr = OneOrMore(Word(alphas))

        expr = And([integer("id"),name_expr("name"),integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        """Marker element (named C{'-'}); once C{And.parseImpl} has passed
        one, failures of later elements are raised as C{ParseSyntaxException}."""
        def __init__(self, *args, **kwargs):
            super(And._ErrorStop,self).__init__(*args, **kwargs)
            self.name = '-'
            self.leaveWhitespace()

    def __init__( self, exprs, savelist = True ):
        """Build the sequence and inherit whitespace handling from its first
        element.

        An empty C{exprs} is accepted (as it is by C{Or} and C{MatchFirst});
        in that case the whitespace settings of the base class are kept.
        """
        super(And,self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        # guard the empty case: indexing exprs[0] used to raise IndexError here
        if self.exprs:
            first = self.exprs[0]
            self.setWhitespaceChars( first.whiteChars )
            self.skipWhitespace = first.skipWhitespace
        self.callPreparse = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse every contained expression in order, starting at C{loc}.

        Returns C{(loc, tokens)} with the location after the last element
        and the accumulated tokens. After an C{_ErrorStop} element has been
        passed, a C{ParseBaseException} or C{IndexError} from a later element
        is raised as C{ParseSyntaxException}.
        """
        if not self.exprs:
            # an empty sequence matches trivially without consuming input
            return loc, ParseResults([])

        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
        errorStop = False
        for e in self.exprs[1:]:
            if isinstance(e, And._ErrorStop):
                errorStop = True
                continue
            if errorStop:
                try:
                    loc, exprtokens = e._parse( instring, loc, doActions )
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(instring, len(instring), self.errmsg, self)
            else:
                loc, exprtokens = e._parse( instring, loc, doActions )
            # keep results that carry tokens or named values
            if exprtokens or exprtokens.haskeys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other ):
        """Implement C{+=}: append C{other} (strings are wrapped with
        C{ParserElement._literalStringClass}) and return C{self}."""
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        return self.append( other ) #And( [ self, other ] )

    def checkRecursion( self, parseElementList ):
        """Check contained expressions for recursion, stopping after the
        first one that cannot return empty."""
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
            if not e.mayReturnEmpty:
                break

    def __str__( self ):
        """Return C{self.name} when set, else the cached C{"{a b c}"} form."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}"

        return self.strRepr

3442

3443

class Or(ParseExpression):
    """
    Requires that at least one C{ParseExpression} is found.
    If two expressions match, the expression that matches the longest string will be used.
    May be constructed using the C{'^'} operator.

    Example::
        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.searchString("123 3.1416 789"))
    prints::
        [['123'], ['3.1416'], ['789']]
    """
    def __init__( self, exprs, savelist = False ):
        """Build the alternation; it may return empty when any alternative
        may, or when there are no alternatives at all."""
        super(Or,self).__init__(exprs, savelist)
        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """Try every alternative at C{loc} and return the result of the one
        that consumes the most input.

        Raises the C{ParseException} that got furthest into the input (with
        its message replaced by C{self.errmsg}) when nothing matches, or a
        "no defined alternatives to match" C{ParseException} when no failure
        was recorded.
        """
        furthestLoc = -1
        furthestErr = None
        candidates = []
        for alt in self.exprs:
            try:
                endLoc = alt.tryParse( instring, loc )
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > furthestLoc:
                    furthestErr, furthestLoc = err, err.loc
            except IndexError:
                if len(instring) > furthestLoc:
                    furthestErr = ParseException(instring,len(instring),alt.errmsg,self)
                    furthestLoc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                candidates.append((endLoc, alt))

        if candidates:
            # longest match first; ties keep their original order (stable sort)
            candidates.sort(key=lambda hit: hit[0], reverse=True)
            for _, alt in candidates:
                try:
                    return alt._parse( instring, loc, doActions )
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > furthestLoc:
                        furthestErr, furthestLoc = err, err.loc

        if furthestErr is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthestErr.msg = self.errmsg
        raise furthestErr

    def __ixor__(self, other ):
        """Implement C{^=}: append C{other} (strings are wrapped with
        C{ParserElement._literalStringClass}) and return C{self}."""
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        return self.append( other ) #Or( [ self, other ] )

    def __str__( self ):
        """Return C{self.name} when set, else the cached C{"{a ^ b}"} form."""
        if hasattr(self,"name"):
            return self.name

        text = self.strRepr
        if text is None:
            text = "{" + " ^ ".join(_ustr(e) for e in self.exprs) + "}"
            self.strRepr = text
        return text

    def checkRecursion( self, parseElementList ):
        """Check every alternative for recursion."""
        trail = parseElementList[:] + [ self ]
        for alt in self.exprs:
            alt.checkRecursion( trail )

3521

3522

class MatchFirst(ParseExpression):
    """
    Requires that at least one C{ParseExpression} is found.
    If two expressions match, the first one listed is the one that will match.
    May be constructed using the C{'|'} operator.

    Example::
        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.searchString("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.searchString("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """
    def __init__( self, exprs, savelist = False ):
        """Build the alternation; it may return empty when any alternative
        may, or when there are no alternatives at all."""
        super(MatchFirst,self).__init__(exprs, savelist)
        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """Return the result of the first alternative that parses at C{loc}.

        Raises the C{ParseException} that got furthest into the input (with
        its message replaced by C{self.errmsg}) when nothing matches, or a
        "no defined alternatives to match" C{ParseException} when no failure
        was recorded.
        """
        furthestLoc = -1
        furthestErr = None
        for alt in self.exprs:
            try:
                return alt._parse( instring, loc, doActions )
            except ParseException as err:
                if err.loc > furthestLoc:
                    furthestErr, furthestLoc = err, err.loc
            except IndexError:
                if len(instring) > furthestLoc:
                    furthestErr = ParseException(instring,len(instring),alt.errmsg,self)
                    furthestLoc = len(instring)

        # only got here if no expression matched, raise exception for match that made it the furthest
        if furthestErr is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthestErr.msg = self.errmsg
        raise furthestErr

    def __ior__(self, other ):
        """Implement C{|=}: append C{other} (strings are wrapped with
        C{ParserElement._literalStringClass}) and return C{self}."""
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        return self.append( other ) #MatchFirst( [ self, other ] )

    def __str__( self ):
        """Return C{self.name} when set, else the cached C{"{a | b}"} form."""
        if hasattr(self,"name"):
            return self.name

        text = self.strRepr
        if text is None:
            text = "{" + " | ".join(_ustr(e) for e in self.exprs) + "}"
            self.strRepr = text
        return text

    def checkRecursion( self, parseElementList ):
        """Check every alternative for recursion."""
        trail = parseElementList[:] + [ self ]
        for alt in self.exprs:
            alt.checkRecursion( trail )

3589

3590

class Each(ParseExpression):
    """
    Requires all given C{ParseExpression}s to be found, but in any order.
    Expressions may be separated by whitespace.
    May be constructed using the C{'&'} operator.

    Example::
        color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr)

        shape_spec.runTests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )
    prints::
        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: BLACK
        - posn: ['100', ',', '120']
          - x: 100
          - y: 120
        - shape: SQUARE


        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
        - color: BLUE
        - posn: ['50', ',', '80']
          - x: 50
          - y: 80
        - shape: CIRCLE
        - size: 50


        color: GREEN size: 20 shape: TRIANGLE posn: 20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
        - color: GREEN
        - posn: ['20', ',', '40']
          - x: 20
          - y: 40
        - shape: TRIANGLE
        - size: 20
    """
    def __init__( self, exprs, savelist = True ):
        """Build the unordered group; grouping of the contained expressions
        into required/optional sets is deferred to the first C{parseImpl} call."""
        super(Each,self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = True
        self.initExprGroups = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the contained expressions in whatever order they occur.

        A first pass uses C{tryParse} to discover the order in which the
        expressions match; a second pass re-parses in that order with
        C{_parse} to collect results. Raises C{ParseException} when a
        required expression never matched.
        """
        if self.initExprGroups:
            # one-time classification of the contained expressions
            optionalByInner = {}
            wrappedOptionals = []
            emptyCapable = []
            repeatOptional = []
            repeatRequired = []
            plainRequired = []
            for member in self.exprs:
                if isinstance(member,Optional):
                    optionalByInner[id(member.expr)] = member
                    wrappedOptionals.append(member.expr)
                elif member.mayReturnEmpty:
                    emptyCapable.append(member)
                if isinstance(member,ZeroOrMore):
                    repeatOptional.append(member.expr)
                if isinstance(member,OneOrMore):
                    repeatRequired.append(member.expr)
                if not isinstance(member,(Optional,ZeroOrMore,OneOrMore)):
                    plainRequired.append(member)
            self.opt1map = optionalByInner
            self.optionals = wrappedOptionals + emptyCapable
            self.multioptionals = repeatOptional
            self.multirequired = repeatRequired
            self.required = plainRequired + repeatRequired
            self.initExprGroups = False

        scanLoc = loc
        pendingRequired = self.required[:]
        pendingOptional = self.optionals[:]
        matchOrder = []

        while True:
            attempts = pendingRequired + pendingOptional + self.multioptionals + self.multirequired
            misses = 0
            for candidate in attempts:
                try:
                    scanLoc = candidate.tryParse( instring, scanLoc )
                except ParseException:
                    misses += 1
                else:
                    # record the Optional wrapper rather than its inner expression
                    matchOrder.append(self.opt1map.get(id(candidate),candidate))
                    if candidate in pendingRequired:
                        pendingRequired.remove(candidate)
                    elif candidate in pendingOptional:
                        pendingOptional.remove(candidate)
            # a full pass with no match at all means nothing more can be found
            if misses == len(attempts):
                break

        if pendingRequired:
            missing = ", ".join(_ustr(e) for e in pendingRequired)
            raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

        # add any unmatched Optionals, in case they have default values defined
        matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in pendingOptional]

        collected = []
        for matched in matchOrder:
            loc,results = matched._parse(instring,loc,doActions)
            collected.append(results)

        finalResults = sum(collected, ParseResults([]))
        return loc, finalResults

    def __str__( self ):
        """Return C{self.name} when set, else the cached C{"{a & b}"} form."""
        if hasattr(self,"name"):
            return self.name

        text = self.strRepr
        if text is None:
            text = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}"
            self.strRepr = text
        return text

    def checkRecursion( self, parseElementList ):
        """Check every contained expression for recursion."""
        trail = parseElementList[:] + [ self ]
        for member in self.exprs:
            member.checkRecursion( trail )

3713

3714

class ParseElementEnhance(ParserElement):
    """
    Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.
    """
    def __init__( self, expr, savelist=False ):
        """Wrap C{expr}, which may be a parser element, a string, or C{None}.

        A string is converted with C{ParserElement._literalStringClass}.
        When C{expr} is not C{None}, its matching flags, whitespace settings
        and ignore expressions are copied onto this element.
        """
        super(ParseElementEnhance,self).__init__(savelist)
        if isinstance( expr, basestring ):
            if issubclass(ParserElement._literalStringClass, Token):
                expr = ParserElement._literalStringClass(expr)
            else:
                expr = ParserElement._literalStringClass(Literal(expr))
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars( expr.whiteChars )
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """Delegate parsing to the wrapped expression; raise
        C{ParseException} when no expression has been set."""
        if self.expr is not None:
            return self.expr._parse( instring, loc, doActions, callPreParse=False )
        else:
            raise ParseException("",loc,self.errmsg,self)

    def leaveWhitespace( self ):
        """Disable whitespace skipping here and on a copy of the wrapped
        expression. Returns C{self}."""
        self.skipWhitespace = False
        # copy only when there is something to copy: calling copy() on a
        # missing expression used to raise AttributeError before the None check
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register C{other} as an ignorable expression here and on the
        wrapped expression.

        A C{Suppress} that is already present in C{self.ignoreExprs} is not
        registered a second time. Returns C{self}.
        """
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self
        super( ParseElementEnhance, self).ignore( other )
        if self.expr is not None:
            self.expr.ignore( self.ignoreExprs[-1] )
        return self

    def streamline( self ):
        """Streamline this element and the wrapped expression. Returns C{self}."""
        super(ParseElementEnhance,self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion( self, parseElementList ):
        """Raise C{RecursiveGrammarException} when this element already
        appears in C{parseElementList}; otherwise continue the check on the
        wrapped expression."""
        if self in parseElementList:
            raise RecursiveGrammarException( parseElementList+[self] )
        subRecCheckList = parseElementList[:] + [ self ]
        if self.expr is not None:
            self.expr.checkRecursion( subRecCheckList )

    def validate( self, validateTrace=None ):
        """Validate the wrapped expression, then check this element for
        recursion.

        Parameters:
         - validateTrace - (default=C{None}) list of expressions already being
           validated by the callers; it is copied, never modified. C{None} is
           treated as an empty list.
        """
        # a None sentinel replaces the former shared mutable default list
        inherited = [] if validateTrace is None else validateTrace[:]
        tmp = inherited + [self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion( [] )

    def __str__( self ):
        """Return the base-class string form when that succeeds; otherwise
        the cached C{"ClassName:(expr)"} form (C{None} when there is no
        wrapped expression and nothing is cached)."""
        try:
            return super(ParseElementEnhance,self).__str__()
        except Exception:
            # presumably raised when no name has been set - fall back to the generic form
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
        return self.strRepr

3790

3791

class FollowedBy(ParseElementEnhance):
    """
    Lookahead matching of the given parse expression.  C{FollowedBy}
    does I{not} advance the parsing position within the input string, it only
    verifies that the specified parse expression matches at the current
    position.  C{FollowedBy} always returns a null token list.

    Example::
        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))

        OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint()
    prints::
        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """
    def __init__( self, expr ):
        """Wrap C{expr} as a lookahead; a lookahead may always return empty."""
        super(FollowedBy,self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Verify that the wrapped expression matches at C{loc}; return the
        unchanged location and no tokens. A failed match propagates as the
        exception raised by C{tryParse}."""
        lookahead = self.expr
        # only the success or failure matters, the end position is discarded
        lookahead.tryParse( instring, loc )
        return loc, []

3816

3817

class NotAny(ParseElementEnhance):
    """
    Lookahead to disallow matching with the given parse expression.  C{NotAny}
    does I{not} advance the parsing position within the input string, it only
    verifies that the specified parse expression does I{not} match at the current
    position.  Also, C{NotAny} does I{not} skip over leading whitespace. C{NotAny}
    always returns a null token list.  May be constructed using the '~' operator.

    Example::
        AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())

        # take care not to mistake keywords for identifiers
        ident = ~(AND | OR | NOT) + Word(alphas)
    """
    def __init__( self, expr ):
        """Wrap C{expr} as a negative lookahead."""
        super(NotAny,self).__init__(expr)
        # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """Raise C{ParseException} when the wrapped expression can parse at
        C{loc}; otherwise return the unchanged location and no tokens."""
        unwanted_present = self.expr.canParseNext(instring, loc)
        if not unwanted_present:
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)

    def __str__( self ):
        """Return C{self.name} when set, else the cached C{"~{expr}"} form."""
        if hasattr(self,"name"):
            return self.name

        text = self.strRepr
        if text is None:
            text = "~{" + _ustr(self.expr) + "}"
            self.strRepr = text
        return text

3849

class _MultipleMatch(ParseElementEnhance):
    """Shared implementation of repeated matching of one expression, with
    an optional C{stopOn} sentinel that ends the repetition."""
    def __init__( self, expr, stopOn=None):
        """Wrap C{expr}; C{stopOn} may be an expression, a string (wrapped
        with C{ParserElement._literalStringClass}), or C{None}."""
        super(_MultipleMatch, self).__init__(expr)
        self.saveAsList = True
        sentinel = stopOn
        if isinstance(sentinel, basestring):
            sentinel = ParserElement._literalStringClass(sentinel)
        # store the negated sentinel: parsing it fails exactly where the sentinel matches
        if sentinel is None:
            self.not_ender = None
        else:
            self.not_ender = ~sentinel

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the wrapped expression once (required), then as many more
        times as possible.

        Returns C{(loc, tokens)} for everything matched. A failure of the
        first match, or the sentinel being found at the starting location,
        propagates to the caller; later failures simply end the repetition.
        """
        parse_one = self.expr._parse
        skip_ignorables = self._skipIgnorables
        reject_sentinel = None
        if self.not_ender is not None:
            reject_sentinel = self.not_ender.tryParse

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if reject_sentinel is not None:
            reject_sentinel(instring, loc)
        loc, tokens = parse_one( instring, loc, doActions, callPreParse=False )
        try:
            has_ignorables = bool(self.ignoreExprs)
            while True:
                if reject_sentinel is not None:
                    reject_sentinel(instring, loc)
                preloc = skip_ignorables( instring, loc ) if has_ignorables else loc
                loc, more = parse_one( instring, preloc, doActions )
                if more or more.haskeys():
                    tokens += more
        except (ParseException,IndexError):
            # no further repetition found: keep what has been collected so far
            pass

        return loc, tokens

3887

class OneOrMore(_MultipleMatch):
    """
    Repetition of one or more of the given expression.

    Parameters:
     - expr - expression that must match one or more times
     - stopOn - (default=C{None}) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)

    Example::
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))

        text = "shape: SQUARE posn: upper left color: BLACK"
        OneOrMore(attr_expr).parseString(text).pprint()  # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]

        # use stopOn attribute for OneOrMore to avoid reading label string as part of the data
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
        OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        (attr_expr * (1,)).parseString(text).pprint()
    """

    def __str__( self ):
        """Return C{self.name} when set, else the cached C{"{expr}..."} form."""
        if hasattr(self,"name"):
            return self.name

        text = self.strRepr
        if text is None:
            text = "{" + _ustr(self.expr) + "}..."
            self.strRepr = text
        return text

3922

class ZeroOrMore(_MultipleMatch):
    """
    Optional repetition of zero or more of the given expression.

    Parameters:
     - expr - expression that must match zero or more times
     - stopOn - (default=C{None}) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)

    Example: similar to L{OneOrMore}
    """
    def __init__( self, expr, stopOn=None):
        """Wrap C{expr}; zero repetitions are allowed, so the element may
        return empty."""
        super(ZeroOrMore,self).__init__(expr, stopOn=stopOn)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse as many repetitions as possible; when not even one matches,
        return the unchanged location and no tokens instead of failing."""
        try:
            matched = super(ZeroOrMore, self).parseImpl(instring, loc, doActions)
        except (ParseException,IndexError):
            return loc, []
        return matched

    def __str__( self ):
        """Return C{self.name} when set, else the cached C{"[expr]..."} form."""
        if hasattr(self,"name"):
            return self.name

        text = self.strRepr
        if text is None:
            text = "[" + _ustr(self.expr) + "]..."
            self.strRepr = text
        return text

3953

class _NullToken(object):
    """Placeholder value that is falsy and renders as an empty string."""
    def __bool__(self):
        """Always false (Python 3 truth protocol)."""
        return False
    # Python 2 spelling of the same truth hook
    __nonzero__ = __bool__
    def __str__(self):
        """Render as the empty string."""
        return ""

# shared sentinel used as the default of Optional's ``default`` argument,
# so that "no default given" can be told apart by identity
_optionalNotMatched = _NullToken()

class Optional(ParseElementEnhance):
    """
    Optional matching of the given expression.

    Parameters:
     - expr - expression that may match zero times or once
     - default (optional) - value to be returned if the optional expression is not found.

    Example::
        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4)))
        zip.runTests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')
    prints::
        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """
    def __init__( self, expr, default=_optionalNotMatched ):
        """Wrap C{expr}; C{default} is returned as the single token when
        the expression does not match."""
        super(Optional,self).__init__( expr, savelist=False )
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse the wrapped expression if possible.

        On failure the location is left unchanged and the tokens are empty,
        or hold the default value when one was given (also stored under the
        wrapped expression's results name when it has one).
        """
        try:
            loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
        except (ParseException,IndexError):
            if self.defaultValue is _optionalNotMatched:
                # no default was supplied
                tokens = []
            elif self.expr.resultsName:
                tokens = ParseResults([ self.defaultValue ])
                tokens[self.expr.resultsName] = self.defaultValue
            else:
                tokens = [ self.defaultValue ]
        return loc, tokens

    def __str__( self ):
        """Return C{self.name} when set, else the cached C{"[expr]"} form."""
        if hasattr(self,"name"):
            return self.name

        text = self.strRepr
        if text is None:
            text = "[" + _ustr(self.expr) + "]"
            self.strRepr = text
        return text

4025

class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched expression is found.

    Parameters:
     - expr - target expression marking the end of the data to be skipped
     - include - (default=C{False}) if True, the target expression is also parsed
          (the skipped text and target expression are returned as a 2-element list).
     - ignore - (default=C{None}) used to define grammars (typically quoted strings and
          comments) that might contain false matches to the target expression
     - failOn - (default=C{None}) define expressions that are not allowed to be
          included in the skipped text; if found before the target expression is found,
          the SkipTo is not a match

    Example::
        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quotedString)
        string_data.setParseAction(tokenMap(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.searchString(report):
            print(tkt.dump())
    prints::
        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: 6
        - desc: Intermittent system crash
        - issue_num: 101
        - sev: Critical
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: 14
        - desc: Spelling error on Login ('log|n')
        - issue_num: 94
        - sev: Cosmetic
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: 47
        - desc: System slow when running too many reports
        - issue_num: 79
        - sev: Minor
    """
    def __init__( self, other, include=False, ignore=None, failOn=None ):
        """Store the target expression C{other} and the scanning options; a
        string C{failOn} is wrapped with C{ParserElement._literalStringClass}."""
        super( SkipTo, self ).__init__( other )
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        if isinstance(failOn, basestring):
            failOn = ParserElement._literalStringClass(failOn)
        self.failOn = failOn
        self.errmsg = "No match found for "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """Scan forward from C{loc} one character at a time until the target
        expression matches.

        Returns C{(loc, tokens)} where the tokens hold the skipped text,
        followed by the target's own tokens when C{include} was requested.
        Raises C{ParseException} when the end of the input is passed without
        a match.
        """
        startloc = loc
        instrlen = len(instring)
        probe_target = self.expr._parse
        fail_on_matches = None
        if self.failOn is not None:
            fail_on_matches = self.failOn.canParseNext
        skip_ignored = None
        if self.ignoreExpr is not None:
            skip_ignored = self.ignoreExpr.tryParse

        tmploc = loc
        while tmploc <= instrlen:
            # break if failOn expression matches
            # NOTE(review): this break bypasses the failing else-branch below, so the
            # scan ends "successfully" at the failOn position - confirm this is intended
            if fail_on_matches is not None and fail_on_matches(instring, tmploc):
                break

            if skip_ignored is not None:
                # advance past ignore expressions
                while True:
                    try:
                        tmploc = skip_ignored(instring, tmploc)
                    except ParseBaseException:
                        break

            try:
                probe_target(instring, tmploc, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
                continue

            # matched skipto expr, done
            break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skipresult = ParseResults(instring[startloc:loc])

        if self.includeMatch:
            loc, mat = probe_target(instring,loc,doActions,callPreParse=False)
            skipresult += mat

        return loc, skipresult

4140

class Forward(ParseElementEnhance):
    """
    Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.

    Note: take care when assigning to C{Forward} not to overlook precedence of operators.
    Specifically, '|' has a lower precedence than '<<', so that::
        fwdExpr << a | b | c
    will actually be evaluated as::
        (fwdExpr << a) | b | c
    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the C{Forward}::
        fwdExpr << (a | b | c)
    Converting to use the '<<=' operator instead will avoid this problem.

    See L{ParseResults.pprint} for an example of a recursive parser created using
    C{Forward}.
    """
    def __init__(self, other=None):
        super(Forward, self).__init__(other, savelist=False)

    def __lshift__(self, other):
        """
        Bind C{other} as the expression this placeholder stands for and
        return C{self}.  A plain string is first wrapped in
        C{ParserElement._literalStringClass}.
        """
        if isinstance(other, basestring):
            other = ParserElement._literalStringClass(other)
        self.expr = other
        # any cached string representation is stale once the expression changes
        self.strRepr = None
        # mirror the parsing-relevant settings of the bound expression
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.setWhitespaceChars(self.expr.whiteChars)
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        return self

    def __ilshift__(self, other):
        """In-place form of C{<<}; delegates to L{__lshift__}."""
        return self << other

    def leaveWhitespace(self):
        """Disable whitespace skipping on this element only; returns C{self}."""
        self.skipWhitespace = False
        return self

    def streamline(self):
        """Streamline the contained expression once; returns C{self}."""
        if not self.streamlined:
            # set the flag first so a recursive grammar does not loop forever
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate(self, validateTrace=None):
        """
        Validate the contained expression, then check this element for recursion.

        C{validateTrace} is the list of elements already visited; it is copied,
        never mutated.  C{None} (the default) means an empty trace.
        """
        if validateTrace is None:
            validateTrace = []
        if self not in validateTrace:
            tmp = validateTrace[:] + [self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__(self):
        """
        Return the assigned name if there is one, otherwise
        C{"<ClassName>: ..."}.

        The contained expression is deliberately not rendered: an earlier
        implementation that did so was stubbed out because it caused
        memory and performance problems.
        """
        if hasattr(self, "name"):
            return self.name
        return self.__class__.__name__ + ": ..."

    def copy(self):
        """
        Return a copy of this element.  When no expression has been bound
        yet, return a new C{Forward} that refers back to this one so that a
        later assignment is still seen through the copy.
        """
        if self.expr is not None:
            return super(Forward, self).copy()
        else:
            ret = Forward()
            ret <<= self
            return ret

4221

class _ForwardNoRecurse(Forward):
    """C{Forward} variant whose string form is always the literal C{"..."}."""

    def __str__(self):
        return "..."

4225

class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of C{ParseElementEnhance}, for converting parsed results.
    """

    def __init__(self, expr, savelist=False):
        # NOTE: C{savelist} is accepted but not forwarded to the base class;
        # C{saveAsList} is always reset to False here.
        super(TokenConverter, self).__init__(expr)
        self.saveAsList = False

4233

class Combine(TokenConverter):
    """
    Converter that joins all matched tokens into a single string.
    By default the matched pieces must be contiguous in the input string;
    pass C{adjacent=False} to the constructor to allow gaps between them.

    Example::
        real = Word(nums) + '.' + Word(nums)
        print(real.parseString('3.1416')) # -> ['3', '.', '1416']
        # will also erroneously match the following
        print(real.parseString('3. 1416')) # -> ['3', '.', '1416']

        real = Combine(Word(nums) + '.' + Word(nums))
        print(real.parseString('3.1416')) # -> ['3.1416']
        # no match when there are internal spaces
        print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...)
    """

    def __init__(self, expr, joinString="", adjacent=True):
        super(Combine, self).__init__(expr)
        # turn off whitespace skipping inside the contained expressions ...
        if adjacent:
            self.leaveWhitespace()
        # ... but keep it enabled on the Combine element itself
        self.skipWhitespace = True
        self.adjacent = adjacent
        self.joinString = joinString
        self.callPreparse = True

    def ignore(self, other):
        """Register an ignorable expression; returns C{self}."""
        if self.adjacent:
            # apply the base ParserElement behaviour directly
            ParserElement.ignore(self, other)
        else:
            super(Combine, self).ignore(other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Replace the matched tokens with one joined string token."""
        # start from a copy so results names are kept, then drop its tokens
        combined = tokenlist.copy()
        del combined[:]
        joined = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([joined], modal=self.modalResults)

        if self.resultsName and combined.haskeys():
            return [combined]
        return combined

4277

class Group(TokenConverter):
    """
    Converter that returns the matched tokens as a nested list - handy for
    keeping the tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions together.

    Example::
        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Optional(delimitedList(term))
        print(func.parseString("fn a,b,100"))  # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Optional(delimitedList(term)))
        print(func.parseString("fn a,b,100"))  # -> ['fn', ['a', 'b', '100']]
    """

    def __init__(self, expr):
        super(Group, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Wrap the matched tokens in a one-element list."""
        return [tokenlist]

4298

class Dict(TokenConverter):
    """
    Converter that returns a repetitive expression as a list, and also as a dictionary.
    Each element can be looked up using the first token of the element as its key.
    Useful for tabular report scraping when the first column can serve as an item key.

    Example::
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))

        # print attributes as plain groups
        print(OneOrMore(attr_expr).parseString(text).dump())

        # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names
        result = Dict(OneOrMore(Group(attr_expr))).parseString(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.asDict())
    prints::
        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']

        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}
    See more examples at L{ParseResults} of accessing fields by results name.
    """

    def __init__(self, expr):
        super(Dict, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """
        Register every non-empty element of C{tokenlist} under a key taken
        from its first token, then return the token list (wrapped in a list
        when this element has a results name).
        """
        for index, entry in enumerate(tokenlist):
            if len(entry) == 0:
                continue

            key = entry[0]
            if isinstance(key, int):
                # integer keys are stored under their stripped string form
                key = _ustr(entry[0]).strip()

            if len(entry) == 1:
                # key only: the value is an empty string
                value = ""
            elif len(entry) == 2 and not isinstance(entry[1], ParseResults):
                # key plus one plain token: store that token directly
                value = entry[1]
            else:
                remainder = entry.copy()
                del remainder[0]
                keep_as_results = (
                    len(remainder) != 1
                    or (isinstance(remainder, ParseResults) and remainder.haskeys())
                )
                value = remainder if keep_as_results else remainder[0]

            tokenlist[key] = _ParseResultsWithOffset(value, index)

        if self.resultsName:
            return [tokenlist]
        return tokenlist

4362

4363

class Suppress(TokenConverter):
    """
    Converter that discards the results of a parsed expression.

    Example::
        source = "a, b, c,d"
        wd = Word(alphas)
        wd_list1 = wd + ZeroOrMore(',' + wd)
        print(wd_list1.parseString(source))

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        wd_list2 = wd + ZeroOrMore(Suppress(',') + wd)
        print(wd_list2.parseString(source))
    prints::
        ['a', ',', 'b', ',', 'c', ',', 'd']
        ['a', 'b', 'c', 'd']
    (See also L{delimitedList}.)
    """

    def postParse(self, instring, loc, tokenlist):
        """Drop every matched token."""
        return []

    def suppress(self):
        """Already suppressed - return this element unchanged."""
        return self

4388

4389

class OnlyOnce(object):
    """
    Wrapper for parse actions, to ensure they are only called once.
    """

    def __init__(self, methodCall):
        self.callable = _trim_arity(methodCall)
        self.called = False

    def __call__(self, s, l, t):
        # every call after the first successful one is rejected
        if self.called:
            raise ParseException(s, l, "")
        results = self.callable(s, l, t)
        # only mark as called once the wrapped action has returned normally
        self.called = True
        return results

    def reset(self):
        """Allow the wrapped parse action to run one more time."""
        self.called = False

4405

def traceParseAction(f):
    """
    Decorator for debugging parse actions.

    When the parse action is called, this decorator will print C{">> entering I{method-name}(line:I{current_source_line}, I{parse_location}, I{matched_tokens})".}
    When the parse action completes, the decorator will print C{"<<"} followed by the returned value, or any exception that the parse action raised.
    All trace output is written to C{sys.stderr}.

    Example::
        wd = Word(alphas)

        @traceParseAction
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = OneOrMore(wd).setParseAction(remove_duplicate_chars)
        print(wds.parseString("slkdjs sld sldd sdlf sdljf"))
    prints::
        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']
    """
    action = _trim_arity(f)

    def z(*paArgs):
        label = action.__name__
        # the last three positional arguments are always (string, location, tokens)
        s, l, t = paArgs[-3:]
        if len(paArgs) > 3:
            # an extra leading argument is the bound instance; prefix its class name
            label = paArgs[0].__class__.__name__ + '.' + label
        sys.stderr.write(">>entering %s(line: '%s', %d, %r)\n" % (label, line(l, s), l, t))
        try:
            ret = action(*paArgs)
        except Exception as exc:
            sys.stderr.write("<<leaving %s (exception: %s)\n" % (label, exc))
            raise
        else:
            sys.stderr.write("<<leaving %s (ret: %r)\n" % (label, ret))
            return ret

    try:
        z.__name__ = action.__name__
    except AttributeError:
        pass
    return z

4446

4447#

4448# global helpers

4449#

def delimitedList(expr, delim=",", combine=False):
    """
    Helper to define a delimited list of expressions - the delimiter defaults to ','.
    By default, the list elements and delimiters can have intervening whitespace, and
    comments, but this can be overridden by passing C{combine=True} in the constructor.
    If C{combine} is set to C{True}, the matching tokens are returned as a single token
    string, with the delimiters included; otherwise, the matching tokens are returned
    as a list of tokens, with the delimiters suppressed.

    Example::
        delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc']
        delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
    """
    exprText = _ustr(expr)
    delimText = _ustr(delim)
    dlName = exprText + " [" + delimText + " " + exprText + "]..."

    if combine:
        # keep the delimiters and fuse everything into one token
        listExpr = Combine(expr + ZeroOrMore(delim + expr))
    else:
        # drop the delimiters from the results
        listExpr = expr + ZeroOrMore(Suppress(delim) + expr)
    return listExpr.setName(dlName)

4468

def countedArray(expr, intExpr=None):
    """
    Helper to define a counted list of expressions.
    This helper defines a pattern of the form::
        integer expr expr expr...
    where the leading integer tells how many expr expressions follow.
    The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.

    If C{intExpr} is specified, it should be a pyparsing expression that produces an integer value.

    Example::
        countedArray(Word(alphas)).parseString('2 ab cd ef')  # -> ['ab', 'cd']

        # in this parser, the leading integer value is given in binary,
        # '10' indicating that 2 values are in the array
        binaryConstant = Word('01').setParseAction(lambda t: int(t[0], 2))
        countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef')  # -> ['ab', 'cd']
    """
    arrayExpr = Forward()

    def countFieldParseAction(s, l, t):
        # once the count is known, define the array as exactly that many exprs
        n = t[0]
        arrayExpr << (Group(And([expr] * n)) if n else Group(empty))
        # the count itself is not reported in the results
        return []

    if intExpr is not None:
        # work on a copy so the caller's expression is left untouched
        intExpr = intExpr.copy()
    else:
        intExpr = Word(nums).setParseAction(lambda t:int(t[0]))
    intExpr.setName("arrayLen")
    intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
    return (intExpr + arrayExpr).setName('(len) ' + _ustr(expr) + '...')

4499

4500def _flatten(L):

4501 ret = []

4502 for i in L:

4503 if isinstance(i,list):

4504 ret.extend(_flatten(i))

4505 else:

4506 ret.append(i)

4507 return ret

4508

def matchPreviousLiteral(expr):
    """
    Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks
    for a 'repeat' of a previous expression.  For example::
        first = Word(nums)
        second = matchPreviousLiteral(first)
        matchExpr = first + ":" + second
    will match C{"1:1"}, but not C{"1:2"}.  Because this matches a
    previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
    If this is not desired, use C{matchPreviousExpr}.
    Do I{not} use with packrat parsing enabled.
    """
    rep = Forward()

    def copyTokenToRepeater(s, l, t):
        # redefine the repeater from whatever the original expression just matched
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # several (possibly nested) tokens: match them in sequence as literals
            rep << And(Literal(tok) for tok in _flatten(t.asList()))

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep

4536

def matchPreviousExpr(expr):
    """
    Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks
    for a 'repeat' of a previous expression.  For example::
        first = Word(nums)
        second = matchPreviousExpr(first)
        matchExpr = first + ":" + second
    will match C{"1:1"}, but not C{"1:2"}.  Because this matches by
    expressions, will I{not} match the leading C{"1:1"} in C{"1:10"};
    the expressions are evaluated first, and then compared, so
    C{"1"} is compared with C{"10"}.
    Do I{not} use with packrat parsing enabled.
    """
    rep = Forward()
    # the repeater parses with a copy of the original expression
    rep <<= expr.copy()

    def copyTokenToRepeater(s, l, t):
        # remember what the original matched ...
        matchTokens = _flatten(t.asList())

        def mustMatchTheseTokens(s, l, t):
            # ... and reject the repeat unless it produced the same tokens
            if _flatten(t.asList()) != matchTokens:
                raise ParseException("",0,"")

        rep.setParseAction(mustMatchTheseTokens, callDuringTry=True)

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep

4564

def _escapeRegexRangeChars(s):
    """Backslash-escape the characters that are special inside a regex C{[]} set, and spell newline/tab as C{\\n}/C{\\t}."""
    # the backslash itself must be handled first so later escapes are not doubled
    for special in r"\^-]":
        s = s.replace(special, _bslash + special)
    return _ustr(s.replace("\n", r"\n").replace("\t", r"\t"))

4572

def oneOf(strs, caseless=False, useRegex=True):
    """
    Helper to quickly define a set of alternative Literals, and makes sure to do
    longest-first testing when there is a conflict, regardless of the input order,
    but returns a C{L{MatchFirst}} for best performance.

    Parameters:
     - strs - a string of space-delimited literals, or a collection of string literals
     - caseless - (default=C{False}) - treat all literals as caseless
     - useRegex - (default=C{True}) - as an optimization, will generate a Regex
          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
          if creating a C{Regex} raises an exception)

    Example::
        comp_oper = oneOf("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.searchString("B = 12  AA=23 B<=AA AA>12"))
    prints::
        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    if caseless:
        def isequal(a, b):
            return a.upper() == b.upper()

        def masks(a, b):
            return b.upper().startswith(a.upper())

        parseElementClass = CaselessLiteral
    else:
        def isequal(a, b):
            return a == b

        def masks(a, b):
            return b.startswith(a)

        parseElementClass = Literal

    if isinstance(strs, basestring):
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        symbols = []
        warnings.warn("Invalid argument to oneOf, expected string or iterable",
                SyntaxWarning, stacklevel=2)
    if not symbols:
        return NoMatch()

    # Remove duplicates and move any symbol that is "masked" by an earlier,
    # shorter prefix in front of that prefix, so longer alternatives are tried first.
    idx = 0
    while idx < len(symbols) - 1:
        cur = symbols[idx]
        for pos, other in enumerate(symbols[idx + 1:], idx + 1):
            if isequal(other, cur):
                # duplicate of the current symbol - drop it and rescan
                del symbols[pos]
                break
            if masks(cur, other):
                # current symbol is a prefix of a later one - hoist the longer one
                del symbols[pos]
                symbols.insert(idx, other)
                break
        else:
            # nothing changed for this position, move on
            idx += 1

    if useRegex and not caseless:
        try:
            if len(symbols) == len("".join(symbols)):
                # every symbol is a single character: a character set suffices
                pattern = "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols)
            else:
                pattern = "|".join(re.escape(sym) for sym in symbols)
            return Regex(pattern).setName(' | '.join(symbols))
        except Exception:
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))

4645

def dictOf(key, value):
    """
    Helper to easily and clearly define a dictionary by specifying the respective patterns
    for the key and value.  Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens
    in the proper order.  The key pattern can include delimiting markers or punctuation,
    as long as they are suppressed, thereby leaving the significant key text.  The value
    pattern can include named results, so that the C{Dict} results can include named token
    fields.

    Example::
        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
        print(OneOrMore(attr_expr).parseString(text).dump())

        attr_label = label
        attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)

        # similar to Dict, but simpler call format
        result = dictOf(attr_label, attr_value).parseString(text)
        print(result.dump())
        print(result['shape'])
        print(result.shape)  # object attribute access works too
        print(result.asDict())
    prints::
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        SQUARE
        {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
    """
    # each entry is one grouped key/value pair; any number of entries may follow
    entry = Group(key + value)
    return Dict(ZeroOrMore(entry))

4680

def originalTextFor(expr, asString=True):
    """
    Helper to return the original, untokenized text for a given expression.  Useful to
    restore the parsed fields of an HTML start tag into the raw tag text itself, or to
    revert separate tokens with intervening whitespace back to the original matching
    input text. By default, returns a string containing the original parsed text.

    If the optional C{asString} argument is passed as C{False}, then the return value is a
    C{L{ParseResults}} containing any results names that were originally matched, and a
    single token containing the original matched text from the input string.  So if
    the expression passed to C{L{originalTextFor}} contains expressions with defined
    results names, you must set C{asString} to C{False} if you want to preserve those
    results name values.

    Example::
        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b","i"):
            opener,closer = makeHTMLTags(tag)
            patt = originalTextFor(opener + SkipTo(closer) + closer)
            print(patt.searchString(src)[0])
    prints::
        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    # zero-width markers that report the parse location where they match
    startMarker = Empty().setParseAction(lambda s,loc,t: loc)
    endMarker = startMarker.copy()
    # the end marker must not skip ahead over whitespace/ignorables
    endMarker.callPreparse = False
    matchExpr = startMarker("_original_start") + expr + endMarker("_original_end")

    if asString:
        extractText = lambda s,l,t: s[t._original_start:t._original_end]
    else:
        def extractText(s,l,t):
            # replace the tokens in place; popping removes the helper names
            t[:] = [s[t.pop('_original_start'):t.pop('_original_end')]]

    matchExpr.setParseAction(extractText)
    matchExpr.ignoreExprs = expr.ignoreExprs
    return matchExpr

4717

def ungroup(expr):
    """
    Helper to undo pyparsing's default grouping of And expressions, even
    if all but one are non-empty.  The result keeps only the first token
    matched by C{expr}.
    """
    converter = TokenConverter(expr)
    return converter.setParseAction(lambda t:t[0])

4724

def locatedExpr(expr):
    """
    Helper to decorate a returned token with its starting and ending locations in the input string.
    This helper adds the following results names:
     - locn_start = location where matched expression begins
     - locn_end = location where matched expression ends
     - value = the actual parsed results

    Be careful if the input text contains C{<TAB>} characters, you may want to call
    C{L{ParserElement.parseWithTabs}}

    Example::
        wd = Word(alphas)
        for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
            print(match)
    prints::
        [[0, 'ljsdf', 5]]
        [[8, 'lksdjjf', 15]]
        [[18, 'lkkjj', 23]]
    """
    # zero-width expression whose only token is the current parse location
    locator = Empty().setParseAction(lambda s,l,t: l)
    startLoc = locator("locn_start")
    value = expr("value")
    # the end locator must not skip whitespace, or it would report the start of the next token
    endLoc = locator.copy().leaveWhitespace()("locn_end")
    return Group(startLoc + value + endLoc)

4747

4748

# convenience constants for positional expressions
# (each is a ready-made, named instance of the corresponding element class)
empty = Empty().setName("empty")
lineStart = LineStart().setName("lineStart")
lineEnd = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd = StringEnd().setName("stringEnd")

4755

# Grammar for regex-style bracket expressions such as "[a-z0-9_]", used by srange().

# backslash followed by one punctuation character -> that character
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# "\x##" / "\X##" / "\0x##" hex escape -> the character with that code point.
# The digits are taken as everything after the 'x'.  The previous
# lstrip(r'\0x') stripped a *set* of characters, which also removed leading
# '0' digits (so "\x00" became int("", 16)) and did not remove an upper-case
# 'X'; both cases raised ValueError.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lower().partition('x')[2],16)))
# "\0##" octal escape -> the character with that code point
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
# any one of the escapes above, or a single character other than backslash or ']'
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r'\]', exact=1)
# "a-z" style range, kept as a two-element group
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"

4762

def srange(s):
    r"""
    Helper to easily define string ranges for use in Word construction.  Borrows
    syntax from regexp '[]' string range definitions::
        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
    The input string must be enclosed in []'s, and the returned string is the expanded
    character set joined into a single string.
    The values enclosed in the []'s may be:
     - a single character
     - an escaped character with a leading backslash (such as C{\-} or C{\]})
     - an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character)
         (C{\0x##} is also supported for backwards compatibility)
     - an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character)
     - a range of any of the above, separated by a dash (C{'a-z'}, etc.)
     - any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.)

    Any error while parsing or expanding C{s} results in an empty string.
    """
    def _expanded(part):
        # a ParseResults part is a (low, high) range; anything else is a single character
        if not isinstance(part, ParseResults):
            return part
        return ''.join(unichr(c) for c in range(ord(part[0]), ord(part[1]) + 1))

    try:
        body = _reBracketExpr.parseString(s).body
        return "".join(_expanded(part) for part in body)
    except Exception:
        return ""

4786

def matchOnlyAtCol(n):
    """
    Helper method for defining parse actions that require matching at a specific
    column in the input text.  The returned parse action raises
    C{ParseException} when the match does not start at column C{n}.
    """
    def verifyCol(strg, locn, toks):
        actual = col(locn, strg)
        if actual != n:
            raise ParseException(strg, locn, "matched token not at column %d" % n)

    return verifyCol

4796

def replaceWith(replStr):
    """
    Helper method for common parse actions that simply return a literal value.  Especially
    useful when used with C{L{transformString<ParserElement.transformString>}()}.

    Example::
        num = Word(nums).setParseAction(lambda toks: int(toks[0]))
        na = oneOf("N/A NA").setParseAction(replaceWith(math.nan))
        term = na | num

        OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234]
    """
    # the parse action ignores its arguments and always yields the same one-item list
    action = lambda s,l,t: [replStr]
    return action

4810

def removeQuotes(s,l,t):
    """
    Helper parse action for removing quotation marks from parsed quoted strings.
    Returns the first token with its first and last characters removed.

    Example::
        # by default, quotation marks are included in parsed results
        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]

        # use removeQuotes to strip quotation marks from parsed results
        quotedString.setParseAction(removeQuotes)
        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
    """
    quoted = t[0]
    return quoted[1:-1]

4824

def tokenMap(func, *args):
    """
    Helper to define a parse action by mapping a function to all elements of a ParseResults list. If any additional
    args are passed, they are forwarded to the given function as additional arguments after
    the token, as in C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))}, which will convert the
    parsed data to an integer using base 16.

    Example (compare the last to example in L{ParserElement.transformString})::
        hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16))
        hex_ints.runTests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).setParseAction(tokenMap(str.upper))
        OneOrMore(upperword).runTests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).setParseAction(tokenMap(str.title))
        OneOrMore(wd).setParseAction(' '.join).runTests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')
    prints::
        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """
    def pa(s,l,t):
        return [func(token, *args) for token in t]

    # give the parse action the mapped function's name (or, failing that, its
    # class name) so debug output is readable
    try:
        fallback = func.__class__.__name__
        func_name = getattr(func, '__name__', fallback)
    except Exception:
        func_name = str(func)
    pa.__name__ = func_name

    return pa

4868

# Deprecated module-level parse actions, kept for backward compatibility.

upcaseTokens = tokenMap(lambda t: _ustr(t).upper())
"""(Deprecated) Helper parse action to convert tokens to upper case. Deprecated in favor of L{pyparsing_common.upcaseTokens}"""

downcaseTokens = tokenMap(lambda t: _ustr(t).lower())
"""(Deprecated) Helper parse action to convert tokens to lower case. Deprecated in favor of L{pyparsing_common.downcaseTokens}"""

4874

def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name"""
    if isinstance(tagStr, basestring):
        resname = tagStr
        # XML tag names are case-sensitive, HTML tag names are not
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas, alphanums + "_-:")
    if xml:
        # XML: every attribute has a double-quoted value
        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        tagAttr = tagAttrName + Suppress("=") + tagAttrValue
    else:
        # HTML: values may be quoted or bare, or missing altogether, and
        # attribute names are normalized to lower case
        printablesLessRAbrack = "".join(c for c in printables if c not in ">")
        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
        tagAttr = tagAttrName.setParseAction(downcaseTokens) + Optional( Suppress("=") + tagAttrValue )

    # optional "/" before ">" marks an empty element; reported as a bool named "empty"
    emptyFlag = Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/')
    openTag = Suppress("<") + tagStr("tag") + Dict(ZeroOrMore(Group(tagAttr))) + emptyFlag + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # results names: "start"/"end" + tag name with ':' removed and each part capitalized
    camelName = "".join(resname.replace(":"," ").title().split())
    openTag = openTag.setResultsName("start" + camelName).setName("<%s>" % resname)
    closeTag = closeTag.setResultsName("end" + camelName).setName("</%s>" % resname)
    openTag.tag = resname
    closeTag.tag = resname
    return openTag, closeTag

4903

def makeHTMLTags(tagStr):
    """
    Helper to construct opening and closing tag expressions for HTML, given a tag name. Matches
    tags in either upper or lower case, attributes with namespaces and with quoted or unquoted values.

    Example::
        text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
        # makeHTMLTags returns pyparsing expressions for the opening and closing tags as a 2-tuple
        a,a_end = makeHTMLTags("A")
        link_expr = a + SkipTo(a_end)("link_text") + a_end

        for link in link_expr.searchString(text):
            # attributes in the <A> tag (like "href" shown here) are also accessible as named results
            print(link.link_text, '->', link.href)
    prints::
        pyparsing -> http://pyparsing.wikispaces.com
    """
    return _makeTags(tagStr, xml=False)

4922

def makeXMLTags(tagStr):
    """
    Helper to construct opening and closing tag expressions for XML, given a tag name. Matches
    tags only in the given upper/lower case.

    Example: similar to L{makeHTMLTags}
    """
    return _makeTags(tagStr, xml=True)

4931

def withAttribute(*args,**attrDict):
    """
    Helper to create a validating parse action to be used with start tags created
    with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
    with a required attribute value, to avoid false matches on common tags such as
    C{<TD>} or C{<DIV>}.

    Call C{withAttribute} with a series of attribute names and values. Specify the list
    of filter attributes names and values as:
     - keyword arguments, as in C{(align="right")}, or
     - as an explicit dict with C{**} operator, when an attribute name is also a Python
          reserved word, as in C{**{"class":"Customer", "align":"right"}}
     - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
    For attribute names with a namespace prefix, you must use the second form.  Attribute
    names are matched insensitive to upper/lower case.

    If just testing for C{class} (with or without a namespace), use C{L{withClass}}.

    To verify that the attribute exists, but without specifying a value, pass
    C{withAttribute.ANY_VALUE} as the value.

    When positional name-value tuples are given, keyword arguments are ignored.

    Returns a parse action C{pa(s, l, tokens)} that raises C{ParseException} when a
    required attribute is missing from C{tokens} or has a different value, and
    returns C{None} otherwise.

    Example::
        html = '''
            <div>
            Some text
            <div type="grid">1 4 0 1 0</div>
            <div type="graph">1,3 2,3 1,1</div>
            <div>this has no type</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")

        # only match div tag having a type attribute with value "grid"
        div_grid = div().setParseAction(withAttribute(type="grid"))
        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        # construct a match with any div tag having a type attribute, regardless of the value
        div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)
    prints::
        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    # Snapshot the requirements as a list of 2-tuples; unpacking here also rejects
    # malformed positional entries when the parse action is created, not when it runs.
    attrs = [(k,v) for k,v in (args if args else attrDict.items())]

    def pa(s,l,tokens):
        for attrName,attrValue in attrs:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            # ANY_VALUE is a sentinel, so it is recognised by identity rather than equality
            if attrValue is not withAttribute.ANY_VALUE and tokens[attrName] != attrValue:
                raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                            (attrName, tokens[attrName], attrValue))
    return pa
withAttribute.ANY_VALUE = object()

4996

def withClass(classname, namespace=''):
    """
    Shortcut for C{L{withAttribute}} when filtering on a tag's C{class} attribute,
    which is awkward to spell as a keyword argument because C{class} is a reserved
    word in Python.

    Parameters:
     - classname - required value of the class attribute (or C{withAttribute.ANY_VALUE})
     - namespace - optional namespace prefix; when given, the attribute tested is
          C{"<namespace>:class"} instead of C{"class"}

    Example::
        html = '''
            <div>
            Some text
            <div class="grid">1 4 0 1 0</div>
            <div class="graph">1,3 2,3 1,1</div>
            <div>this <div> has no class</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")
        div_grid = div().setParseAction(withClass("grid"))

        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)
    prints::
        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    if namespace:
        attr_name = "%s:class" % namespace
    else:
        attr_name = "class"
    required = {attr_name: classname}
    return withAttribute(**required)

5031

# Namespace object holding the two associativity markers that infixNotation
# compares against; each marker is its own distinct object() instance.
opAssoc = _Constants()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()

5035

def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
    """
    Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy.  Operators may be unary or
    binary, left- or right-associative.  Parse actions can also be attached
    to operator expressions. The generated parser will also recognize the use
    of parentheses to override operator precedences (see example below).

    Note: if you define a deep operator list, you may see performance issues
    when using infixNotation. See L{ParserElement.enablePackrat} for a
    mechanism to potentially improve your parser performance.

    Parameters:
     - baseExpr - expression representing the most basic operand of the nested
       expression grammar
     - opList - list of tuples, one for each operator precedence level in the
      expression grammar; each tuple is of the form
      (opExpr, numTerms, rightLeftAssoc, parseAction), where:
       - opExpr is the pyparsing expression for the operator;
          may also be a string, which will be converted to a Literal;
          if numTerms is 3, opExpr is a tuple (or list) of two expressions, for the
          two operators separating the 3 terms
       - numTerms is the number of terms for this operator (must
          be 1, 2, or 3)
       - rightLeftAssoc is the indicator whether the operator is
          right or left associative, using the pyparsing-defined
          constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
       - parseAction is the parse action to be associated with
          expressions matching this operator expression (the
          parse action tuple member may be omitted); if the parse action
          is passed a tuple or list of functions, this is equivalent to
          calling C{setParseAction(*fn)} (L{ParserElement.setParseAction})
     - lpar - expression for matching left-parentheses (default=C{Suppress('(')})
     - rpar - expression for matching right-parentheses (default=C{Suppress(')')})

    Raises C{ValueError} if a ternary operator is not given as a pair of
    expressions, if numTerms is not 1, 2 or 3, or if the associativity is
    neither C{opAssoc.LEFT} nor C{opAssoc.RIGHT}.

    Example::
        # simple example of four-function arithmetic with ints and variable names
        integer = pyparsing_common.signed_integer
        varname = pyparsing_common.identifier

        arith_expr = infixNotation(integer | varname,
            [
            ('-', 1, opAssoc.RIGHT),
            (oneOf('* /'), 2, opAssoc.LEFT),
            (oneOf('+ -'), 2, opAssoc.LEFT),
            ])

        arith_expr.runTests('''
            5+3*6
            (5+3)*6
            -2--11
            ''', fullDump=False)
    prints::
        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """
    ret = Forward()
    # the tightest-binding level: a bare operand or a parenthesized full expression
    lastExpr = baseExpr | ( lpar + ret + rpar )
    for operDef in opList:
        # pad with None so the optional parse action member may be omitted
        opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
        if arity == 3:
            # validate BEFORE building the name, so a bad opExpr raises the intended
            # ValueError and a 2-element list is accepted as well as a 2-tuple
            if not isinstance(opExpr, (tuple, list)) or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
            termName = "%s%s term" % (opExpr1, opExpr2)
        else:
            termName = "%s term" % (opExpr,)
        thisExpr = Forward().setName(termName)
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
                else:
                    # no operator expression: terms are simply juxtaposed
                    matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
                            Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
                else:
                    matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
                            Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            if isinstance(pa, (tuple, list)):
                matchExpr.setParseAction(*pa)
            else:
                matchExpr.setParseAction(pa)
        # this level matches its own operator form, or falls through to the previous level
        thisExpr <<= ( matchExpr.setName(termName) | lastExpr )
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret

5147

# Backward-compatible alias: both names are bound to the same function object.
operatorPrecedence = infixNotation
"""(Deprecated) Former name of C{L{infixNotation}}, will be dropped in a future release."""

5150

# Quoted-string expressions. Between the quotes each pattern accepts, repeatedly:
#   - any character other than the quote itself, newline, carriage return or backslash,
#   - a doubled quote character, or
#   - a backslash escape: backslash + any non-'x' character, or backslash + 'x' + hex digits.
# The closing quote is a separate literal, joined into one token by Combine.
dblQuotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"').setName("string enclosed in double quotes")
sglQuotedString = Combine(Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("string enclosed in single quotes")
# Either form; the double-quoted alternative is tried first.
quotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"'|
                       Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("quotedString using single or double quotes")
# A quoted string immediately preceded by a literal 'u'.
unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")

5156

def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
    """
    Helper method for defining nested lists enclosed in opening and closing
    delimiters ("(" and ")" are the default).

    Parameters:
     - opener - opening character for a nested list (default=C{"("}); can also be a pyparsing expression
     - closer - closing character for a nested list (default=C{")"}); can also be a pyparsing expression
     - content - expression for items within the nested lists (default=C{None})
     - ignoreExpr - expression for ignoring opening and closing delimiters (default=C{quotedString})

    If an expression is not provided for the content argument, the nested
    expression will capture all whitespace-delimited content between delimiters
    as a list of separate values.

    Use the C{ignoreExpr} argument to define expressions that may contain
    opening or closing characters that should not be treated as opening
    or closing characters for nesting, such as quotedString or a comment
    expression.  Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
    The default is L{quotedString}, but if no expressions are to be ignored,
    then pass C{None} for this argument.

    Raises C{ValueError} if opener and closer compare equal, or if no content
    expression is given and opener/closer are not both strings.

    Example::
        data_type = oneOf("void int short long char float double")
        decl_data_type = Combine(data_type + Optional(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR,RPAR = map(Suppress, "()")

        code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Optional(delimitedList(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(cStyleComment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }

            int dec_to_hex(char hchar) {
                if (hchar >= '0' && hchar <= '9') {
                    return (ord(hchar)-ord('0'));
                } else {
                    return (10+ord(hchar)-ord('A'));
                }
            }
        '''
        for func in c_function.searchString(source_code):
            print("%(name)s (%(type)s) args: %(args)s" % func)

    prints::
        is_odd (int) args: [['int', 'x']]
        dec_to_hex (int) args: [['char', 'hchar']]
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        # a default content expression can only be derived from string delimiters
        if not (isinstance(opener,basestring) and isinstance(closer,basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")

        def strip_token(t):
            return t[0].strip()

        white = ParserElement.DEFAULT_WHITE_CHARS
        one_char_delims = len(opener) == 1 and len(closer) == 1
        if one_char_delims and ignoreExpr is not None:
            # single-character delimiters can be excluded directly by CharsNotIn
            content = Combine(OneOrMore(~ignoreExpr +
                                        CharsNotIn(opener+closer+white, exact=1))
                              ).setParseAction(strip_token)
        elif one_char_delims:
            # here the strip action sits on the CharsNotIn element, not on the sum
            content = empty.copy() + CharsNotIn(opener+closer+white).setParseAction(strip_token)
        elif ignoreExpr is not None:
            # multi-character delimiters need negative lookaheads before each character
            content = Combine(OneOrMore(~ignoreExpr +
                                        ~Literal(opener) + ~Literal(closer) +
                                        CharsNotIn(white, exact=1))
                              ).setParseAction(strip_token)
        else:
            content = Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
                                        CharsNotIn(white, exact=1))
                              ).setParseAction(strip_token)

    ret = Forward()
    if ignoreExpr is None:
        item = ret | content
    else:
        item = ignoreExpr | ret | content
    # recursive definition: a group of items, each of which may itself be a nested group
    ret <<= Group( Suppress(opener) + ZeroOrMore( item ) + Suppress(closer) )
    ret.setName('nested %s%s expression' % (opener,closer))
    return ret

5246

def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """
    Helper method for defining space-delimited indentation blocks, such as
    those used to define block statements in Python source code.

    Parameters:
     - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
     - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack)
     - indent - boolean indicating whether block must be indented beyond the
            the current level; set to False for block of left-most statements
            (default=C{True})

    A valid block must contain at least one C{blockStatement}.

    Note that C{indentStack} is mutated while parsing (columns are pushed on
    indent and popped on unindent), and that a backslash-newline ignore
    expression is added to C{blockStatementExpr} itself.

    Example::
        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''


        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group( "(" + Optional( delimitedList(identifier) ) + ")" ) + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group( funcDecl + func_body )

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << ( funcDef | assignment | identifier )

        module_body = OneOrMore(stmt)

        parseTree = module_body.parseString(data)
        parseTree.pprint()
    prints::
        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    def checkPeerIndent(instring, loc, toks):
        # at end of input there is nothing left to compare
        if loc >= len(instring):
            return
        column = col(loc, instring)
        expected = indentStack[-1]
        if column > expected:
            raise ParseFatalException(instring, loc, "illegal nesting")
        if column < expected:
            raise ParseException(instring, loc, "not a peer entry")

    def checkSubIndent(instring, loc, toks):
        column = col(loc, instring)
        if column <= indentStack[-1]:
            raise ParseException(instring, loc, "not a subentry")
        # a deeper column opens a new indentation level
        indentStack.append( column )

    def checkUnindent(instring, loc, toks):
        if loc >= len(instring):
            return
        column = col(loc, instring)
        # must be left of the current level and no deeper than the enclosing one
        if not indentStack or column >= indentStack[-1] or column > indentStack[-2]:
            raise ParseException(instring, loc, "not an unindent")
        indentStack.pop()

    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
    PEER   = Empty().setParseAction(checkPeerIndent).setName('')
    UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT')

    # one or more statements, each starting at the current indentation column
    statements = OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )
    if indent:
        smExpr = Group( Optional(NL) + INDENT + statements + UNDENT )
    else:
        smExpr = Group( Optional(NL) + statements )

    # kept after the block expression is assembled, matching the original statement order
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.setName('indented block')

5360

# Character sets built by srange from hex-escaped ranges
# (0xc0-0xd6, 0xd8-0xf6, 0xf8-0xff for alphas8bit; 0xa1-0xbf, 0xd7, 0xf7 for punc8bit).
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# Opening/closing tag expressions for an arbitrary tag name: a leading letter
# followed by letters, digits, '_' or ':'.
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:").setName('any tag'))
# Entity name -> replacement character: gt '>', lt '<', amp '&', nbsp ' ', quot '"', apos "'".
_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(),'><& "\''))
# Matches "&<name>;" for the names above; the name is captured in the group "entity".
commonHTMLEntity = Regex('&(?P<entity>' + '|'.join(_htmlEntityMap.keys()) +");").setName("common HTML entity")

def replaceHTMLEntity(t):
    """Helper parser action to replace common HTML entities with their special characters.

    Looks up the C{entity} named result of the tokens in C{_htmlEntityMap};
    returns C{None} when the name is not in the map.
    """
    entity_name = t.entity
    return _htmlEntityMap.get(entity_name)

5370

# it's easy to get these comment structures wrong - they're very common, so may as well make them available
# Body pattern: after "/*", any run of non-'*' characters or '*' not followed by '/';
# the terminating "*/" is a separate literal joined in by Combine.
cStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/').setName("C style comment")
"Comment of the form C{/* ... */}"

5374

# Non-greedy match from "<!--" up to the first "-->"; [\s\S] lets the comment span
# multiple lines. (An empty pattern here would match the empty string everywhere.)
htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
"Comment of the form C{<!-- ... -->}"

5377

# Everything up to (not including) the next newline; leading whitespace is kept.
restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
# "//" followed by any run of non-newline characters; a backslash-newline pair is
# also consumed, so such a comment continues onto the next line.
dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
"Comment of the form C{// ... (to end of line)}"

# Same body pattern as cStyleComment, with dblSlashComment as the alternative.
cppStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'| dblSlashComment).setName("C++ style comment")
"Comment of either form C{L{cStyleComment}} or C{L{dblSlashComment}}"

# Alias: bound to the very same expression object as cppStyleComment.
javaStyleComment = cppStyleComment
"Same as C{L{cppStyleComment}}"

pythonStyleComment = Regex(r"#.*").setName("Python style comment")
"Comment of the form C{# ... (to end of line)}"

# One list item: printable words (commas excluded), optionally followed by blanks/tabs
# as long as what comes next is neither a comma nor the end of the line.
_commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
# Each item is optional, so an empty field yields the default "".
commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList")
"""(Deprecated) Predefined expression of 1 or more printable words or quoted strings, separated by commas.
   This expression is deprecated in favor of L{pyparsing_common.comma_separated_list}."""

5397

# some other useful expressions - using lower-case class name since we are really using this as a namespace
class pyparsing_common:
    """
    Namespace class (not meant to be instantiated) collecting predefined expressions
    and parse actions.

    Here are some common low-level expressions that may be useful in jump-starting parser development:
     - numeric forms (L{integers<integer>}, L{reals<real>}, L{scientific notation<sci_real>})
     - common L{programming identifiers<identifier>}
     - network addresses (L{MAC<mac_address>}, L{IPv4<ipv4_address>}, L{IPv6<ipv6_address>})
     - ISO8601 L{dates<iso8601_date>} and L{datetime<iso8601_datetime>}
     - L{UUID<uuid>}
     - L{comma-separated list<comma_separated_list>}
    Parse actions:
     - C{L{convertToInteger}}
     - C{L{convertToFloat}}
     - C{L{convertToDate}}
     - C{L{convertToDatetime}}
     - C{L{stripHTMLTags}}
     - C{L{upcaseTokens}}
     - C{L{downcaseTokens}}

    Example::
        pyparsing_common.number.runTests('''
            # any int or real number, returned as the appropriate type
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

        pyparsing_common.fnumber.runTests('''
            # any int or real number, returned as float
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

        pyparsing_common.hex_integer.runTests('''
            # hex numbers
            100
            FF
            ''')

        pyparsing_common.fraction.runTests('''
            # fractions
            1/2
            -3/4
            ''')

        pyparsing_common.mixed_integer.runTests('''
            # mixed fractions
            1
            1/2
            -3/4
            1-3/4
            ''')

        import uuid
        pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
        pyparsing_common.uuid.runTests('''
            # uuid
            12345678-1234-5678-1234-567812345678
            ''')
    prints::
        # any int or real number, returned as the appropriate type
        100
        [100]

        -100
        [-100]

        +100
        [100]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

        # any int or real number, returned as float
        100
        [100.0]

        -100
        [-100.0]

        +100
        [100.0]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

        # hex numbers
        100
        [256]

        FF
        [255]

        # fractions
        1/2
        [0.5]

        -3/4
        [-0.75]

        # mixed fractions
        1
        [1]

        1/2
        [0.5]

        -3/4
        [-0.75]

        1-3/4
        [1.75]

        # uuid
        12345678-1234-5678-1234-567812345678
        [UUID('12345678-1234-5678-1234-567812345678')]
    """

    convertToInteger = tokenMap(int)
    """
    Parse action for converting parsed integers to Python int
    """

    convertToFloat = tokenMap(float)
    """
    Parse action for converting parsed numbers to Python float
    """

    integer = Word(nums).setName("integer").setParseAction(convertToInteger)
    """expression that parses an unsigned integer, returns an int"""

    # tokenMap(int, 16): tokens are converted with base 16
    hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int,16))
    """expression that parses a hexadecimal integer, returns an int"""

    signed_integer = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger)
    """expression that parses an integer with optional leading sign, returns an int"""

    # signed_integer() presumably yields a copy, so the float conversion set here
    # does not replace signed_integer's own int conversion - TODO confirm
    fraction = (signed_integer().setParseAction(convertToFloat) + '/' + signed_integer().setParseAction(convertToFloat)).setName("fraction")
    """fractional expression of an integer divided by an integer, returns a float"""
    # first token is the numerator, last token the denominator
    fraction.addParseAction(lambda t: t[0]/t[-1])

    # the optional '-' separator is suppressed, so only numeric tokens remain to be summed
    mixed_integer = (fraction | signed_integer + Optional(Optional('-').suppress() + fraction)).setName("fraction or mixed integer-fraction")
    """mixed integer of the form 'integer - fraction', with optional leading integer, returns float"""
    mixed_integer.addParseAction(sum)

    real = Regex(r'[+-]?\d+\.\d*').setName("real number").setParseAction(convertToFloat)
    """expression that parses a floating point number and returns a float"""

    sci_real = Regex(r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)').setName("real number with scientific notation").setParseAction(convertToFloat)
    """expression that parses a floating point number with optional scientific notation and returns a float"""

    # streamlining this expression makes the docs nicer-looking
    number = (sci_real | real | signed_integer).streamline()
    """any numeric expression, returns the corresponding Python type"""

    fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat)
    """any int or real number, returned as float"""

    identifier = Word(alphas+'_', alphanums+'_').setName("identifier")
    """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""

    ipv4_address = Regex(r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}').setName("IPv4 address")
    "IPv4 address (C{0.0.0.0 - 255.255.255.255})"

    # one group of 1-4 hex digits
    _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer")
    # exactly eight colon-separated groups
    _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part)*7).setName("full IPv6 address")
    # "::" with up to seven groups on either side ...
    _short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part)*(0,6)) + "::" + Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))).setName("short IPv6 address")
    # ... but fewer than eight groups in total (counted as tokens that match _ipv6_part)
    _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8)
    _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address")
    ipv6_address = Combine((_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")).setName("IPv6 address")
    "IPv6 address (long, short, or mixed form)"

    # NOTE: the pattern requires six two-hex-digit groups, all separated by the same
    # delimiter (backreference \1); the description string below shows only five.
    mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address")
    "MAC address xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"

    @staticmethod
    def convertToDate(fmt="%Y-%m-%d"):
        """
        Helper to create a parse action for converting parsed date string to Python datetime.date

        Params -
         - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%d"})

        The returned parse action converts the first token and raises C{ParseException}
        (carrying the C{ValueError} text) when the token does not fit C{fmt}.

        Example::
            date_expr = pyparsing_common.iso8601_date.copy()
            date_expr.setParseAction(pyparsing_common.convertToDate())
            print(date_expr.parseString("1999-12-31"))
        prints::
            [datetime.date(1999, 12, 31)]
        """
        def cvt_fn(s,l,t):
            try:
                return datetime.strptime(t[0], fmt).date()
            except ValueError as ve:
                # report a bad date as a parse failure at the current location
                raise ParseException(s, l, str(ve))
        return cvt_fn

    @staticmethod
    def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"):
        """
        Helper to create a parse action for converting parsed datetime string to Python datetime.datetime

        Params -
         - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%dT%H:%M:%S.%f"})

        The returned parse action converts the first token and raises C{ParseException}
        (carrying the C{ValueError} text) when the token does not fit C{fmt}.

        Example::
            dt_expr = pyparsing_common.iso8601_datetime.copy()
            dt_expr.setParseAction(pyparsing_common.convertToDatetime())
            print(dt_expr.parseString("1999-12-31T23:59:59.999"))
        prints::
            [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
        """
        def cvt_fn(s,l,t):
            try:
                return datetime.strptime(t[0], fmt)
            except ValueError as ve:
                # report a bad datetime as a parse failure at the current location
                raise ParseException(s, l, str(ve))
        return cvt_fn

    # month and day are optional in this pattern (year alone, or year-month, also match)
    iso8601_date = Regex(r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?').setName("ISO8601 date")
    "ISO8601 date (C{yyyy-mm-dd})"

    iso8601_datetime = Regex(r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime")
    "ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)}) - trailing seconds, milliseconds, and timezone optional; accepts separating C{'T'} or C{' '}"

    uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID")
    "UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})"

    # matches any opening or closing tag and suppresses it from the output
    _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress()
    @staticmethod
    def stripHTMLTags(s, l, tokens):
        """
        Parse action to remove HTML tags from web page HTML source

        Runs C{_html_stripper.transformString} over the first token and returns the result.

        Example::
            # strip HTML links from normal text
            text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
            td,td_end = makeHTMLTags("TD")
            table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end

            print(table_text.parseString(text).body) # -> 'More info at the pyparsing wiki page'
        """
        return pyparsing_common._html_stripper.transformString(tokens[0])

    # one list item: printable words (commas excluded), each optionally followed by
    # blanks/tabs; a word is not started at a comma or at the end of the line
    _commasepitem = Combine(OneOrMore(~Literal(",") + ~LineEnd() + Word(printables, excludeChars=',')
                                        + Optional( White(" \t") ) ) ).streamline().setName("commaItem")
    # each item is optional, so an empty field yields the default ""
    comma_separated_list = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("comma separated list")
    """Predefined expression of 1 or more printable words or quoted strings, separated by commas."""

    upcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).upper()))
    """Parse action to convert tokens to upper case."""

    downcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).lower()))
    """Parse action to convert tokens to lower case."""

5671

5672

# Self-demo, executed only when the module is run as a script: builds a tiny
# "select ... from ..." grammar and exercises runTests on it and on several
# pyparsing_common expressions.
if __name__ == "__main__":

    selectToken = CaselessLiteral("select")
    fromToken = CaselessLiteral("from")

    ident = Word(alphas, alphanums + "_$")

    # dotted names are combined into one token and upper-cased
    columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    columnNameList = Group(delimitedList(columnName)).setName("columns")
    columnSpec = ('*' | columnNameList)

    tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    tableNameList = Group(delimitedList(tableName)).setName("tables")

    simpleSQL = selectToken("command") + columnSpec("columns") + fromToken + tableNameList("tables")

    # demo runTests method, including embedded comments in test string
    simpleSQL.runTests("""
        # '*' as column list and dotted table name
        select * from SYS.XYZZY

        # caseless match on "SELECT", and casts back to "select"
        SELECT * from XYZZY, ABC

        # list of column names, and mixed case SELECT keyword
        Select AA,BB,CC from Sys.dual

        # multiple tables
        Select A, B, C from Sys.dual, Table2

        # invalid SELECT keyword - should fail
        Xelect A, B, C from Sys.dual

        # incomplete command - should fail
        Select

        # invalid column name - should fail
        Select ^^^ frox Sys.dual

        """)

    # any int or real number, returned as the appropriate type
    pyparsing_common.number.runTests("""
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """)

    # any int or real number, returned as float
    pyparsing_common.fnumber.runTests("""
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """)

    # hex numbers
    pyparsing_common.hex_integer.runTests("""
        100
        FF
        """)

    # replaces the parse action on the shared pyparsing_common.uuid expression
    # so that matches are returned as uuid.UUID objects
    import uuid
    pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
    pyparsing_common.uuid.runTests("""
        12345678-1234-5678-1234-567812345678
        """)

Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pkg_resources/_vendor/pyparsing.py: 43%

2407 statements