Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pkg_resources/_vendor/pyparsing.py: 43%

2407 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1# module pyparsing.py 

2# 

3# Copyright (c) 2003-2018 Paul T. McGuire 

4# 

5# Permission is hereby granted, free of charge, to any person obtaining 

6# a copy of this software and associated documentation files (the 

7# "Software"), to deal in the Software without restriction, including 

8# without limitation the rights to use, copy, modify, merge, publish, 

9# distribute, sublicense, and/or sell copies of the Software, and to 

10# permit persons to whom the Software is furnished to do so, subject to 

11# the following conditions: 

12# 

13# The above copyright notice and this permission notice shall be 

14# included in all copies or substantial portions of the Software. 

15# 

16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 

17# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 

18# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 

19# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 

20# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 

21# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 

22# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 

23# 

24 

25__doc__ = \ 

26""" 

27pyparsing module - Classes and methods to define and execute parsing grammars 

28============================================================================= 

29 

30The pyparsing module is an alternative approach to creating and executing simple grammars, 

31vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you 

32don't need to learn a new syntax for defining grammars or matching expressions - the parsing module 

33provides a library of classes that you use to construct the grammar directly in Python. 

34 

35Here is a program to parse "Hello, World!" (or any greeting of the form  

36C{"<salutation>, <addressee>!"}), built up using L{Word}, L{Literal}, and L{And} elements  

37(L{'+'<ParserElement.__add__>} operator gives L{And} expressions, strings are auto-converted to 

38L{Literal} expressions):: 

39 

40 from pyparsing import Word, alphas 

41 

42 # define grammar of a greeting 

43 greet = Word(alphas) + "," + Word(alphas) + "!" 

44 

45 hello = "Hello, World!" 

46 print (hello, "->", greet.parseString(hello)) 

47 

48The program outputs the following:: 

49 

50 Hello, World! -> ['Hello', ',', 'World', '!'] 

51 

52The Python representation of the grammar is quite readable, owing to the self-explanatory 

53class names, and the use of '+', '|' and '^' operators. 

54 

55The L{ParseResults} object returned from L{ParserElement.parseString<ParserElement.parseString>} can be accessed as a nested list, a dictionary, or an 

56object with named attributes. 

57 

58The pyparsing module handles some of the problems that are typically vexing when writing text parsers: 

59 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.) 

60 - quoted strings 

61 - embedded comments 

62 

63 

64Getting Started - 

65----------------- 

66Visit the classes L{ParserElement} and L{ParseResults} to see the base classes that most other pyparsing 

67classes inherit from. Use the docstrings for examples of how to: 

68 - construct literal match expressions from L{Literal} and L{CaselessLiteral} classes 

69 - construct character word-group expressions using the L{Word} class 

70 - see how to create repetitive expressions using L{ZeroOrMore} and L{OneOrMore} classes 

71 - use L{'+'<And>}, L{'|'<MatchFirst>}, L{'^'<Or>}, and L{'&'<Each>} operators to combine simple expressions into more complex ones 

72 - associate names with your parsed results using L{ParserElement.setResultsName} 

73 - find some helpful expression short-cuts like L{delimitedList} and L{oneOf} 

74 - find more useful common expressions in the L{pyparsing_common} namespace class 

75""" 

76 

77__version__ = "2.2.1" 

78__versionTime__ = "18 Sep 2018 00:49 UTC" 

79__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" 

80 

81import string 

82from weakref import ref as wkref 

83import copy 

84import sys 

85import warnings 

86import re 

87import sre_constants 

88import collections 

89import pprint 

90import traceback 

91import types 

92from datetime import datetime 

93 

94try: 

95 from _thread import RLock 

96except ImportError: 

97 from threading import RLock 

98 

99try: 

100 # Python 3 

101 from collections.abc import Iterable 

102 from collections.abc import MutableMapping 

103except ImportError: 

104 # Python 2.7 

105 from collections import Iterable 

106 from collections import MutableMapping 

107 

108try: 

109 from collections import OrderedDict as _OrderedDict 

110except ImportError: 

111 try: 

112 from ordereddict import OrderedDict as _OrderedDict 

113 except ImportError: 

114 _OrderedDict = None 

115 

116#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) 

117 

118__all__ = [ 

119'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', 

120'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal', 

121'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or', 

122'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException', 

123'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException', 

124'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 

125'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 

126'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', 

127'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', 

128'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums', 

129'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno', 

130'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', 

131'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', 

132'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity', 

133'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', 

134'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', 

135'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass', 

136'CloseMatch', 'tokenMap', 'pyparsing_common', 

137] 

138 

# Interpreter detection and Python 2 / Python 3 compatibility names.
system_version = tuple(sys.version_info[:3])
PY_3 = (system_version[0] == 3)
if not PY_3:
    _MAX_INT = sys.maxint
    range = xrange

    def _ustr(obj):
        """Unicode-tolerant stand-in for str(obj) on Python 2.

        Unicode objects are returned unchanged.  Otherwise str(obj) is tried
        first; if that raises UnicodeEncodeError the object is encoded with
        the default encoding using 'xmlcharrefreplace', and the resulting
        character references are rewritten as backslash-u escapes.
        """
        if isinstance(obj, unicode):
            return obj

        try:
            # Same result as str(obj) whenever str(obj) works, so existing
            # callers are unaffected.
            return str(obj)
        except UnicodeEncodeError:
            encoded = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')
            xmlcharref = Regex(r'&#\d+;')
            xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])
            return xmlcharref.transformString(encoded)

    # single-argument builtins usable as parse actions; names missing from
    # this interpreter's __builtin__ module are skipped
    import __builtin__
    singleArgBuiltins = [
        getattr(__builtin__, fname)
        for fname in "sum len sorted reversed list tuple set any all min max".split()
        if hasattr(__builtin__, fname)
    ]

else:
    _MAX_INT = sys.maxsize
    basestring = str
    unichr = chr
    _ustr = str

    # single-argument builtins usable as parse actions
    singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max]

182 

# the type of generator objects, used for isinstance() checks
_generatorType = types.GeneratorType

184 

def _xml_escape(data):
    """Return data with &, >, <, double and single quotes replaced by XML entities."""
    # '&' comes first so the ampersands introduced by the later entities
    # are not escaped a second time
    replacements = (
        ('&', '&amp;'),
        ('>', '&gt;'),
        ('<', '&lt;'),
        ('"', '&quot;'),
        ("'", '&apos;'),
    )
    escaped = data
    for symbol, entity in replacements:
        escaped = escaped.replace(symbol, entity)
    return escaped

194 

class _Constants(object):
    """Empty class; instances serve as plain attribute containers."""

197 

# Character-set strings used to build expressions.
nums = "0123456789"
alphas = string.ascii_uppercase + string.ascii_lowercase
alphanums = alphas + nums
hexnums = nums + "ABCDEFabcdef"
_bslash = chr(92)  # a single backslash
# every printable character that is not whitespace
printables = "".join([c for c in string.printable if c not in string.whitespace])

204 

class ParseBaseException(Exception):
    """Common base class of the exceptions raised while parsing."""

    # Performance tuning: a *lot* of these get constructed, so the
    # constructor stays as small and fast as possible
    def __init__(self, pstr, loc=0, msg=None, elem=None):
        """Store the parsed string, location, message and parser element.

        When msg is omitted, pstr is taken to be the message and the
        stored input string is empty.
        """
        self.loc = loc
        if msg is not None:
            self.msg, self.pstr = msg, pstr
        else:
            self.msg, self.pstr = pstr, ""
        self.parserElement = elem
        self.args = (pstr, loc, msg)

    @classmethod
    def _from_exception(cls, pe):
        """
        Internal factory building an exception of this class from another
        parse exception - sidesteps __init__ signature differences among
        subclasses.
        """
        return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement)

    def __getattr__(self, aname):
        """Compute the location attributes on demand:
            - lineno - line number of the exception text
            - col / column - column number of the exception text
            - line - the line containing the exception text
        Any other name raises AttributeError.
        """
        if aname == "lineno":
            return lineno(self.loc, self.pstr)
        if aname in ("col", "column"):
            return col(self.loc, self.pstr)
        if aname == "line":
            return line(self.loc, self.pstr)
        raise AttributeError(aname)

    def __str__(self):
        details = (self.msg, self.loc, self.lineno, self.column)
        return "%s (at char %d), (line:%d, col:%d)" % details

    def __repr__(self):
        return _ustr(self)

    def markInputline(self, markerString=">!<"):
        """Return the stripped line holding the error, with markerString
        inserted at the error column (no marker if markerString is empty).
        """
        text = self.line
        cut = self.column - 1
        if markerString:
            text = text[:cut] + markerString + text[cut:]
        return text.strip()

    def __dir__(self):
        return ["lineno", "col", "line"] + dir(type(self))

260 

class ParseException(ParseBaseException):
    """
    Raised when a parse expression does not match the input.

    Attributes available by name:
     - lineno - line number of the exception text
     - col - column number of the exception text
     - line - the line containing the exception text

    Example::
        try:
            Word(nums).setName("integer").parseString("ABC")
        except ParseException as pe:
            print(pe)
            print("column: {}".format(pe.col))

    prints::
       Expected integer (at char 0), (line:1, col:1)
        column: 1
    """

281 

class ParseFatalException(ParseBaseException):
    """User-throwable exception, raised when inconsistent parse content
       is found; stops all parsing immediately."""

286 

class ParseSyntaxException(ParseFatalException):
    """Like L{ParseFatalException}, but raised internally when an
       L{ErrorStop<And._ErrorStop>} ('-' operator) signals that parsing must
       stop immediately because an unbacktrackable syntax error was found."""

292 

293#~ class ReparseException(ParseBaseException): 

294 #~ """Experimental class - parse actions can raise this exception to cause 

295 #~ pyparsing to reparse the input string: 

296 #~ - with a modified input string, and/or 

297 #~ - with a modified start location 

298 #~ Set the values of the ReparseException in the constructor, and raise the 

299 #~ exception in a parse action to cause pyparsing to use the new string/location. 

300 #~ Setting the values as None causes no change to be made. 

301 #~ """ 

302 #~ def __init_( self, newstring, restartLoc ): 

303 #~ self.newParseText = newstring 

304 #~ self.reparseLoc = restartLoc 

305 

class RecursiveGrammarException(Exception):
    """exception thrown by L{ParserElement.validate} if the grammar could be improperly recursive"""

    def __init__(self, parseElementList):
        # the trace of parse elements handed in by the caller
        self.parseElementTrace = parseElementList

    def __str__(self):
        # Wrap the trace in a 1-tuple: a bare tuple operand would be unpacked
        # by the % operator and fail unless it had exactly one element.
        return "RecursiveGrammarException: %s" % (self.parseElementTrace,)

313 

class _ParseResultsWithOffset(object):
    """Pairs a value with an offset; indexable like a 2-tuple."""

    def __init__(self, p1, p2):
        self.tup = (p1, p2)

    def __getitem__(self, i):
        return self.tup[i]

    def __repr__(self):
        # only the value part is shown
        value = self.tup[0]
        return repr(value)

    def setOffset(self, i):
        """Replace the offset, keeping the value."""
        value = self.tup[0]
        self.tup = (value, i)

323 

324class ParseResults(object): 

325 """ 

326 Structured parse results, to provide multiple means of access to the parsed data: 

327 - as a list (C{len(results)}) 

328 - by list index (C{results[0], results[1]}, etc.) 

329 - by attribute (C{results.<resultsName>} - see L{ParserElement.setResultsName}) 

330 

331 Example:: 

332 integer = Word(nums) 

333 date_str = (integer.setResultsName("year") + '/'  

334 + integer.setResultsName("month") + '/'  

335 + integer.setResultsName("day")) 

336 # equivalent form: 

337 # date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 

338 

339 # parseString returns a ParseResults object 

340 result = date_str.parseString("1999/12/31") 

341 

342 def test(s, fn=repr): 

343 print("%s -> %s" % (s, fn(eval(s)))) 

344 test("list(result)") 

345 test("result[0]") 

346 test("result['month']") 

347 test("result.day") 

348 test("'month' in result") 

349 test("'minutes' in result") 

350 test("result.dump()", str) 

351 prints:: 

352 list(result) -> ['1999', '/', '12', '/', '31'] 

353 result[0] -> '1999' 

354 result['month'] -> '12' 

355 result.day -> '31' 

356 'month' in result -> True 

357 'minutes' in result -> False 

358 result.dump() -> ['1999', '/', '12', '/', '31'] 

359 - day: 31 

360 - month: 12 

361 - year: 1999 

362 """ 

def __new__(cls, toklist=None, name=None, asList=True, modal=True):
    """Return toklist itself when it is already an instance of cls;
    otherwise create a new instance flagged as needing initialization.
    """
    if not isinstance(toklist, cls):
        fresh = object.__new__(cls)
        fresh.__doinit = True
        return fresh
    return toklist

369 

370 # Performance tuning: we construct a *lot* of these, so keep this 

371 # constructor as small and fast as possible 

def __init__(self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance):
    """Initialize the token list and, when a results name is given,
    register the tokens under that name.

    The state setup runs only while the __doinit flag is set, and clears it.
    """
    if self.__doinit:
        self.__doinit = False
        self.__name = None
        self.__parent = None
        self.__accumNames = {}
        self.__asList = asList
        self.__modal = modal
        if toklist is None:
            toklist = []
        if isinstance(toklist, list):
            self.__toklist = toklist[:]
        elif isinstance(toklist, _generatorType):
            self.__toklist = list(toklist)
        else:
            self.__toklist = [toklist]
        self.__tokdict = dict()

    # nothing more to do without a (non-empty) results name
    if not name:
        return

    if not modal:
        self.__accumNames[name] = 0
    if isinstance(name, int):
        name = _ustr(name)  # will always return a str, but use _ustr for consistency
    self.__name = name

    # empty token values (None, '' or []) are not registered under the name
    if isinstance(toklist, (type(None), basestring, list)) and toklist in (None, '', []):
        return

    if isinstance(toklist, basestring):
        toklist = [toklist]

    if not asList:
        try:
            self[name] = toklist[0]
        except (KeyError, TypeError, IndexError):
            self[name] = toklist
        return

    if isinstance(toklist, ParseResults):
        self[name] = _ParseResultsWithOffset(toklist.copy(), 0)
    else:
        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]), 0)
    self[name].__name = name

410 

def __getitem__(self, i):
    """Index by position (int or slice) into the token list, or by results name.

    A name listed in the accumulating names yields a ParseResults of all
    its values; any other name yields the value of its last occurrence.
    """
    if isinstance(i, (int, slice)):
        return self.__toklist[i]
    if i in self.__accumNames:
        return ParseResults([v[0] for v in self.__tokdict[i]])
    return self.__tokdict[i][-1][0]

419 

def __setitem__(self, k, v, isinstance=isinstance):
    """Assign a token by position, or add an occurrence under a results name.

    A _ParseResultsWithOffset value is always stored as a named occurrence;
    otherwise int/slice keys write into the token list and any other key
    stores the value as a named occurrence with offset 0.
    """
    if isinstance(v, _ParseResultsWithOffset):
        entry, sub = v, v[0]
    elif isinstance(k, (int, slice)):
        self.__toklist[k] = v
        entry, sub = None, v
    else:
        entry, sub = _ParseResultsWithOffset(v, 0), v

    if entry is not None:
        # a new list is built rather than appending to the stored one
        self.__tokdict[k] = self.__tokdict.get(k, list()) + [entry]

    if isinstance(sub, ParseResults):
        sub.__parent = wkref(self)

432 

def __delitem__(self, i):
    """Delete tokens by position (int or slice), or a results name by key.

    After a positional delete, the offsets recorded for named results that
    lie beyond each removed index are shifted down.
    """
    if not isinstance(i, (int, slice)):
        del self.__tokdict[i]
        return

    length_before = len(self.__toklist)
    del self.__toklist[i]

    # normalize an int index to a one-element slice
    if isinstance(i, int):
        if i < 0:
            i += length_before
        i = slice(i, i + 1)

    # indices that were removed, in reverse of the slice's own order
    removed = list(range(*i.indices(length_before)))[::-1]

    # fix up the offsets stored in the token dictionary
    for occurrences in self.__tokdict.values():
        for j in removed:
            for k, (value, position) in enumerate(occurrences):
                occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))

453 

def __contains__(self, k):
    """True when k is a defined results name (token values are not searched)."""
    names = self.__tokdict
    return k in names

456 

def __len__(self):
    """Number of tokens in the token list."""
    return len(self.__toklist)

def __bool__(self):
    """True when the token list is non-empty."""
    return bool(self.__toklist)
__nonzero__ = __bool__

def __iter__(self):
    """Iterate over the tokens in order."""
    tokens = self.__toklist
    return iter(tokens)

def __reversed__(self):
    """Iterate over a reversed copy of the token list."""
    backwards = self.__toklist[::-1]
    return iter(backwards)

def _iterkeys(self):
    """Iterator over the defined results names."""
    names = self.__tokdict
    # use the dict's own iterkeys() where it has one
    if not hasattr(names, "iterkeys"):
        return iter(names)
    return names.iterkeys()

467 

def _itervalues(self):
    """Generator over the value of each defined results name."""
    names = self._iterkeys()
    return (self[name] for name in names)

470 

def _iteritems(self):
    """Generator over (results name, value) pairs."""
    names = self._iterkeys()
    return ((name, self[name]) for name in names)

473 

if not PY_3:
    # Python 2.x only: iterator of all named result keys
    iterkeys = _iterkeys

    # Python 2.x only: iterator of all named result values
    itervalues = _itervalues

    # Python 2.x only: iterator of all named result key-value tuples
    iteritems = _iteritems

    def keys(self):
        """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x)."""
        return list(self.iterkeys())

    def values(self):
        """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x)."""
        return list(self.itervalues())

    def items(self):
        """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x)."""
        return list(self.iteritems())

else:
    # Python 3.x only: iterator of all named result keys
    keys = _iterkeys

    # Python 3.x only: iterator of all named result values
    values = _itervalues

    # Python 3.x only: iterator of all named result key-value tuples
    items = _iteritems

505 

def haskeys(self):
    """Report whether any results names are defined.

    Handy because keys() returns an iterator, whose truth value says
    nothing about whether names exist."""
    has_names = bool(self.__tokdict)
    return has_names

510 

def pop(self, *args, **kwargs):
    """
    Remove and return an item, with either C{list} or C{dict} semantics.

    With no argument the last token is popped; an integer argument pops
    the token at that index.  A non-integer argument (typically a string)
    pops the matching results name instead; as with C{dict.pop()}, a
    second positional argument or a C{default} keyword supplies the value
    returned when that name is not defined.  Any other keyword raises
    C{TypeError}.

    Example::
        def remove_first(tokens):
            tokens.pop(0)
        print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321']

        # popping a results name does not remove the value from the token list
        def remove_LABEL(tokens):
            tokens.pop("LABEL")
            return tokens
    """
    if not args:
        args = [-1]
    for option, value in kwargs.items():
        if option != 'default':
            raise TypeError("pop() got an unexpected keyword argument '%s'" % option)
        args = (args[0], value)

    key = args[0]
    # the default is used only for a non-int key, given with a default,
    # that is not a defined results name
    if not isinstance(key, int) and len(args) != 1 and key not in self:
        return args[1]

    popped = self[key]
    del self[key]
    return popped

562 

def get(self, key, defaultValue=None):
    """
    Look up a results name, falling back to C{defaultValue} (C{None}
    unless given) when the name is not defined.  Similar to C{dict.get()}.

    Example::
        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        result = date_str.parseString("1999/12/31")
        print(result.get("year")) # -> '1999'
        print(result.get("hour", "not specified")) # -> 'not specified'
        print(result.get("hour")) # -> None
    """
    return self[key] if key in self else defaultValue

584 

def insert(self, index, insStr):
    """
    Insert a new element at position C{index} of the token list.
    Similar to C{list.insert()}.

    Example::
        # use a parse action to insert the parse location in the front of the parsed results
        def insert_locn(locn, tokens):
            tokens.insert(0, locn)
        print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321']
    """
    self.__toklist.insert(index, insStr)
    # bump the recorded offset of every named result lying beyond index
    for occurrences in self.__tokdict.values():
        occurrences[:] = [
            _ParseResultsWithOffset(value, position + (position > index))
            for value, position in occurrences
        ]

604 

def append(self, item):
    """
    Add one element to the end of the token list.

    Example::
        # use a parse action to compute the sum of the parsed integers, and add it to the end
        def append_sum(tokens):
            tokens.append(sum(map(int, tokens)))
        print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444]
    """
    tokens = self.__toklist
    tokens.append(item)

618 

def extend(self, itemseq):
    """
    Add a sequence of elements to the end of the token list.  A
    C{ParseResults} argument is merged with C{+=}; any other iterable is
    appended element by element.

    Example::
        patt = OneOrMore(Word(alphas))

        # use a parse action to append the reverse of the matched strings, to make a palindrome
        def make_palindrome(tokens):
            tokens.extend(reversed([t[::-1] for t in tokens]))
            return ''.join(tokens)
        print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
    """
    if not isinstance(itemseq, ParseResults):
        self.__toklist.extend(itemseq)
        return
    self += itemseq

636 

def clear(self):
    """
    Remove every token and every results name, in place.
    """
    self.__toklist[:] = []
    self.__tokdict.clear()

643 

644 def __getattr__( self, name ): 

645 try: 

646 return self[name] 

647 except KeyError: 

648 return "" 

649 

650 if name in self.__tokdict: 

651 if name not in self.__accumNames: 

652 return self.__tokdict[name][-1][0] 

653 else: 

654 return ParseResults([ v[0] for v in self.__tokdict[name] ]) 

655 else: 

656 return "" 

657 

def __add__(self, other):
    """Return a copy of self with other merged in via +=; self is left unchanged."""
    combined = self.copy()
    combined += other
    return combined

662 

def __iadd__(self, other):
    """Merge other's tokens, named results and accumulating names into self.

    Offsets of other's named results are shifted by the current token
    count; negative offsets become exactly that count.
    """
    if other.__tokdict:
        offset = len(self.__toklist)
        # build every shifted entry first, then store them
        shifted = []
        for key, occurrences in other.__tokdict.items():
            for occurrence in occurrences:
                position = occurrence[1]
                new_position = offset if position < 0 else position + offset
                shifted.append((key, _ParseResultsWithOffset(occurrence[0], new_position)))
        for key, entry in shifted:
            self[key] = entry
            if isinstance(entry[0], ParseResults):
                entry[0].__parent = wkref(self)

    self.__toklist += other.__toklist
    self.__accumNames.update(other.__accumNames)
    return self

678 

def __radd__(self, other):
    """Support C{0 + results}, so that the C{sum()} builtin can merge results.

    An integer C{0} on the left yields a copy of self.  For any other left
    operand C{NotImplemented} is returned, so Python raises C{TypeError}.
    """
    if isinstance(other, int) and other == 0:
        # useful for merging many ParseResults using sum() builtin
        return self.copy()
    # Evaluating ``other + self`` here would dispatch straight back to this
    # method whenever ``other`` cannot add a ParseResults, recursing until
    # RecursionError instead of raising the intended TypeError.
    return NotImplemented

686 

def __repr__(self):
    """Show the repr of the token list and of the named-results dictionary."""
    tokens_repr = repr(self.__toklist)
    names_repr = repr(self.__tokdict)
    return "(%s, %s)" % (tokens_repr, names_repr)

689 

def __str__(self):
    """List-like display: nested ParseResults via _ustr, other tokens via repr."""
    rendered = []
    for tok in self.__toklist:
        rendered.append(_ustr(tok) if isinstance(tok, ParseResults) else repr(tok))
    return '[' + ', '.join(rendered) + ']'

692 

def _asStringList(self, sep=''):
    """Flatten the tokens into a list of strings.

    A non-empty sep is inserted between top-level tokens; nested
    ParseResults are flattened recursively without a separator.
    """
    pieces = []
    for tok in self.__toklist:
        if pieces and sep:
            pieces.append(sep)
        if not isinstance(tok, ParseResults):
            pieces.append(_ustr(tok))
        else:
            pieces += tok._asStringList()
    return pieces

703 

def asList(self):
    """
    Return the tokens as a plain (nested) list; nested C{ParseResults}
    are converted recursively, other tokens are kept as they are.

    Example::
        patt = OneOrMore(Word(alphas))
        result = patt.parseString("sldkj lsdkj sldkj")
        # the result prints like a list but is a ParseResults
        print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj']

        # Use asList() to create an actual list
        result_list = result.asList()
        print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']
    """
    plain = []
    for tok in self.__toklist:
        if isinstance(tok, ParseResults):
            tok = tok.asList()
        plain.append(tok)
    return plain

719 

def asDict(self):
    """
    Return the named results as a (nested) C{dict}.  A nested
    C{ParseResults} value becomes a dict when it has results names of its
    own, otherwise a list.

    Example::
        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        result = date_str.parseString('12/31/1999')
        result_dict = result.asDict()
        print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'}

        # a ParseResults is not JSON serializable, but its dict form is
        import json
        print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"}
    """
    item_fn = self.items if PY_3 else self.iteritems

    def toItem(obj):
        # plain values pass through untouched
        if not isinstance(obj, ParseResults):
            return obj
        if obj.haskeys():
            return obj.asDict()
        return [toItem(v) for v in obj]

    return {key: toItem(value) for key, value in item_fn()}

754 

def copy(self):
    """
    Return a new C{ParseResults} built from this one's token list, with a
    shallow copy of the named-results dictionary and the same parent,
    accumulating names and results name.
    """
    duplicate = ParseResults(self.__toklist)
    duplicate.__name = self.__name
    duplicate.__parent = self.__parent
    duplicate.__tokdict = self.__tokdict.copy()
    duplicate.__accumNames.update(self.__accumNames)
    return duplicate

765 

def asXML(self, doctag=None, namedItemsOnly=False, indent="", formatted=True):
    """
    (Deprecated) Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.

    The enclosing tag is C{doctag} if given, else this object's results
    name, else "ITEM"; with C{namedItemsOnly} an unnamed object yields an
    empty string and unnamed tokens are left out.  With
    C{formatted=False} no newlines or indentation are emitted.
    """
    # token position -> results name
    namedItems = {}
    for tagname, occurrences in self.__tokdict.items():
        for occurrence in occurrences:
            namedItems[occurrence[1]] = tagname

    if formatted:
        nl = "\n"
        # NOTE(review): literal copied as rendered in the source listing, whose
        # whitespace may have been collapsed - confirm the indent width
        nextLevelIndent = indent + " "
    else:
        # collapse out indents if formatting is not desired
        nl = indent = nextLevelIndent = ""

    if doctag is not None:
        selfTag = doctag
    elif self.__name:
        selfTag = self.__name
    else:
        selfTag = None

    if not selfTag:
        if namedItemsOnly:
            return ""
        selfTag = "ITEM"

    out = [nl, indent, "<", selfTag, ">"]
    childNamedOnly = namedItemsOnly and doctag is None

    for i, res in enumerate(self.__toklist):
        resTag = namedItems.get(i)
        if isinstance(res, ParseResults):
            out.append(res.asXML(resTag, childNamedOnly, nextLevelIndent, formatted))
            continue
        # individual token: fall back to a generic tag unless only named
        # items were requested
        if not resTag:
            if namedItemsOnly:
                continue
            resTag = "ITEM"
        xmlBodyText = _xml_escape(_ustr(res))
        out.extend([nl, nextLevelIndent, "<", resTag, ">",
                    xmlBodyText,
                    "</", resTag, ">"])

    out.extend([nl, indent, "</", selfTag, ">"])
    return "".join(out)

826 

def __lookup(self,sub):
    """Return the results name under which the very object C{sub} is stored, or None.

    Matching is by identity (C{is}), not equality.
    """
    for resultsName, occurrences in self.__tokdict.items():
        if any(sub is value for value, _ in occurrences):
            return resultsName
    return None

833 

def getName(self):
    r"""
    Return the results name attached to this token expression, or None.

    Handy when one of several alternative expressions may have matched at a
    given location and the caller needs to know which.

    Example::
        integer = Word(nums)
        ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
        house_number_expr = Suppress('#') + Word(nums, alphanums)
        user_data = (Group(house_number_expr)("house_number")
                    | Group(ssn_expr)("ssn")
                    | Group(integer)("age"))
        user_info = OneOrMore(user_data)

        result = user_info.parseString("22 111-22-3333 #221B")
        for item in result:
            print(item.getName(), ':', item[0])
    prints::
        age : 22
        ssn : 111-22-3333
        house_number : 221B
    """
    # 1) an explicit name on this object wins
    if self.__name:
        return self.__name

    # 2) otherwise ask the parent (held through a callable reference) which name we were stored under
    if self.__parent:
        owner = self.__parent()
        return owner.__lookup(self) if owner else None

    # 3) finally, a single token with a single named entry at offset 0 or -1
    if len(self) != 1 or len(self.__tokdict) != 1:
        return None
    onlyName, occurrences = next(iter(self.__tokdict.items()))
    if occurrences[0][1] in (0,-1):
        return onlyName
    return None

870 

def dump(self, indent='', depth=0, full=True):
    """
    Return a diagnostic, multi-line listing of this C{ParseResults}.

    The first line is the token list; with C{full} true it is followed by one
    C{- name: value} line per results name (sorted by name), or, when there are
    no names but nested results exist, by an indexed listing of the items.
    C{indent} is prefixed to every line so the text can be embedded in a larger
    display; C{depth} is the current nesting level.

    Example::
        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        result = date_str.parseString('12/31/1999')
        print(result.dump())
    prints::
        ['12', '/', '31', '/', '1999']
        - day: 1999
        - month: 31
        - year: 12
    """
    pieces = [ indent + _ustr(self.asList()) ]
    if not full:
        return "".join(pieces)

    if self.haskeys():
        pad = ' ' * depth
        for key, val in sorted((str(k), v) for k,v in self.items()):
            pieces.append('\n')
            pieces.append( "%s%s- %s: " % (indent, pad, key) )
            if not isinstance(val, ParseResults):
                pieces.append(repr(val))
            elif val:
                # non-empty nested results are dumped recursively
                pieces.append( val.dump(indent, depth+1) )
            else:
                pieces.append(_ustr(val))
    elif any(isinstance(item, ParseResults) for item in self):
        pad = ' ' * depth
        childPad = ' ' * (depth+1)
        for idx, item in enumerate(self):
            if isinstance(item, ParseResults):
                rendered = item.dump(indent, depth+1)
            else:
                rendered = _ustr(item)
            pieces.append("\n%s%s[%d]:\n%s%s%s" % (indent, pad, idx, indent, childPad, rendered))

    return "".join(pieces)

915 

def pprint(self, *args, **kwargs):
    """
    Pretty-print the results, converted with C{asList()}, via the C{pprint} module.

    Any extra positional or keyword arguments are passed straight through to
    C{pprint.pprint} (U{http://docs.python.org/3/library/pprint.html#pprint.pprint}).

    Example::
        ident = Word(alphas, alphanums)
        num = Word(nums)
        func = Forward()
        term = ident | num | Group('(' + func + ')')
        func <<= ident + Group(Optional(delimitedList(term)))
        result = func.parseString("fna a,b,(fnb c,d,200),100")
        result.pprint(width=40)
    prints::
        ['fna',
         ['a',
          'b',
          ['(', 'fnb', ['c', 'd', '200'], ')'],
          '100']]
    """
    listing = self.asList()
    pprint.pprint(listing, *args, **kwargs)

938 

939 # add support for pickle protocol 

def __getstate__(self):
    """Return the pickle state: C{(toklist, (tokdict copy, parent or None, accumNames, name))}.

    The parent is stored as the referenced object itself (not the callable
    reference); a missing or falsy parent is stored as None.
    """
    parentRef = self.__parent
    parent = parentRef() if parentRef is not None else None
    extras = ( self.__tokdict.copy(),
               parent or None,
               self.__accumNames,
               self.__name )
    return ( self.__toklist, extras )

946 

def __setstate__(self,state):
    """Restore the object from the 2-tuple produced by C{__getstate__}.

    The accumulated names are copied into a fresh dict, and a non-None parent
    is re-wrapped with C{wkref}.
    """
    toklist, extras = state[0], state[1]
    tokdict, par, inAccumNames, name = extras

    self.__toklist = toklist
    self.__tokdict = tokdict
    self.__name = name

    restoredNames = {}
    restoredNames.update(inAccumNames)
    self.__accumNames = restoredNames

    self.__parent = wkref(par) if par is not None else None

959 

def __getnewargs__(self):
    """Return the 4-tuple (token list, name, asList flag, modal flag) used as constructor arguments when unpickling."""
    return ( self.__toklist,
             self.__name,
             self.__asList,
             self.__modal )

962 

def __dir__(self):
    """List the attributes of the type followed by the keys reported by C{self.keys()}."""
    names = dir(type(self))
    names.extend(self.keys())
    return names

965 

# register ParseResults as a virtual subclass of the MutableMapping ABC
MutableMapping.register(ParseResults)

967 

def col (loc,strg):
    """Return the column of position C{loc} within C{strg}; the first column is 1.

    Newlines are the line separators. A position strictly inside the string
    that directly follows a newline is reported as column 1.

    Note: by default the input string has its tabs expanded before parsing
    starts. See L{I{ParserElement.parseString}<ParserElement.parseString>} for
    how to keep the parsed string, the parse location, and line/column numbers
    consistent when the input contains C{<TAB>}s.
    """
    if 0 < loc < len(strg) and strg[loc-1] == '\n':
        return 1
    # rfind yields -1 when no newline precedes loc, giving loc + 1 on the first line
    return loc - strg.rfind("\n", 0, loc)

980 

def lineno(loc,strg):
    """Return the line number of position C{loc} within C{strg}; the first line is 1.

    Computed as one more than the number of newlines found before C{loc}.

    Note: by default the input string has its tabs expanded before parsing
    starts. See L{I{ParserElement.parseString}<ParserElement.parseString>} for
    how to keep the parsed string, the parse location, and line/column numbers
    consistent when the input contains C{<TAB>}s.
    """
    newlinesBefore = strg.count("\n", 0, loc)
    return newlinesBefore + 1

992 

def line( loc, strg ):
    """Return the text of the line containing position C{loc} in C{strg}, without its newline.

    Newlines are the line separators.
    """
    start = strg.rfind("\n", 0, loc) + 1
    stop = strg.find("\n", loc)
    if stop < 0:
        # no newline at or after loc: the line runs to the end of the string
        return strg[start:]
    return strg[start:stop]

1002 

def _defaultStartDebugAction( instring, loc, expr ):
    """Print a message naming the expression about to be matched, its location, and (line,column)."""
    message = "Match " + _ustr(expr) + " at loc " + _ustr(loc)
    message += "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
    print (message)

1005 

def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Print the expression that matched together with its tokens as a list."""
    matched = str(toks.asList())
    print ("Matched " + _ustr(expr) + " -> " + matched)

1008 

def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Print the exception that was raised while matching."""
    message = "Exception raised:" + _ustr(exc)
    print (message)

1011 

def nullDebugAction(*args):
    """'Do-nothing' debug action: accepts any positional arguments, ignores them, and returns None.

    Used to suppress debugging output during parsing.
    """
    return None

1015 

1016# Only works on Python 3.x - nonlocal is toxic to Python 2 installs 

1017#~ 'decorator to trim function calls to match the arity of the target' 

1018#~ def _trim_arity(func, maxargs=3): 

1019 #~ if func in singleArgBuiltins: 

1020 #~ return lambda s,l,t: func(t) 

1021 #~ limit = 0 

1022 #~ foundArity = False 

1023 #~ def wrapper(*args): 

1024 #~ nonlocal limit,foundArity 

1025 #~ while 1: 

1026 #~ try: 

1027 #~ ret = func(*args[limit:]) 

1028 #~ foundArity = True 

1029 #~ return ret 

1030 #~ except TypeError: 

1031 #~ if limit == maxargs or foundArity: 

1032 #~ raise 

1033 #~ limit += 1 

1034 #~ continue 

1035 #~ return wrapper 

1036 

1037# this version is Python 2.x-3.x cross-compatible 

1038'decorator to trim function calls to match the arity of the target' 

def _trim_arity(func, maxargs=2):
    """Wrap a parse action so it can always be invoked as C{wrapper(s, loc, toks)}.

    The wrapper first calls C{func} with all arguments. Each time that call
    itself raises C{TypeError}, one more leading argument is dropped and the
    call is retried (while the number dropped is still <= C{maxargs}). Once a
    call has succeeded, the number of dropped arguments is kept and any later
    C{TypeError} is re-raised unchanged.

    Callables listed in C{singleArgBuiltins} are wrapped directly as
    C{lambda s,l,t: func(t)}.
    """
    if func in singleArgBuiltins:
        return lambda s,l,t: func(t)
    # one-element lists serve as mutable cells shared with wrapper()
    limit = [0]
    foundArity = [False]
    
    # traceback return data structure changed in Py3.5 - normalize back to plain tuples
    if system_version[:2] >= (3,5):
        def extract_stack(limit=0):
            # special handling for Python 3.5.0 - extra deep call stack by 1
            offset = -3 if system_version == (3,5,0) else -2
            frame_summary = traceback.extract_stack(limit=-offset+limit-1)[offset]
            return [frame_summary[:2]]
        def extract_tb(tb, limit=0):
            frames = traceback.extract_tb(tb, limit=limit)
            frame_summary = frames[-1]
            return [frame_summary[:2]]
    else:
        extract_stack = traceback.extract_stack
        extract_tb = traceback.extract_tb
    
    # synthesize what would be returned by traceback.extract_stack at the call to 
    # user's parse action 'func', so that we don't incur call penalty at parse time
    
    LINE_DIFF = 6
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND 
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    this_line = extract_stack(limit=2)[-1]
    pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF)

    def wrapper(*args):
        while 1:
            try:
                ret = func(*args[limit[0]:])
                foundArity[0] = True
                return ret
            except TypeError:
                # re-raise TypeErrors if they did not come from our arity testing
                if foundArity[0]:
                    raise
                else:
                    try:
                        tb = sys.exc_info()[-1]
                        # the innermost traceback frame must be the func(...) call line above;
                        # otherwise the TypeError came from inside func and is propagated
                        if not extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth:
                            raise
                    finally:
                        del tb

                # retry with one fewer leading argument
                if limit[0] <= maxargs:
                    limit[0] += 1
                    continue
                raise

    # copy func name to wrapper for sensible debug output
    func_name = "<parse action>"
    try:
        func_name = getattr(func, '__name__', 
                            getattr(func, '__class__').__name__)
    except Exception:
        func_name = str(func)
    wrapper.__name__ = func_name

    return wrapper

1102 

1103class ParserElement(object): 

1104 """Abstract base level parser element class.""" 

1105 DEFAULT_WHITE_CHARS = " \n\t\r" 

1106 verbose_stacktrace = False 

1107 

1108 @staticmethod 

1109 def setDefaultWhitespaceChars( chars ): 

1110 r""" 

1111 Overrides the default whitespace chars 

1112 

1113 Example:: 

1114 # default whitespace chars are space, <TAB> and newline 

1115 OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def', 'ghi', 'jkl'] 

1116  

1117 # change to just treat newline as significant 

1118 ParserElement.setDefaultWhitespaceChars(" \t") 

1119 OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def'] 

1120 """ 

1121 ParserElement.DEFAULT_WHITE_CHARS = chars 

1122 

1123 @staticmethod 

1124 def inlineLiteralsUsing(cls): 

1125 """ 

1126 Set class to be used for inclusion of string literals into a parser. 

1127  

1128 Example:: 

1129 # default literal class used is Literal 

1130 integer = Word(nums) 

1131 date_str = integer("year") + '/' + integer("month") + '/' + integer("day")  

1132 

1133 date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '31'] 

1134 

1135 

1136 # change to Suppress 

1137 ParserElement.inlineLiteralsUsing(Suppress) 

1138 date_str = integer("year") + '/' + integer("month") + '/' + integer("day")  

1139 

1140 date_str.parseString("1999/12/31") # -> ['1999', '12', '31'] 

1141 """ 

1142 ParserElement._literalStringClass = cls 

1143 

1144 def __init__( self, savelist=False ): 

1145 self.parseAction = list() 

1146 self.failAction = None 

1147 #~ self.name = "<unknown>" # don't define self.name, let subclasses try/except upcall 

1148 self.strRepr = None 

1149 self.resultsName = None 

1150 self.saveAsList = savelist 

1151 self.skipWhitespace = True 

1152 self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS 

1153 self.copyDefaultWhiteChars = True 

1154 self.mayReturnEmpty = False # used when checking for left-recursion 

1155 self.keepTabs = False 

1156 self.ignoreExprs = list() 

1157 self.debug = False 

1158 self.streamlined = False 

1159 self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index 

1160 self.errmsg = "" 

1161 self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all) 

1162 self.debugActions = ( None, None, None ) #custom debug actions 

1163 self.re = None 

1164 self.callPreparse = True # used to avoid redundant calls to preParse 

1165 self.callDuringTry = False 

1166 

1167 def copy( self ): 

1168 """ 

1169 Make a copy of this C{ParserElement}. Useful for defining different parse actions 

1170 for the same parsing pattern, using copies of the original parse element. 

1171  

1172 Example:: 

1173 integer = Word(nums).setParseAction(lambda toks: int(toks[0])) 

1174 integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K") 

1175 integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M") 

1176  

1177 print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M")) 

1178 prints:: 

1179 [5120, 100, 655360, 268435456] 

1180 Equivalent form of C{expr.copy()} is just C{expr()}:: 

1181 integerM = integer().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M") 

1182 """ 

1183 cpy = copy.copy( self ) 

1184 cpy.parseAction = self.parseAction[:] 

1185 cpy.ignoreExprs = self.ignoreExprs[:] 

1186 if self.copyDefaultWhiteChars: 

1187 cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS 

1188 return cpy 

1189 

def setName( self, name ):
    """
    Give this expression a name, which makes debugging output and exception
    messages clearer. Returns C{self}.

    Example::
        Word(nums).parseString("ABC")  # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1)
        Word(nums).setName("integer").parseString("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)
    """
    self.name = name
    message = "Expected " + self.name
    self.errmsg = message
    # keep an already-attached exception object in step with the new message
    if hasattr(self,"exception"):
        self.exception.msg = self.errmsg
    return self

1203 

def setResultsName( self, name, listAllMatches=False ):
    """
    Return a *copy* of this element whose matched tokens are filed under
    C{name} in the returned parse results.

    A copy is returned so that one basic element (an integer, say) can be used
    in several places under different names. A name ending in C{"*"} has the
    asterisk removed and behaves as if C{listAllMatches=True} had been passed.

    The short form C{expr("name")} is equivalent to
    C{expr.setResultsName("name")} - see L{I{__call__}<__call__>}.

    Example::
        date_str = (integer.setResultsName("year") + '/'
                    + integer.setResultsName("month") + '/'
                    + integer.setResultsName("day"))

        # equivalent form:
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
    """
    named = self.copy()
    collectAll = listAllMatches
    if name.endswith("*"):
        name = name[:-1]
        collectAll = True
    named.resultsName = name
    named.modalResults = not collectAll
    return named

1231 

def setBreak(self,breakFlag = True):
    """Arrange for the Python pdb debugger to be entered just before this element is parsed.

    With C{breakFlag} true, C{self._parse} is replaced by a wrapper that calls
    C{pdb.set_trace()} and then the original parse method. With C{breakFlag}
    false, a previously installed wrapper is removed again. Returns C{self}.
    """
    if not breakFlag:
        current = self._parse
        # only undo if a breaker wrapper is actually installed
        if hasattr(current,"_originalParseMethod"):
            self._parse = current._originalParseMethod
        return self

    wrapped = self._parse
    def breaker(instring, loc, doActions=True, callPreParse=True):
        import pdb
        pdb.set_trace()
        return wrapped( instring, loc, doActions, callPreParse )
    # remember what was wrapped so the break can be removed later
    breaker._originalParseMethod = wrapped
    self._parse = breaker
    return self

1249 

def setParseAction( self, *fns, **kwargs ):
    """
    Replace this element's parse actions with C{fns}; they run when the element
    matches successfully. Returns C{self}.

    A parse action is a callable taking 0-3 arguments, called as
    C{fn(s,loc,toks)}, C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
     - s   = the original string being parsed (see note below)
     - loc = the location of the matching substring
     - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
    A function that modifies the tokens can return them, and the returned value
    replaces the original tokens; otherwise it need not return anything.

    Optional keyword arguments:
     - callDuringTry = (default=C{False}) indicate if parse action should be run during lookaheads and alternate testing

    Note: by default the input string has its tabs expanded before parsing
    starts. See L{I{parseString}<parseString>} for how to keep the parsed
    string, the parse location, and line/column numbers consistent when the
    input contains C{<TAB>}s.

    Example::
        integer = Word(nums)
        date_str = integer + '/' + integer + '/' + integer

        date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']

        # use parse action to convert to ints at parse time
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        date_str = integer + '/' + integer + '/' + integer

        # note that integer fields are now ints, not strings
        date_str.parseString("1999/12/31")  # -> [1999, '/', 12, '/', 31]
    """
    # every action is adapted to the full (s, loc, toks) calling convention
    adapt = _trim_arity
    self.parseAction = [adapt(action) for action in fns]
    self.callDuringTry = kwargs.get("callDuringTry", False)
    return self

1287 

def addParseAction( self, *fns, **kwargs ):
    """
    Append C{fns} to this element's existing parse actions. Returns C{self}.
    See L{I{setParseAction}<setParseAction>} for the accepted signatures and
    keyword arguments.

    C{callDuringTry} is sticky: once set it is not cleared by a later call.

    See examples in L{I{copy}<copy>}.
    """
    adapt = _trim_arity
    self.parseAction += [adapt(action) for action in fns]
    if not self.callDuringTry:
        self.callDuringTry = kwargs.get("callDuringTry", False)
    return self

1297 

def addCondition(self, *fns, **kwargs):
    """Add one or more boolean predicate functions to this expression's parse actions.

    See L{I{setParseAction}<setParseAction>} for the accepted call signatures.
    Unlike C{setParseAction}, each function passed here must return a boolean
    success/fail value; a false result raises a parse exception. Returns C{self}.

    Optional keyword arguments:
     - message = define a custom message to be used in the raised exception
     - fatal   = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
     - callDuringTry = if True, also run the conditions during lookaheads and alternate testing

    Example::
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        year_int = integer.copy()
        year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
        date_str = year_int + '/' + integer + '/' + integer

        result = date_str.parseString("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
    """
    msg = kwargs.get("message", "failed user-defined condition")
    exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException

    def make_condition_action(condition):
        # Bind this particular condition now. A closure defined directly in the
        # loop would look the loop variable up at call time, so every action
        # would end up testing only the last condition passed in.
        checked = _trim_arity(condition)
        def pa(s,l,t):
            if not bool(checked(s,l,t)):
                raise exc_type(s,l,msg)
        return pa

    for fn in fns:
        self.parseAction.append(make_condition_action(fn))
    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self

1324 

def setFailAction( self, fn ):
    """Set the action to perform when parsing fails at this expression. Returns C{self}.

    The fail action is a callable invoked as C{fn(s,loc,expr,err)} where:
     - s = string being parsed
     - loc = location where expression match was attempted and failed
     - expr = the parse expression that failed
     - err = the exception thrown
    Its return value is not used. It may raise C{L{ParseFatalException}}
    to stop parsing immediately."""
    setattr(self, "failAction", fn)
    return self

1337 

def _skipIgnorables( self, instring, loc ):
    """Advance C{loc} past every match of the expressions in C{self.ignoreExprs}.

    All ignore expressions are tried repeatedly, in order, until a complete
    pass finds no further match; the resulting location is returned.
    """
    position = loc
    progressed = True
    while progressed:
        progressed = False
        for ignorable in self.ignoreExprs:
            try:
                # consume consecutive matches; the loop ends when _parse raises
                while True:
                    position, _ = ignorable._parse( instring, position )
                    progressed = True
            except ParseException:
                pass
    return position

1350 

def preParse( self, instring, loc ):
    """Return the location at which matching should actually begin.

    Ignorable expressions (if any are registered) are skipped first, then, if
    C{skipWhitespace} is set, any characters found in C{whiteChars}.
    """
    position = loc
    if self.ignoreExprs:
        position = self._skipIgnorables( instring, position )

    if not self.skipWhitespace:
        return position

    whitespace = self.whiteChars
    end = len(instring)
    while position < end and instring[position] in whitespace:
        position += 1
    return position

1362 

def parseImpl( self, instring, loc, doActions=True ):
    """Base implementation: consume nothing and return C{loc} with an empty token list."""
    noTokens = []
    return loc, noTokens

1365 

def postParse( self, instring, loc, tokenlist ):
    """Base implementation: hand back C{tokenlist} unchanged (the same object)."""
    result = tokenlist
    return result

1368 

1369 #~ @profile 

def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Match this element at C{loc} in C{instring} without consulting the packrat cache.

    Steps: optional C{preParse}, C{parseImpl}, C{postParse}, wrapping the
    tokens in a C{ParseResults}, then running the parse actions (when
    C{doActions} is true or C{callDuringTry} is set).

    Returns C{(loc, retTokens)} where C{loc} is the location returned by
    C{parseImpl}. An C{IndexError} raised by C{parseImpl} is converted to a
    C{ParseException} located at the end of the string.
    """
    debugging = ( self.debug ) #and doActions )

    # slow path: debug actions and/or a fail action must observe the attempt
    if debugging or self.failAction:
        #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
        if (self.debugActions[0] ):
            self.debugActions[0]( instring, loc, self )
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        try:
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        except ParseBaseException as err:
            #~ print ("Exception raised:", err)
            # notify the exception debug action and the fail action, then propagate
            if self.debugActions[2]:
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                self.failAction( instring, tokensStart, self, err )
            raise
    else:
        # fast path: no debugging and no fail action
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        # the IndexError guard is only set up when the element is flagged mayIndexError
        # or preloc is already at/after the end of the string
        if self.mayIndexError or preloc >= len(instring):
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        else:
            loc,tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    # a parse action returning a value replaces the current results
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
            except ParseBaseException as err:
                #~ print "Exception raised in user parse action:", err
                if (self.debugActions[2] ):
                    self.debugActions[2]( instring, tokensStart, self, err )
                raise
        else:
            # same loop as above, without the debug exception hook
            for fn in self.parseAction:
                tokens = fn( instring, tokensStart, retTokens )
                if tokens is not None:
                    retTokens = ParseResults( tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                              modal=self.modalResults )
    if debugging:
        #~ print ("Matched",self,"->",retTokens.asList())
        if (self.debugActions[1] ):
            self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens

1440 

def tryParse( self, instring, loc ):
    """Attempt a match at C{loc} with parse actions disabled and return the end location.

    A C{ParseFatalException} raised by the attempt is converted into a plain
    C{ParseException} at C{loc}.
    """
    try:
        outcome = self._parse( instring, loc, doActions=False )
    except ParseFatalException:
        raise ParseException( instring, loc, self.errmsg, self)
    return outcome[0]

1446 

def canParseNext(self, instring, loc):
    """Return True if C{tryParse} succeeds at C{loc}, False if it raises C{ParseException} or C{IndexError}."""
    try:
        self.tryParse(instring, loc)
    except (ParseException, IndexError):
        return False
    return True

1454 

class _UnboundedCache(object):
    """Cache with no size limit, backed by a plain dict held in a closure.

    C{get} returns the C{not_in_cache} sentinel for unknown keys. The
    operations are created per instance and attached as bound methods.
    """
    def __init__(self):
        store = {}
        missing = object()
        self.not_in_cache = missing

        def get(self, key):
            return store.get(key, missing)

        def set(self, key, value):
            store[key] = value

        def clear(self):
            store.clear()

        def cache_len(self):
            return len(store)

        # attach the closures to this instance under their public names
        for attr, impl in (("get", get), ("set", set), ("clear", clear), ("__len__", cache_len)):
            setattr(self, attr, types.MethodType(impl, self))

1476 

if _OrderedDict is not None:
    class _FifoCache(object):
        """Size-limited cache that evicts the oldest inserted entries first.

        C{get} returns the C{not_in_cache} sentinel for unknown keys. A C{size}
        of 0 (or less) keeps nothing, which effectively disables caching.
        NOTE: C{__len__} is attached to the instance, so it is reachable as
        C{obj.__len__()}; the builtin C{len()} looks special methods up on the type.
        """
        def __init__(self, size):
            self.not_in_cache = not_in_cache = object()

            cache = _OrderedDict()

            def get(self, key):
                return cache.get(key, not_in_cache)

            def set(self, key, value):
                cache[key] = value
                # drop entries in insertion order until within the limit
                while len(cache) > size:
                    try:
                        cache.popitem(False)
                    except KeyError:
                        # nothing left to evict (only reachable with a negative
                        # size); stop instead of looping forever
                        break

            def clear(self):
                cache.clear()

            def cache_len(self):
                return len(cache)

            self.get = types.MethodType(get, self)
            self.set = types.MethodType(set, self)
            self.clear = types.MethodType(clear, self)
            self.__len__ = types.MethodType(cache_len, self)

else:
    class _FifoCache(object):
        """Size-limited FIFO cache for interpreters without an ordered dict.

        Same contract as the ordered-dict variant: C{get} returns the
        C{not_in_cache} sentinel for unknown keys, and the oldest inserted keys
        are evicted first once more than C{size} keys are held.
        """
        def __init__(self, size):
            self.not_in_cache = not_in_cache = object()

            cache = {}
            # Deliberately unbounded: a deque created with maxlen=size silently
            # discards old keys on append, so their dict entries would never be
            # removed. Eviction is done explicitly in set() instead.
            key_fifo = collections.deque()

            def get(self, key):
                return cache.get(key, not_in_cache)

            def set(self, key, value):
                # a key keeps its original queue position when its value is replaced
                if key not in cache:
                    key_fifo.append(key)
                cache[key] = value
                while key_fifo and len(key_fifo) > size:
                    cache.pop(key_fifo.popleft(), None)

            def clear(self):
                cache.clear()
                key_fifo.clear()

            def cache_len(self):
                return len(cache)

            self.get = types.MethodType(get, self)
            self.set = types.MethodType(set, self)
            self.clear = types.MethodType(clear, self)
            self.__len__ = types.MethodType(cache_len, self)

1534 

# argument cache for optimizing repeated calls when backtracking through recursive expressions
packrat_cache = {} # this is set later by enablePackrat(); this is here so that resetCache() doesn't fail
# lock held by _parseCache() while it reads or updates the cache
packrat_cache_lock = RLock()
# [hits, misses] counters, indexed with HIT=0 / MISS=1 in _parseCache()
packrat_cache_stats = [0, 0]

1539 

1540 # this method gets repeatedly called during backtracking with the same arguments - 

1541 # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression 

1542 def _parseCache( self, instring, loc, doActions=True, callPreParse=True ): 

1543 HIT, MISS = 0, 1 

1544 lookup = (self, instring, loc, callPreParse, doActions) 

1545 with ParserElement.packrat_cache_lock: 

1546 cache = ParserElement.packrat_cache 

1547 value = cache.get(lookup) 

1548 if value is cache.not_in_cache: 

1549 ParserElement.packrat_cache_stats[MISS] += 1 

1550 try: 

1551 value = self._parseNoCache(instring, loc, doActions, callPreParse) 

1552 except ParseBaseException as pe: 

1553 # cache a copy of the exception, without the traceback 

1554 cache.set(lookup, pe.__class__(*pe.args)) 

1555 raise 

1556 else: 

1557 cache.set(lookup, (value[0], value[1].copy())) 

1558 return value 

1559 else: 

1560 ParserElement.packrat_cache_stats[HIT] += 1 

1561 if isinstance(value, Exception): 

1562 raise value 

1563 return (value[0], value[1].copy()) 

1564 

# default parse entry point is the uncached implementation; enablePackrat() rebinds it to _parseCache
_parse = _parseNoCache

1566 

1567 @staticmethod 

1568 def resetCache(): 

1569 ParserElement.packrat_cache.clear() 

1570 ParserElement.packrat_cache_stats[:] = [0] * len(ParserElement.packrat_cache_stats) 

1571 

1572 _packratEnabled = False 

1573 @staticmethod 

1574 def enablePackrat(cache_size_limit=128): 

1575 """Enables "packrat" parsing, which adds memoizing to the parsing logic. 

1576 Repeated parse attempts at the same string location (which happens 

1577 often in many complex grammars) can immediately return a cached value, 

1578 instead of re-executing parsing/validating code. Memoizing is done of 

1579 both valid results and parsing exceptions. 

1580  

1581 Parameters: 

1582 - cache_size_limit - (default=C{128}) - if an integer value is provided 

1583 will limit the size of the packrat cache; if None is passed, then 

1584 the cache size will be unbounded; if 0 is passed, the cache will 

1585 be effectively disabled. 

1586  

1587 This speedup may break existing programs that use parse actions that 

1588 have side-effects. For this reason, packrat parsing is disabled when 

1589 you first import pyparsing. To activate the packrat feature, your 

1590 program must call the class method C{ParserElement.enablePackrat()}. If 

1591 your program uses C{psyco} to "compile as you go", you must call 

1592 C{enablePackrat} before calling C{psyco.full()}. If you do not do this, 

1593 Python will crash. For best results, call C{enablePackrat()} immediately 

1594 after importing pyparsing. 

1595  

1596 Example:: 

1597 import pyparsing 

1598 pyparsing.ParserElement.enablePackrat() 

1599 """ 

1600 if not ParserElement._packratEnabled: 

1601 ParserElement._packratEnabled = True 

1602 if cache_size_limit is None: 

1603 ParserElement.packrat_cache = ParserElement._UnboundedCache() 

1604 else: 

1605 ParserElement.packrat_cache = ParserElement._FifoCache(cache_size_limit) 

1606 ParserElement._parse = ParserElement._parseCache 

1607 

def parseString( self, instring, parseAll=False ):
    """
    Run this expression against C{instring}, starting at position 0, and
    return the resulting tokens.  This is the main entry point for client
    code once the grammar has been completely built.

    Set C{parseAll} to True to require that the whole input string be
    consumed (equivalent to ending the grammar with C{L{StringEnd()}}).

    Note: unless C{keepTabs} is set on this expression, C{expandtabs()} is
    applied to the input before parsing so that column numbers come out
    right in parse actions.  If the input contains tabs and your parse
    actions use the C{loc} argument to index into the string, keep a
    consistent view of the input by doing one of the following:
     - call C{parseWithTabs} on the grammar before calling C{parseString}
       (see L{I{parseWithTabs}<parseWithTabs>})
     - write parse actions with the full C{(s,loc,toks)} signature and
       index into the C{s} argument they receive
     - explicitly expand the tabs in the input string before calling
       C{parseString}

    Example::
        Word('a').parseString('aaaaabaaa')  # -> ['aaaaa']
        Word('a').parseString('aaaaabaaa', parseAll=True)  # -> Exception: Expected end of text
    """
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()
    try:
        end_loc, parsed = self._parse( instring, 0 )
        if parseAll:
            # skip trailing whitespace/ignorables, then insist on end of input
            end_loc = self.preParse( instring, end_loc )
            end_check = Empty() + StringEnd()
            end_check._parse( instring, end_loc )
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
        raise
    return parsed

1657 

def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
    """
    Generator that scans the input string for matches of this expression.
    Each match is yielded as a C{(tokens, start, end)} tuple.  May be called
    with the optional C{maxMatches} argument, to stop scanning after 'n'
    matches have been found.  If C{overlap} is specified, then overlapping
    matches will be reported.

    Note that the start and end locations are reported relative to the string
    being parsed (which has had C{expandtabs()} applied unless C{keepTabs} is
    set).  See L{I{parseString}<parseString>} for more information on parsing
    strings with embedded tabs.

    Example::
        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens,start,end in Word(alphas).scanString(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    # bind the bound methods to locals once, since they are called on every iteration
    preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.resetCache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                # preloc is the candidate start after pre-parse skipping
                preloc = preparseFn( instring, loc )
                nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
            except ParseException:
                # no match here - retry one character past the attempted start
                loc = preloc+1
            else:
                if nextLoc > loc:
                    matches += 1
                    yield tokens, preloc, nextLoc
                    if overlap:
                        # NOTE(review): nextloc (lowercase) is only used in the comparison;
                        # the assignment below uses nextLoc (end of match) - confirm intent
                        nextloc = preparseFn( instring, loc )
                        if nextloc > loc:
                            loc = nextLoc
                        else:
                            loc += 1
                    else:
                        # non-overlapping scan resumes at the end of this match
                        loc = nextLoc
                else:
                    # parse did not advance past loc - step forward to avoid looping in place
                    loc = preloc+1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        else:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc

1728 

def transformString( self, instring ):
    """
    Extension to C{L{scanString}} that rewrites the input: every match is
    replaced by the tokens returned for it (typically modified by a parse
    action), and the text between matches is copied through unchanged.
    Returns the resulting string.

    Note that this sets C{keepTabs} to True on the expression, so tabs in
    the input are preserved and locations agree with C{scanString}.

    Example::
        wd = Word(alphas)
        wd.setParseAction(lambda toks: toks[0].title())

        print(wd.transformString("now is the winter of our discontent made glorious summer by this sun of york."))
    Prints::
        Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
    """
    pieces = []
    prev_end = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transformString and scanString
    self.keepTabs = True
    try:
        for toks, start, end in self.scanString( instring ):
            # untouched text between the previous match and this one
            pieces.append( instring[prev_end:start] )
            if toks:
                if isinstance(toks, ParseResults):
                    pieces.extend( toks.asList() )
                elif isinstance(toks, list):
                    pieces.extend( toks )
                else:
                    pieces.append( toks )
            prev_end = end
        pieces.append( instring[prev_end:] )
        non_empty = [piece for piece in pieces if piece]
        return "".join(map(_ustr, _flatten(non_empty)))
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
        raise

1771 

def searchString( self, instring, maxMatches=_MAX_INT ):
    """
    Another extension to C{L{scanString}}: collects just the tokens of every
    match into a single C{ParseResults}, discarding the match locations.
    The optional C{maxMatches} argument stops the search after 'n' matches.

    Example::
        # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))

        # the sum() builtin can be used to merge results into a single ParseResults object
        print(sum(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity")))
    prints::
        [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
        ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
    """
    try:
        found = [ toks for toks, _start, _end in self.scanString( instring, maxMatches ) ]
        return ParseResults(found)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
        raise

1798 

def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
    """
    Generator method to split a string using this expression as the separator.

    Parameters:
     - instring - the string to split
     - maxsplit - (default=C{_MAX_INT}) passed to C{L{scanString}} as C{maxMatches},
       limiting the number of separators that are recognized
     - includeSeparators - (default=C{False}) if True, the first token of each
       separator match is yielded between the surrounding pieces

    Yields the text before each separator match and finally the text after
    the last one, so empty strings appear for adjacent or trailing separators.

    Example::
        punc = oneOf(list(".,;:/-!?"))
        print(list(punc.split("This, this?, this sentence, is badly punctuated!")))
    prints::
        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    last = 0
    for t,s,e in self.scanString(instring, maxMatches=maxsplit):
        yield instring[last:s]
        if includeSeparators:
            yield t[0]
        last = e
    # remainder after the final separator (the whole string if nothing matched)
    yield instring[last:]

1820 

def __add__(self, other ):
    """
    Implementation of + operator - returns C{L{And}} of this expression
    followed by C{other}.  A string operand is first wrapped using
    C{ParserElement._literalStringClass}.  Any other non-C{ParserElement}
    operand produces a C{SyntaxWarning} and a C{None} result.

    Example::
        greet = Word(alphas) + "," + Word(alphas) + "!"
        hello = "Hello, World!"
        print (hello, "->", greet.parseString(hello))
    Prints::
        Hello, World! -> ['Hello', ',', 'World', '!']
    """
    if isinstance( other, basestring ):
        other = ParserElement._literalStringClass( other )
    if isinstance( other, ParserElement ):
        return And( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None

1840 

def __radd__(self, other ):
    """
    Implementation of + operator when left operand is not a C{L{ParserElement}}.
    A string operand is wrapped with C{ParserElement._literalStringClass};
    anything else that is not a C{ParserElement} gives a C{SyntaxWarning}
    and a C{None} result.
    """
    if isinstance( other, basestring ):
        other = ParserElement._literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other + self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None

1852 

def __sub__(self, other):
    """
    Implementation of - operator, returns C{L{And}} with error stop: the
    result is C{self + And._ErrorStop() + other}.  Strings are wrapped with
    C{ParserElement._literalStringClass}; other non-C{ParserElement} operands
    give a C{SyntaxWarning} and a C{None} result.
    """
    if isinstance( other, basestring ):
        other = ParserElement._literalStringClass( other )
    if isinstance( other, ParserElement ):
        return self + And._ErrorStop() + other
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None

1864 

def __rsub__(self, other ):
    """
    Implementation of - operator when left operand is not a C{L{ParserElement}}.
    Converts a string operand with C{ParserElement._literalStringClass} and
    delegates to C{other - self}; unsupported operands give a
    C{SyntaxWarning} and a C{None} result.
    """
    if isinstance( other, basestring ):
        other = ParserElement._literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other - self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None

1876 

def __mul__(self,other):
    """
    Implementation of * operator, allows use of C{expr * 3} in place of
    C{expr + expr + expr}.  Expressions may also be multiplied by a 2-integer
    tuple, similar to C{{min,max}} multipliers in regular expressions.  Tuples
    may also include C{None} as in:
     - C{expr*(n,None)} or C{expr*(n,)} is equivalent
          to C{expr*n + L{ZeroOrMore}(expr)}
          (read as "at least n instances of C{expr}")
     - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
          (read as "0 to n instances of C{expr}")
     - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
     - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}

    Note that C{expr*(None,n)} does not raise an exception if
    more than n exprs exist in the input stream; that is,
    C{expr*(None,n)} does not enforce a maximum number of expr
    occurrences.  If this behavior is desired, then write
    C{expr*(None,n) + ~expr}

    Raises C{TypeError} if C{other} is neither an int nor a tuple of
    ints/C{None}, and C{ValueError} for a negative count, a maximum smaller
    than the minimum, or a multiplier of 0 / (0,0).
    """
    if isinstance(other,int):
        minElements, optElements = other,0
    elif isinstance(other,tuple):
        # pad short tuples with None so (n,) behaves like (n,None)
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0],int) and other[1] is None:
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self*other[0] + ZeroOrMore(self)
        elif isinstance(other[0],int) and isinstance(other[1],int):
            minElements, optElements = other
            # optElements becomes the number of optional repeats beyond the minimum
            optElements -= minElements
        else:
            # interpolate the operand types into the message (previously the
            # format string and its arguments were passed as separate args)
            raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects"
                            % (type(other[0]).__name__, type(other[1]).__name__))
    else:
        raise TypeError("cannot multiply 'ParserElement' and '%s' objects"
                        % type(other).__name__)

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError("second tuple value must be greater or equal to first tuple value")
    if minElements == optElements == 0:
        raise ValueError("cannot multiply ParserElement by 0 or (0,0)")

    if (optElements):
        def makeOptionalList(n):
            # nested Optionals: Optional(self + Optional(self + ...)), n levels deep
            if n>1:
                return Optional(self + makeOptionalList(n-1))
            else:
                return Optional(self)
        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self]*minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self]*minElements)
    return ret

1944 

def __rmul__(self, other):
    """
    Implementation of * operator when the multiplier is the left operand;
    simply forwards to C{__mul__}.
    """
    product = self.__mul__(other)
    return product

1947 

def __or__(self, other ):
    """
    Implementation of | operator - returns C{L{MatchFirst}} of this
    expression and C{other}.  A string operand is wrapped with
    C{ParserElement._literalStringClass}; other non-C{ParserElement}
    operands give a C{SyntaxWarning} and a C{None} result.
    """
    if isinstance( other, basestring ):
        other = ParserElement._literalStringClass( other )
    if isinstance( other, ParserElement ):
        return MatchFirst( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None

1959 

def __ror__(self, other ):
    """
    Implementation of | operator when left operand is not a C{L{ParserElement}}.
    Converts a string operand with C{ParserElement._literalStringClass} and
    delegates to C{other | self}; unsupported operands give a
    C{SyntaxWarning} and a C{None} result.
    """
    if isinstance( other, basestring ):
        other = ParserElement._literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other | self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None

1971 

def __xor__(self, other ):
    """
    Implementation of ^ operator - returns C{L{Or}} of this expression and
    C{other}.  A string operand is wrapped with
    C{ParserElement._literalStringClass}; other non-C{ParserElement}
    operands give a C{SyntaxWarning} and a C{None} result.
    """
    if isinstance( other, basestring ):
        other = ParserElement._literalStringClass( other )
    if isinstance( other, ParserElement ):
        return Or( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None

1983 

def __rxor__(self, other ):
    """
    Implementation of ^ operator when left operand is not a C{L{ParserElement}}.
    Converts a string operand with C{ParserElement._literalStringClass} and
    delegates to C{other ^ self}; unsupported operands give a
    C{SyntaxWarning} and a C{None} result.
    """
    if isinstance( other, basestring ):
        other = ParserElement._literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other ^ self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None

1995 

def __and__(self, other ):
    """
    Implementation of & operator - returns C{L{Each}} of this expression and
    C{other}.  A string operand is wrapped with
    C{ParserElement._literalStringClass}; other non-C{ParserElement}
    operands give a C{SyntaxWarning} and a C{None} result.
    """
    if isinstance( other, basestring ):
        other = ParserElement._literalStringClass( other )
    if isinstance( other, ParserElement ):
        return Each( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None

2007 

def __rand__(self, other ):
    """
    Implementation of & operator when left operand is not a C{L{ParserElement}}.
    Converts a string operand with C{ParserElement._literalStringClass} and
    delegates to C{other & self}; unsupported operands give a
    C{SyntaxWarning} and a C{None} result.
    """
    if isinstance( other, basestring ):
        other = ParserElement._literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other & self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None

2019 

def __invert__( self ):
    """
    Implementation of ~ operator - wraps this expression in C{L{NotAny}}
    and returns the wrapper.
    """
    negated = NotAny( self )
    return negated

2025 

def __call__(self, name=None):
    """
    Shortcut for C{L{setResultsName}}, with C{listAllMatches=False}.

    If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
    passed as C{True}.

    If C{name} is omitted (C{None}), the call is the same as C{L{copy}}.

    Example::
        # these are equivalent
        userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
        userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
    """
    if name is None:
        return self.copy()
    return self.setResultsName(name)

2044 

def suppress( self ):
    """
    Returns this C{ParserElement} wrapped in C{Suppress}, which keeps its
    output out of the results; useful to keep punctuation from cluttering
    up returned output.
    """
    suppressed = Suppress( self )
    return suppressed

2051 

def leaveWhitespace( self ):
    """
    Turns off whitespace skipping before this C{ParserElement}'s pattern is
    matched, by clearing C{skipWhitespace}.  Normally only used internally by
    the pyparsing module, but may be needed in some whitespace-sensitive
    grammars.  Returns C{self} so calls can be chained.
    """
    setattr(self, "skipWhitespace", False)
    return self

2060 

def setWhitespaceChars( self, chars ):
    """
    Overrides the default whitespace chars for this element: enables
    whitespace skipping, stores C{chars} as C{whiteChars} and clears
    C{copyDefaultWhiteChars}.  Returns C{self} so calls can be chained.
    """
    self.copyDefaultWhiteChars = False
    self.whiteChars = chars
    self.skipWhitespace = True
    return self

2069 

def parseWithTabs( self ):
    """
    Overrides the default behavior of expanding C{<TAB>}s to spaces before
    parsing the input string, by setting C{keepTabs}.  Must be called before
    C{parseString} when the input grammar contains elements that match
    C{<TAB>} characters.  Returns C{self} so calls can be chained.
    """
    setattr(self, "keepTabs", True)
    return self

2078 

def ignore( self, other ):
    """
    Register an expression to be ignored (e.g., comments) while doing
    pattern matching; may be called repeatedly, to define multiple comment
    or other ignorable patterns.  Strings are wrapped in C{Suppress}; a
    C{Suppress} is added only if not already registered; any other
    expression is copied and wrapped in C{Suppress}.  Returns C{self}.

    Example::
        patt = OneOrMore(Word(alphas))
        patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj']

        patt.ignore(cStyleComment)
        patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd']
    """
    if isinstance(other, basestring):
        other = Suppress(other)

    if not isinstance(other, Suppress):
        self.ignoreExprs.append(Suppress(other.copy()))
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append(other)
    return self

2101 

def setDebugActions( self, startAction, successAction, exceptionAction ):
    """
    Enable display of debugging messages while doing pattern matching,
    using the given callbacks.  Any callback passed as a false value is
    replaced by the corresponding module default action.  Sets C{debug}
    to True and returns C{self}.
    """
    on_start = startAction or _defaultStartDebugAction
    on_success = successAction or _defaultSuccessDebugAction
    on_exception = exceptionAction or _defaultExceptionDebugAction
    self.debugActions = (on_start, on_success, on_exception)
    self.debug = True
    return self

2111 

def setDebug( self, flag=True ):
    """
    Enable display of debugging messages while doing pattern matching.
    Set C{flag} to True to enable (installs the default debug actions via
    L{setDebugActions}), False to disable.  Returns C{self}.

    Example::
        wd = Word(alphas).setName("alphaword")
        integer = Word(nums).setName("numword")
        term = wd | integer

        # turn on debugging for wd
        wd.setDebug()

        OneOrMore(term).parseString("abc 123 xyz 890")

    prints::
        Match alphaword at loc 0(1,1)
        Matched alphaword -> ['abc']
        Match alphaword at loc 3(1,4)
        Exception raised:Expected alphaword (at char 4), (line:1, col:5)
        Match alphaword at loc 7(1,8)
        Matched alphaword -> ['xyz']
        Match alphaword at loc 11(1,12)
        Exception raised:Expected alphaword (at char 12), (line:1, col:13)
        Match alphaword at loc 15(1,16)
        Exception raised:Expected alphaword (at char 15), (line:1, col:16)

    The output shown is that produced by the default debug actions - custom debug actions can be
    specified using L{setDebugActions}. Prior to attempting
    to match the C{wd} expression, the debugging message C{"Match <exprname> at loc <n>(<line>,<col>)"}
    is shown. Then if the parse succeeds, a C{"Matched"} message is shown, or an C{"Exception raised"}
    message is shown. Also note the use of L{setName} to assign a human-readable name to the expression,
    which makes debugging and exception messages easier to understand - for instance, the default
    name created for the C{Word} expression without calling C{setName} is C{"W:(ABCD...)"}.
    """
    if not flag:
        self.debug = False
        return self
    self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
    return self

2152 

def __str__( self ):
    """Return the expression's C{name} attribute as its string form."""
    label = self.name
    return label

2155 

def __repr__( self ):
    """Return the same text as the string conversion, obtained via C{_ustr}."""
    text = _ustr(self)
    return text

2158 

def streamline( self ):
    """
    Mark this expression as streamlined and clear its cached C{strRepr}.
    Returns C{self}.
    """
    self.strRepr = None
    self.streamlined = True
    return self

2163 

def checkRecursion( self, parseElementList ):
    """
    Recursion check hook; this implementation does nothing and returns
    C{None}.
    """
    return None

2166 

def validate( self, validateTrace=None ):
    """
    Check defined expressions for valid structure, check for infinite recursive definitions.

    Parameters:
     - validateTrace - accepted for signature compatibility; this
       implementation does not read it.  The default is C{None} rather than
       a shared mutable list.

    Delegates to C{checkRecursion} with a fresh empty list.
    """
    self.checkRecursion( [] )

2172 

def parseFile( self, file_or_filename, parseAll=False ):
    """
    Execute the parse expression on the given file or filename.
    An object with a C{read()} method is read directly; otherwise the
    argument is treated as a filename, and the file is opened in text mode,
    read in full, and closed before parsing.  The contents are handed to
    C{L{parseString}} together with C{parseAll}.
    """
    try:
        text = file_or_filename.read()
    except AttributeError:
        # no usable read() - fall back to treating the argument as a path
        with open(file_or_filename, "r") as handle:
            text = handle.read()
    try:
        return self.parseString(text, parseAll)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
        raise

2192 

def __eq__(self,other):
    """
    Equality test.  Against another C{ParserElement}: true if it is the same
    object or both have equal instance attributes.  Against a string: true
    if the string parses with C{matches}.  Anything else is compared via
    the C{super} proxy.
    """
    if isinstance(other, ParserElement):
        return self is other or vars(self) == vars(other)
    if isinstance(other, basestring):
        return self.matches(other)
    return super(ParserElement,self)==other

2200 

def __ne__(self,other):
    """Inequality test: the logical negation of C{self == other}."""
    same = (self == other)
    return not same

2203 

def __hash__(self):
    """Hash on object identity (the hash of C{id(self)})."""
    identity = id(self)
    return hash(identity)

2206 

def __req__(self,other):
    """Reflected equality helper; returns the result of C{self == other}."""
    outcome = (self == other)
    return outcome

2209 

def __rne__(self,other):
    """Reflected inequality helper; returns the negation of C{self == other}."""
    same = (self == other)
    return not same

2212 

def matches(self, testString, parseAll=True):
    """
    Method for quick testing of a parser against a test string. Good for simple
    inline microtests of sub expressions while building up larger parser.

    Parameters:
     - testString - to test against this expression for a match
     - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests

    Returns True if C{parseString} succeeds and False if it raises a
    C{ParseBaseException}.

    Example::
        expr = Word(nums)
        assert expr.matches("100")
    """
    try:
        self.parseString(_ustr(testString), parseAll=parseAll)
    except ParseBaseException:
        return False
    return True

2231 

def runTests(self, tests, parseAll=True, comment='#', fullDump=True, printResults=True, failureTests=False):
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:
     - tests - a list of separate test strings, or a multiline string of test strings
     - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests
     - comment - (default=C{'#'}) - expression for indicating embedded comments in the test
          string; pass None to disable comment filtering
     - fullDump - (default=C{True}) - dump results as list followed by results names in nested outline;
          if False, only dump nested list
     - printResults - (default=C{True}) prints test output to stdout
     - failureTests - (default=C{False}) indicates if these tests are expected to fail parsing

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if C{failureTests} is True), and the results contain a list of
    C{(test string, result)} pairs, the result being the parsed results or the
    exception that was raised

    Example::
        number_expr = pyparsing_common.number.copy()

        result = number_expr.runTests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            ''')
        print("Success" if result[0] else "Failed!")

        result = number_expr.runTests('''
            # stray character
            100Z
            # too many '.'
            3.14.159
            ''', failureTests=True)
        print("Success" if result[0] else "Failed!")
    prints::
        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        Success

        # stray character
        100Z
           ^
        FAIL: Expected end of text (at char 3), (line:1, col:4)

        # too many '.'
        3.14.159
            ^
        FAIL: Expected end of text (at char 4), (line:1, col:5)

        Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this::

        expr.runTest(r"this is a test\\n of strings that spans \\n 3 lines")

    (Note that this is a raw string literal, you must include the leading 'r'.)
    """
    if isinstance(tests, basestring):
        tests = list(map(str.strip, tests.rstrip().splitlines()))
    if isinstance(comment, basestring):
        comment = Literal(comment)

    collected = []
    pending_comments = []
    all_ok = True
    for test_str in tests:
        # comment lines, and blank lines that follow comments, are held back
        # and printed as a header for the next real test
        is_comment = comment is not None and comment.matches(test_str, False)
        if is_comment or (pending_comments and not test_str):
            pending_comments.append(test_str)
            continue
        if not test_str:
            continue

        report = ['\n'.join(pending_comments), test_str]
        pending_comments = []
        try:
            # a literal backslash-n in the test text stands for a real newline
            test_str = test_str.replace(r'\n','\n')
            outcome = self.parseString(test_str, parseAll=parseAll)
            report.append(outcome.dump(full=fullDump))
            all_ok = all_ok and not failureTests
        except ParseBaseException as pe:
            fatal_tag = "(FATAL)" if isinstance(pe, ParseFatalException) else ""
            if '\n' in test_str:
                # multi-line test: show the offending line and point at the column
                report.append(line(pe.loc, test_str))
                report.append(' '*(col(pe.loc,test_str)-1) + '^' + fatal_tag)
            else:
                report.append(' '*pe.loc + '^' + fatal_tag)
            report.append("FAIL: " + str(pe))
            all_ok = all_ok and failureTests
            outcome = pe
        except Exception as exc:
            report.append("FAIL-EXCEPTION: " + str(exc))
            all_ok = all_ok and failureTests
            outcome = exc

        if printResults:
            if fullDump:
                report.append('')
            print('\n'.join(report))

        collected.append((test_str, outcome))

    return all_ok, collected

2362 

2363 

class Token(ParserElement):
    """
    Abstract C{ParserElement} subclass that serves as the base for atomic
    matching patterns.
    """
    def __init__( self ):
        # initialize the base element with savelist turned off
        super(Token,self).__init__( savelist=False )

2370 

2371 

class Empty(Token):
    """
    An empty token, will always match.
    """
    def __init__( self ):
        super(Empty,self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = "Empty"

2381 

2382 

class NoMatch(Token):
    """
    A token that will never match: C{parseImpl} always raises
    C{ParseException}.
    """
    def __init__( self ):
        super(NoMatch,self).__init__()
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = "NoMatch"

    def parseImpl( self, instring, loc, doActions=True ):
        # unconditional failure at the current location
        failure = ParseException(instring, loc, self.errmsg, self)
        raise failure

2396 

2397 

class Literal(Token):
    """
    Token to exactly match a specified string.

    Example::
        Literal('blah').parseString('blah')  # -> ['blah']
        Literal('blah').parseString('blahfooblah')  # -> ['blah']
        Literal('blah').parseString('bla')  # -> Exception: Expected "blah"

    For case-insensitive matching, use L{CaselessLiteral}.

    For keyword matching (force word break before and after the matched string),
    use L{Keyword} or L{CaselessKeyword}.
    """
    def __init__( self, matchString ):
        super(Literal,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # an empty match string: warn, and turn this instance into an Empty
            warnings.warn("null string passed to Literal; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string  and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    #~ @profile
    def parseImpl( self, instring, loc, doActions=True ):
        if instring[loc] != self.firstMatchChar:
            raise ParseException(instring, loc, self.errmsg, self)
        if self.matchLen == 1 or instring.startswith(self.match, loc):
            return loc+self.matchLen, self.match
        raise ParseException(instring, loc, self.errmsg, self)

# short module-private alias for Literal
_L = Literal
# class used by the ParserElement operator methods to wrap plain string operands
ParserElement._literalStringClass = Literal

2438 

class Keyword(Token):
    """
    Token to exactly match a specified string as a keyword, that is, it must not be
    immediately preceded or followed by a keyword character.  Compare with C{L{Literal}}:
     - C{Literal("if")} will match the leading C{'if'} in C{'ifAndOnlyIf'}.
     - C{Keyword("if")} will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
    Accepts two optional constructor arguments in addition to the keyword string:
     - C{identChars} is a string of characters that would be valid identifier characters,
          defaulting to all alphanumerics + "_" and "$"
     - C{caseless} allows case-insensitive matching, default is C{False}.

    Example::
        Keyword("start").parseString("start")  # -> ['start']
        Keyword("start").parseString("starting")  # -> Exception

    For case-insensitive matching, use L{CaselessKeyword}.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__( self, matchString, identChars=None, caseless=False ):
        super(Keyword,self).__init__()
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparison is done in upper case on both sides
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def parseImpl( self, instring, loc, doActions=True ):
        # A match requires the keyword text at loc, plus a non-identifier
        # character (or the string boundary) on both sides of it.
        if self.caseless:
            if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
                 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
                 (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
                return loc+self.matchLen, self.match
        else:
            if (instring[loc] == self.firstMatchChar and
                (self.matchLen==1 or instring.startswith(self.match,loc)) and
                (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
                (loc == 0 or instring[loc-1] not in self.identChars) ):
                return loc+self.matchLen, self.match
        raise ParseException(instring, loc, self.errmsg, self)

    def copy(self):
        """
        Return a copy of this keyword that keeps the same identifier
        characters.  The copy gets its own C{identChars} set, so a custom
        C{identChars} (and the upper-casing done for caseless keywords) is
        no longer replaced by C{Keyword.DEFAULT_KEYWORD_CHARS}.
        """
        c = super(Keyword,self).copy()
        c.identChars = set(self.identChars)
        return c

    @staticmethod
    def setDefaultKeywordChars( chars ):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars

2503 

class CaselessLiteral(Literal):
    """
    Token that matches a given string without regard to letter case.
    Note: the text returned for a match is always the string the token was
    defined with, NOT the text as it appeared in the input.

    Example::
        OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for L{CaselessKeyword}.)
    """
    def __init__( self, matchString ):
        # The base class is initialised with the upper-cased text; parseImpl
        # below compares upper-cased input against self.match.
        super(CaselessLiteral,self).__init__( matchString.upper() )
        # Keep the literal exactly as it was defined, for results and messages.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl( self, instring, loc, doActions=True ):
        """Compare the upper-cased input slice at C{loc} with the stored text.

        Returns C{(loc + matchLen, returnString)} or raises C{ParseException}.
        """
        candidate = instring[ loc:loc+self.matchLen ]
        if candidate.upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc+self.matchLen, self.returnString

2526 

class CaselessKeyword(Keyword):
    """
    Caseless version of L{Keyword}.

    Example::
        OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD']

    (Contrast with example for L{CaselessLiteral}.)
    """
    def __init__( self, matchString, identChars=None ):
        super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the keyword at C{loc}, ignoring case.

        The match succeeds only when the upper-cased input slice equals the
        upper-cased keyword AND the characters immediately before and after
        the slice are not identifier characters.  The leading-character check
        was previously missing here, which made this class disagree with the
        caseless branch of C{Keyword.parseImpl}.

        Returns C{(loc + matchLen, keyword as defined)}; raises
        C{ParseException} on failure.
        """
        matchEnd = loc + self.matchLen
        identChars = self.identChars
        if ( (instring[ loc:matchEnd ].upper() == self.caselessmatch) and
             (loc >= len(instring)-self.matchLen or instring[matchEnd].upper() not in identChars) and
             (loc == 0 or instring[loc-1].upper() not in identChars) ):
            return matchEnd, self.match
        raise ParseException(instring, loc, self.errmsg, self)

2544 

class CloseMatch(Token):
    """
    A variation on L{Literal} which matches "close" matches, that is,
    strings with at most 'n' mismatching characters. C{CloseMatch} takes parameters:
     - C{match_string} - string to be matched
     - C{maxMismatches} - (C{default=1}) maximum number of mismatches allowed to count as a match

    The results from a successful parse will contain the matched text from the input string and the following named results:
     - C{mismatches} - a list of the positions within the match_string where mismatches were found
     - C{original} - the original match_string used to compare against the input string

    If C{mismatches} is an empty list, then the match was an exact match.

    Example::
        patt = CloseMatch("ATCATCGAATGGA")
        patt.parseString("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parseString("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parseString("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2)
        patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """
    def __init__(self, match_string, maxMismatches=1):
        super(CloseMatch,self).__init__()
        self.name = match_string
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = "Expected %r (with up to %d mismatches)" % (self.match_string, self.maxMismatches)
        self.mayIndexError = False
        self.mayReturnEmpty = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Compare the input at C{loc} with C{match_string}, tolerating mismatches.

        Returns C{(end_loc, results)} where C{results} holds the matched input
        text plus the named results C{original} and C{mismatches} (offsets
        relative to the start of C{match_string}).  Raises C{ParseException}
        when too little input remains or more than C{maxMismatches}
        characters differ.
        """
        start = loc
        match_string = self.match_string
        maxloc = start + len(match_string)

        # Only attempt the comparison when the whole match_string fits.
        if maxloc <= len(instring):
            maxMismatches = self.maxMismatches
            mismatches = []

            for offset, (src, mat) in enumerate(zip(instring[start:maxloc], match_string)):
                if src != mat:
                    mismatches.append(offset)
                    if len(mismatches) > maxMismatches:
                        break
            else:
                # The end of the match is relative to where parsing started.
                # It used to be computed from the offset within match_string
                # alone, which was wrong for any start position other than 0.
                loc = maxloc
                results = ParseResults([instring[start:loc]])
                results['original'] = match_string
                results['mismatches'] = mismatches
                return loc, results

        raise ParseException(instring, loc, self.errmsg, self)

2604 

2605 

class Word(Token):
    """
    Token for matching words built from sets of allowed characters.
    A Word is defined by a string of allowed initial characters, an optional
    string of allowed body characters (when omitted, the initial characters
    are used for the body too), and optional minimum, maximum, and/or exact
    lengths.  C{min} defaults to 1 (values < 1 are rejected); C{max} and
    C{exact} default to 0, meaning no maximum or exact length restriction.
    The optional C{excludeChars} parameter lists characters to remove from the
    given character strings; useful to define a word of all printables
    except for one or two characters, for instance.

    L{srange} is useful for defining custom character set strings for
    C{Word} expressions, using range notation from regular expression character sets.

    A common mistake is to use C{Word} to match a specific literal string, as in
    C{Word("Address")}. Remember that C{Word} uses the string argument to define
    I{sets} of matchable characters. This expression would match "Add", "AAA",
    "dAred", or any other word made up of the characters 'A', 'd', 'r', 'e', and 's'.
    To match an exact literal string, use L{Literal} or L{Keyword}.

    pyparsing includes helper strings for building Words:
     - L{alphas}
     - L{nums}
     - L{alphanums}
     - L{hexnums}
     - L{alphas8bit} (alphabetic characters in ASCII range 128-255 - accented, tilded, umlauted, etc.)
     - L{punc8bit} (non-alphabetic characters in ASCII range 128-255 - currency, symbols, superscripts, diacriticals, etc.)
     - L{printables} (any non-whitespace character)

    Example::
        # a word composed of digits
        integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))

        # a word with a leading capital, and zero or more lowercase
        capital_word = Word(alphas.upper(), alphas.lower())

        # hostnames are alphanumeric, with leading alpha, and '-'
        hostname = Word(alphas, alphanums+'-')

        # roman numeral (not a strict parser, accepts invalid mix of characters)
        roman = Word("IVXLCDM")

        # any string of non-whitespace characters, except for ','
        csv_value = Word(printables, excludeChars=",")
    """
    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
        super(Word,self).__init__()
        if excludeChars:
            def withoutExcluded(chars):
                return ''.join(c for c in chars if c not in excludeChars)
            initChars = withoutExcluded(initChars)
            if bodyChars:
                bodyChars = withoutExcluded(bodyChars)

        self.initCharsOrig = initChars
        self.initChars = set(initChars)
        # An empty/omitted body set falls back to the initial characters.
        effectiveBody = bodyChars if bodyChars else initChars
        self.bodyCharsOrig = effectiveBody
        self.bodyChars = set(effectiveBody)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # An exact length overrides both bounds.
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # With default length limits and no space in the character sets, the
        # word is expressed as a regular expression that parseImpl uses
        # instead of scanning character by character.
        defaultLengths = (min==1 and max==0 and exact==0)
        if defaultLengths and ' ' not in self.initCharsOrig+self.bodyCharsOrig:
            firstChars = self.initCharsOrig
            restChars = self.bodyCharsOrig
            if restChars == firstChars:
                reString = "[%s]+" % _escapeRegexRangeChars(firstChars)
            elif len(firstChars) == 1:
                reString = "%s[%s]*" % (re.escape(firstChars),
                                        _escapeRegexRangeChars(restChars),)
            else:
                reString = "[%s][%s]*" % (_escapeRegexRangeChars(firstChars),
                                          _escapeRegexRangeChars(restChars),)
            if self.asKeyword:
                reString = r"\b"+reString+r"\b"
            self.reString = reString
            try:
                self.re = re.compile( self.reString )
            except Exception:
                # fall back to the character-scanning path in parseImpl
                self.re = None

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a word at C{loc}; return C{(end_loc, matched_text)}.

        Raises C{ParseException} when no acceptable word starts at C{loc}.
        """
        if self.re:
            found = self.re.match(instring,loc)
            if not found:
                raise ParseException(instring, loc, self.errmsg, self)
            return found.end(), found.group()

        if instring[ loc ] not in self.initChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        maxloc = min( start + self.maxLen, instrlen )
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        # Reject when the word is too short, when an explicit maximum was hit
        # but more body characters follow, or (as keyword) when the word is
        # adjacent to another body character on either side.
        tooShort = loc - start < self.minLen
        overrunsMax = self.maxSpecified and loc < instrlen and instring[loc] in bodychars
        touchesWord = self.asKeyword and (
            (start>0 and instring[start-1] in bodychars) or
            (loc<instrlen and instring[loc] in bodychars) )

        if tooShort or overrunsMax or touchesWord:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__( self ):
        # Prefer whatever the base class reports; if that fails for any
        # reason, build (and cache) a short description of the character sets.
        try:
            return super(Word,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            initDesc = self.initCharsOrig
            if len(initDesc)>4:
                initDesc = initDesc[:4]+"..."

            if self.initCharsOrig != self.bodyCharsOrig:
                bodyDesc = self.bodyCharsOrig
                if len(bodyDesc)>4:
                    bodyDesc = bodyDesc[:4]+"..."
                self.strRepr = "W:(%s,%s)" % ( initDesc, bodyDesc )
            else:
                self.strRepr = "W:(%s)" % initDesc

        return self.strRepr

2763 

2764 

class Regex(Token):
    r"""
    Token for matching strings that match a given regular expression.
    Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    If the given regex contains named groups (defined using C{(?P<name>...)}), these will be preserved as
    named parse results.

    Example::
        realnum = Regex(r"[+-]?\d+\.\d*")
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')
        # ref: http://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")
    """
    compiledREtype = type(re.compile("[A-Z]"))
    def __init__( self, pattern, flags=0):
        """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.

        C{pattern} may also be an already-compiled regular expression object,
        in which case it is used directly and C{flags} is only recorded.
        Raises C{ValueError} for any other type, and re-raises the C{re}
        compilation error (after a warning) for an invalid pattern string.
        """
        super(Regex,self).__init__()

        if isinstance(pattern, basestring):
            if not pattern:
                warnings.warn("null string passed to Regex; use Empty() instead",
                        SyntaxWarning, stacklevel=2)

            self.pattern = pattern
            self.flags = flags

            try:
                self.re = re.compile(self.pattern, self.flags)
                self.reString = self.pattern
            except re.error:
                # re.error is the same exception class as sre_constants.error
                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                    SyntaxWarning, stacklevel=2)
                raise

        elif isinstance(pattern, Regex.compiledREtype):
            self.re = pattern
            # Record the regular expression text itself.  str(pattern) yields
            # the object's string form (e.g. "re.compile('...')"), which is
            # not a usable pattern.
            self.pattern = \
            self.reString = pattern.pattern
            self.flags = flags

        else:
            raise ValueError("Regex may only be constructed with a string or a compiled RE object")

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Apply the compiled expression at C{loc}.

        Returns C{(end_loc, results)}; each named group of the match is also
        stored in the results under its group name.  Raises
        C{ParseException} when the expression does not match at C{loc}.
        """
        result = self.re.match(instring,loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = ParseResults(result.group())
        for groupName, groupText in result.groupdict().items():
            ret[groupName] = groupText
        return loc,ret

    def __str__( self ):
        # Use the base class's text if it can supply one; otherwise build and
        # cache a description from the pattern.
        try:
            return super(Regex,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr

2836 

2837 

class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:
        - quoteChar - string of one or more characters defining the quote delimiting string
        - escChar - character to escape quotes, typically backslash (default=C{None})
        - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=C{None})
        - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
        - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
        - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
        - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True})

    Example::
        qs = QuotedString('"')
        print(qs.searchString('lsjdf "This is the quote" sldjf'))
        complex_qs = QuotedString('{{', endQuoteChar='}}')
        print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf'))
        sql_qs = QuotedString('"', escQuote='""')
        print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))
    prints::
        [['This is the quote']]
        [['This is the "quote"']]
        [['This is the quote with "embedded" quotes']]
    """
    def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
        super(QuotedString,self).__init__()

        # surrounding whitespace is stripped from the quote delimiters
        quoteChar = quoteChar.strip()
        if not quoteChar:
            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is not None:
            endQuoteChar = endQuoteChar.strip()
            if not endQuoteChar:
                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
                raise SyntaxError()
        else:
            endQuoteChar = quoteChar

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults
        self.convertWhitespaceEscapes = convertWhitespaceEscapes

        # Pieces of the opening part of the pattern: the opening quote, then a
        # character class excluding the first end-quote character, the escape
        # character (if any) and - unless multiline - line breaks.
        openQuote = re.escape(self.quoteChar)
        endFirst = _escapeRegexRangeChars(self.endQuoteChar[0])
        if escChar is not None:
            escInClass = _escapeRegexRangeChars(escChar) or ''
        else:
            escInClass = ''

        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            self.pattern = r'%s(?:[^%s%s]' % (openQuote, endFirst, escInClass)
        else:
            self.flags = 0
            self.pattern = r'%s(?:[^%s\n\r%s]' % (openQuote, endFirst, escInClass)

        if len(self.endQuoteChar) > 1:
            # allow proper prefixes of the end quote that are not followed by
            # the next end-quote character
            partials = []
            for i in range(len(self.endQuoteChar)-1,0,-1):
                partials.append("%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                             _escapeRegexRangeChars(self.endQuoteChar[i])))
            self.pattern += '|(?:' + ')|(?:'.join(partials) + ')'

        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a quoted string at C{loc}; return C{(end_loc, text)}.

        With C{unquoteResults} the delimiters are removed and escape
        sequences are converted; otherwise the raw matched text is returned.
        Raises C{ParseException} when no quoted string starts at C{loc}.
        """
        # check the first quote character before running the regex
        result = None
        if instring[loc] == self.firstQuoteChar:
            result = self.re.match(instring,loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.group()

        if not self.unquoteResults:
            return loc, ret

        # strip off quotes
        ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

        if isinstance(ret,basestring):
            # replace escaped whitespace
            if '\\' in ret and self.convertWhitespaceEscapes:
                for wslit, wschar in ( (r'\t', '\t'),
                                       (r'\n', '\n'),
                                       (r'\f', '\f'),
                                       (r'\r', '\r'), ):
                    ret = ret.replace(wslit, wschar)

            # replace escaped characters
            if self.escChar:
                ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret)

            # replace escaped quotes
            if self.escQuote:
                ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def __str__( self ):
        # Use the base class's text if it can supply one; otherwise build and
        # cache a description from the quote delimiters.
        try:
            return super(QuotedString,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr

2973 

2974 

class CharsNotIn(Token):
    """
    Token for matching words composed of characters I{not} in a given set (whitespace
    is included in the match unless it is listed in the exclusion set - see example).
    Defined with a string containing all disallowed characters, and an optional
    minimum, maximum, and/or exact length.  The default value for C{min} is 1 (a
    minimum value < 1 is not valid); the default values for C{max} and C{exact}
    are 0, meaning no maximum or exact length restriction.

    Example::
        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213"))
    prints::
        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """
    def __init__( self, notChars, min=1, max=0, exact=0 ):
        super(CharsNotIn,self).__init__()
        # leading whitespace is part of the match, so it is not skipped
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = ( self.minLen == 0 )
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume characters from C{loc} until a disallowed one is reached.

        Returns C{(end_loc, matched_text)}; raises C{ParseException} when the
        first character is disallowed or fewer than C{minLen} characters match.
        """
        excluded = self.notChars
        if instring[loc] in excluded:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        limit = min( start+self.maxLen, len(instring) )
        loc += 1
        while loc < limit and instring[loc] not in excluded:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__( self ):
        # Use the base class's text if it can supply one; otherwise build and
        # cache a short description of the excluded characters.
        try:
            return super(CharsNotIn, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            if len(self.notChars) <= 4:
                self.strRepr = "!W:(%s)" % self.notChars
            else:
                self.strRepr = "!W:(%s...)" % self.notChars[:4]

        return self.strRepr

3045 

class White(Token):
    """
    Special matching class for matching whitespace.  Normally, whitespace is ignored
    by pyparsing grammars.  This class is included when some whitespace structures
    are significant.  Define with a string containing the whitespace characters to be
    matched; default is C{" \\t\\r\\n"}.  Also takes optional C{min}, C{max}, and C{exact} arguments,
    as defined for the C{L{Word}} class.
    """
    # display names used to build this token's name
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }
    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        super(White,self).__init__()
        self.matchWhite = ws
        # Remove the characters being matched from this element's own
        # whitespace characters (presumably so they are not skipped before
        # parseImpl gets to see them).
        remaining = [c for c in self.whiteChars if c not in self.matchWhite]
        self.setWhitespaceChars( "".join(remaining) )
        #~ self.leaveWhitespace()
        self.name = "".join([White.whiteStrs[c] for c in self.matchWhite])
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a run of the configured whitespace characters at C{loc}.

        Returns C{(end_loc, matched_text)}; raises C{ParseException} when the
        character at C{loc} is not one of them or the run is shorter than
        C{minLen}.
        """
        wanted = self.matchWhite
        if instring[ loc ] not in wanted:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        limit = min( start + self.maxLen, len(instring) )
        loc += 1
        while loc < limit and instring[loc] in wanted:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

3095 

3096 

class _PositionToken(Token):
    """Common base for the position-testing tokens defined below
    (L{GoToColumn}, L{LineStart}, L{LineEnd}, L{StringStart}, L{StringEnd},
    L{WordStart}, L{WordEnd}).  Names the element after its class.
    """
    def __init__( self ):
        super(_PositionToken,self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = type(self).__name__

3103 

class GoToColumn(_PositionToken):
    """
    Token that advances to a given column of the input text; useful for
    scraping tabular reports.
    """
    def __init__( self, colno ):
        super(GoToColumn,self).__init__()
        self.col = colno

    def preParse( self, instring, loc ):
        """Skip ignorable expressions and whitespace until the target column
        is reached (or a non-space character / end of input stops the scan).
        """
        if col(loc,instring) == self.col:
            return loc

        instrlen = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables( instring, loc )
        while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col :
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Return the text between C{loc} and the target column.

        Raises C{ParseException} when C{loc} is already past that column.
        """
        thiscol = col( loc, instring )
        if thiscol > self.col:
            raise ParseException( instring, loc, "Text not in expected column", self )
        newloc = loc + self.col - thiscol
        return newloc, instring[ loc: newloc ]

3128 

3129 

class LineStart(_PositionToken):
    """
    Matches when the current position is at the beginning of a line within
    the parse string.

    Example::

        test = '''
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (LineStart() + 'AAA' + restOfLine).searchString(test):
            print(t)

    Prints::
        ['AAA', ' this line']
        ['AAA', ' and this line']

    """
    def __init__( self ):
        super(LineStart,self).__init__()
        self.errmsg = "Expected start of line"

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with no tokens when C{loc} is in column 1; otherwise raise
        C{ParseException}."""
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3159 

class LineEnd(_PositionToken):
    """
    Matches when the current position is at the end of a line within the
    parse string.
    """
    def __init__( self ):
        super(LineEnd,self).__init__()
        # newline must stay visible to parseImpl, so it is removed from the
        # whitespace characters of this element
        self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
        self.errmsg = "Expected end of line"

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a newline at C{loc} (returning C{"\\n"}) or the end of the
        input (returning no tokens); raise C{ParseException} otherwise."""
        instrlen = len(instring)
        if loc == instrlen:
            return loc+1, []
        if loc < instrlen and instring[loc] == "\n":
            return loc+1, "\n"
        raise ParseException(instring, loc, self.errmsg, self)

3179 

class StringStart(_PositionToken):
    """
    Matches when the current position is at the beginning of the parse string.
    """
    def __init__( self ):
        super(StringStart,self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with no tokens at position 0, or at the position that
        C{preParse} reaches from position 0 (i.e. only skippable text
        precedes C{loc}); raise C{ParseException} otherwise."""
        if loc != 0 and loc != self.preParse( instring, 0 ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3194 

class StringEnd(_PositionToken):
    """
    Matches if current position is at the end of the parse string
    """
    def __init__( self ):
        super(StringEnd,self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with no tokens when C{loc} is at or beyond the end of input.

        Exactly at the end, the returned location is C{loc+1}; beyond the end
        it is C{loc} unchanged.  Raises C{ParseException} when input remains.
        """
        instrlen = len(instring)
        if loc < instrlen:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == instrlen:
            return loc+1, []
        # loc > instrlen; the three comparisons are exhaustive, so the former
        # trailing "else: raise" branch could never run and has been removed.
        return loc, []

3212 

class WordStart(_PositionToken):
    r"""
    Matches if the current position is at the beginning of a Word, and
    is not preceded by any character in a given set of C{wordChars}
    (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
    use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
    the string being parsed, or at the beginning of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordStart,self).__init__()
        self.wordChars = set(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """Succeed with no tokens at position 0, or when the previous
        character is not a word character and the current one is; raise
        C{ParseException} otherwise."""
        if loc == 0:
            return loc, []
        wordChars = self.wordChars
        if instring[loc-1] not in wordChars and instring[loc] in wordChars:
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)

3232 

class WordEnd(_PositionToken):
    """
    Position token for the end of a word: the character before the current
    position must be in C{wordChars} and the character at it must not be
    (default C{wordChars} is C{printables}).  Use C{WordEnd(alphanums)} to
    emulate the C{\\b} behavior of regular expressions.  A location at or past
    the end of the parsed string always matches.
    """
    def __init__(self, wordChars = printables):
        super(WordEnd, self).__init__()
        self.wordChars = set(wordChars)
        # leading whitespace must not be skipped before testing the position
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """Return C{(loc, [])} at a word end, else raise C{ParseException}."""
        text_len = len(instring)
        nothing_to_inspect = text_len == 0 or loc >= text_len
        if not nothing_to_inspect:
            word_chars = self.wordChars
            if instring[loc] in word_chars or instring[loc - 1] not in word_chars:
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3254 

3255 

class ParseExpression(ParserElement):
    """
    Abstract base for parser elements that hold a list of sub-expressions
    (C{self.exprs}) and combine / post-process their parsed tokens.
    """
    def __init__(self, exprs, savelist = False):
        """
        Normalize C{exprs} into the list C{self.exprs}.

        Accepts a single string, an iterable (including a generator) of
        expressions or strings, or a single non-iterable object.  Strings are
        wrapped with C{ParserElement._literalStringClass}.
        """
        super(ParseExpression, self).__init__(savelist)
        if isinstance(exprs, _generatorType):
            exprs = list(exprs)

        if isinstance(exprs, basestring):
            self.exprs = [ParserElement._literalStringClass(exprs)]
        elif isinstance(exprs, Iterable):
            members = list(exprs)
            # a sequence made up only of strings is wrapped element by element
            if all(isinstance(member, basestring) for member in members):
                members = [ParserElement._literalStringClass(member) for member in members]
            self.exprs = members
        else:
            try:
                self.exprs = list(exprs)
            except TypeError:
                # not iterable at all: treat it as a single expression
                self.exprs = [exprs]
        self.callPreparse = False

    def __getitem__(self, i):
        return self.exprs[i]

    def append(self, other):
        """Add C{other} to the contained expressions and return C{self}."""
        self.exprs.append(other)
        # cached string form is stale once the expression list changes
        self.strRepr = None
        return self

    def leaveWhitespace(self):
        """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
           copies of all contained expressions."""
        self.skipWhitespace = False
        duplicates = []
        for sub in self.exprs:
            duplicates.append(sub.copy())
        self.exprs = duplicates
        for sub in self.exprs:
            sub.leaveWhitespace()
        return self

    def ignore(self, other):
        """
        Register C{other} as ignorable on this element and on every contained
        expression.  A C{Suppress} that is already registered is not added again.
        """
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self

        super(ParseExpression, self).ignore(other)
        for sub in self.exprs:
            # propagate whatever the base class just appended
            sub.ignore(self.ignoreExprs[-1])
        return self

    def __str__(self):
        try:
            return super(ParseExpression, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.exprs))
        return self.strRepr

    def streamline(self):
        """Streamline this element and its sub-expressions; flatten nested same-type pairs."""
        super(ParseExpression, self).streamline()

        for sub in self.exprs:
            sub.streamline()

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if len(self.exprs) == 2:
            own_type = self.__class__

            def mergeable(candidate):
                return (isinstance(candidate, own_type)
                        and not candidate.parseAction
                        and candidate.resultsName is None
                        and not candidate.debug)

            head = self.exprs[0]
            if mergeable(head):
                self.exprs = head.exprs[:] + [self.exprs[1]]
                self.strRepr = None
                self.mayReturnEmpty |= head.mayReturnEmpty
                self.mayIndexError |= head.mayIndexError

            # re-read the last element: the list may just have been rebuilt
            tail = self.exprs[-1]
            if mergeable(tail):
                self.exprs = self.exprs[:-1] + tail.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= tail.mayReturnEmpty
                self.mayIndexError |= tail.mayIndexError

        self.errmsg = "Expected " + _ustr(self)

        return self

    def setResultsName(self, name, listAllMatches=False):
        return super(ParseExpression, self).setResultsName(name, listAllMatches)

    def validate(self, validateTrace=[]):
        """Validate every contained expression, then check this element for recursion."""
        # the default list is only sliced, never mutated
        trace = validateTrace[:] + [self]
        for sub in self.exprs:
            sub.validate(trace)
        self.checkRecursion([])

    def copy(self):
        """Return a copy of this element whose sub-expressions are themselves copies."""
        duplicate = super(ParseExpression, self).copy()
        copied_exprs = []
        for sub in self.exprs:
            copied_exprs.append(sub.copy())
        duplicate.exprs = copied_exprs
        return duplicate

3367 

class And(ParseExpression):
    """
    Sequence of C{ParseExpression}s that must all match, in the given order.
    Expressions may be separated by whitespace.
    Usually built with the C{'+'} operator; the C{'-'} operator builds the
    same sequence but suppresses backtracking past that point.

    Example::
        integer = Word(nums)
        name_expr = OneOrMore(Word(alphas))

        expr = And([integer("id"),name_expr("name"),integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        # marker element: once seen in parseImpl, later failures are raised as
        # ParseSyntaxException
        def __init__(self, *args, **kwargs):
            super(And._ErrorStop, self).__init__(*args, **kwargs)
            self.name = '-'
            self.leaveWhitespace()

    def __init__(self, exprs, savelist = True):
        super(And, self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        # whitespace handling is inherited from the first expression
        first = self.exprs[0]
        self.setWhitespaceChars(first.whiteChars)
        self.skipWhitespace = self.exprs[0].skipWhitespace
        self.callPreparse = True

    def parseImpl(self, instring, loc, doActions=True):
        """Parse each contained expression in turn and return the accumulated tokens."""
        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse(instring, loc, doActions, callPreParse=False)
        past_error_stop = False
        for sub in self.exprs[1:]:
            if isinstance(sub, And._ErrorStop):
                past_error_stop = True
                continue

            if not past_error_stop:
                loc, exprtokens = sub._parse(instring, loc, doActions)
            else:
                # after an error stop every failure becomes a ParseSyntaxException
                try:
                    loc, exprtokens = sub._parse(instring, loc, doActions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(instring, len(instring), self.errmsg, self)

            if exprtokens or exprtokens.haskeys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other):
        if isinstance(other, basestring):
            other = ParserElement._literalStringClass(other)
        return self.append(other)

    def checkRecursion(self, parseElementList):
        """Check sub-expressions in order, stopping after the first that cannot return empty."""
        trail = parseElementList[:] + [self]
        for sub in self.exprs:
            sub.checkRecursion(trail)
            if not sub.mayReturnEmpty:
                break

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            joined = " ".join(_ustr(e) for e in self.exprs)
            self.strRepr = "{" + joined + "}"

        return self.strRepr

3442 

3443 

class Or(ParseExpression):
    """
    Alternation in which at least one C{ParseExpression} must match.
    When several alternatives match, the one matching the longest string wins.
    May be constructed using the C{'^'} operator.

    Example::
        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.searchString("123 3.1416 789"))
    prints::
        [['123'], ['3.1416'], ['789']]
    """
    def __init__(self, exprs, savelist = False):
        super(Or, self).__init__(exprs, savelist)
        # with no alternatives at all the element is flagged as possibly empty
        self.mayReturnEmpty = (
            any(e.mayReturnEmpty for e in self.exprs) if self.exprs else True
        )

    def parseImpl(self, instring, loc, doActions=True):
        """
        Try every alternative, then parse for real from longest to shortest
        trial match.  If nothing succeeds, raise the exception that got
        furthest into the input, with its message replaced by C{self.errmsg}.
        """
        furthest_loc = -1
        furthest_exc = None
        candidates = []
        for alt in self.exprs:
            try:
                end_loc = alt.tryParse(instring, loc)
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > furthest_loc:
                    furthest_exc = err
                    furthest_loc = err.loc
            except IndexError:
                if len(instring) > furthest_loc:
                    furthest_exc = ParseException(instring, len(instring), alt.errmsg, self)
                    furthest_loc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                candidates.append((end_loc, alt))

        # stable descending sort: equally long matches keep their listed order
        candidates.sort(key=lambda match: match[0], reverse=True)
        for _, alt in candidates:
            try:
                return alt._parse(instring, loc, doActions)
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > furthest_loc:
                    furthest_exc = err
                    furthest_loc = err.loc

        if furthest_exc is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthest_exc.msg = self.errmsg
        raise furthest_exc

    def __ixor__(self, other):
        if isinstance(other, basestring):
            other = ParserElement._literalStringClass(other)
        return self.append(other)

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            joined = " ^ ".join(_ustr(e) for e in self.exprs)
            self.strRepr = "{" + joined + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        trail = parseElementList[:] + [self]
        for alt in self.exprs:
            alt.checkRecursion(trail)

3521 

3522 

class MatchFirst(ParseExpression):
    """
    Alternation in which at least one C{ParseExpression} must match.
    When several alternatives could match, the first one listed is used.
    May be constructed using the C{'|'} operator.

    Example::
        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.searchString("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.searchString("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """
    def __init__(self, exprs, savelist = False):
        super(MatchFirst, self).__init__(exprs, savelist)
        # with no alternatives at all the element is flagged as possibly empty
        self.mayReturnEmpty = (
            any(e.mayReturnEmpty for e in self.exprs) if self.exprs else True
        )

    def parseImpl(self, instring, loc, doActions=True):
        """
        Return the result of the first alternative that parses.  If none does,
        raise the exception that got furthest into the input, with its message
        replaced by C{self.errmsg}.
        """
        furthest_loc = -1
        furthest_exc = None
        for alt in self.exprs:
            try:
                return alt._parse(instring, loc, doActions)
            except ParseException as err:
                if err.loc > furthest_loc:
                    furthest_exc = err
                    furthest_loc = err.loc
            except IndexError:
                if len(instring) > furthest_loc:
                    furthest_exc = ParseException(instring, len(instring), alt.errmsg, self)
                    furthest_loc = len(instring)

        # only got here if no expression matched, raise exception for match that made it the furthest
        if furthest_exc is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthest_exc.msg = self.errmsg
        raise furthest_exc

    def __ior__(self, other):
        if isinstance(other, basestring):
            other = ParserElement._literalStringClass(other)
        return self.append(other)

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            joined = " | ".join(_ustr(e) for e in self.exprs)
            self.strRepr = "{" + joined + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        trail = parseElementList[:] + [self]
        for alt in self.exprs:
            alt.checkRecursion(trail)

3589 

3590 

class Each(ParseExpression):
    """
    Requires all given C{ParseExpression}s to be found, but in any order.
    Expressions may be separated by whitespace.
    May be constructed using the C{'&'} operator.

    Example::
        color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr)

        shape_spec.runTests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )
    prints::
        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: BLACK
        - posn: ['100', ',', '120']
          - x: 100
          - y: 120
        - shape: SQUARE


        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
        - color: BLUE
        - posn: ['50', ',', '80']
          - x: 50
          - y: 80
        - shape: CIRCLE
        - size: 50


        color: GREEN size: 20 shape: TRIANGLE posn: 20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
        - color: GREEN
        - posn: ['20', ',', '40']
          - x: 20
          - y: 40
        - shape: TRIANGLE
        - size: 20
    """
    def __init__(self, exprs, savelist = True):
        super(Each, self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = True
        # expression groups are built on the first parseImpl call
        self.initExprGroups = True

    def parseImpl(self, instring, loc, doActions=True):
        """
        Find an order in which the contained expressions match, using trial
        parses, then parse them for real in that order and return the combined
        results.  Raises C{ParseException} if any required expression never
        matched.
        """
        if self.initExprGroups:
            optional_wrappers = [e for e in self.exprs if isinstance(e, Optional)]
            # maps id() of each wrapped expression back to its Optional wrapper
            self.opt1map = dict((id(w.expr), w) for w in optional_wrappers)
            unwrapped_optionals = [w.expr for w in optional_wrappers]
            other_nullable = [e for e in self.exprs
                              if e.mayReturnEmpty and not isinstance(e, Optional)]
            self.optionals = unwrapped_optionals + other_nullable
            self.multioptionals = [e.expr for e in self.exprs if isinstance(e, ZeroOrMore)]
            self.multirequired = [e.expr for e in self.exprs if isinstance(e, OneOrMore)]
            self.required = [e for e in self.exprs
                             if not isinstance(e, (Optional, ZeroOrMore, OneOrMore))]
            self.required += self.multirequired
            self.initExprGroups = False

        scan_loc = loc
        pending_required = self.required[:]
        pending_optional = self.optionals[:]
        matchOrder = []

        # keep sweeping over the candidates until a full sweep matches nothing
        while True:
            candidates = (pending_required + pending_optional
                          + self.multioptionals + self.multirequired)
            failure_count = 0
            for cand in candidates:
                try:
                    scan_loc = cand.tryParse(instring, scan_loc)
                except ParseException:
                    failure_count += 1
                else:
                    matchOrder.append(self.opt1map.get(id(cand), cand))
                    if cand in pending_required:
                        pending_required.remove(cand)
                    elif cand in pending_optional:
                        pending_optional.remove(cand)
            if failure_count == len(candidates):
                break

        if pending_required:
            missing = ", ".join(_ustr(e) for e in pending_required)
            raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

        # add any unmatched Optionals, in case they have default values defined
        matchOrder += [e for e in self.exprs
                       if isinstance(e, Optional) and e.expr in pending_optional]

        collected = []
        for matched in matchOrder:
            loc, results = matched._parse(instring, loc, doActions)
            collected.append(results)

        finalResults = sum(collected, ParseResults([]))
        return loc, finalResults

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            joined = " & ".join(_ustr(e) for e in self.exprs)
            self.strRepr = "{" + joined + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        trail = parseElementList[:] + [self]
        for sub in self.exprs:
            sub.checkRecursion(trail)

3713 

3714 

class ParseElementEnhance(ParserElement):
    """
    Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.

    Wraps a single expression, stored in C{self.expr}, which may be C{None}.
    """
    def __init__( self, expr, savelist=False ):
        """
        Store C{expr} and inherit its parsing attributes.

        A string C{expr} is converted with C{ParserElement._literalStringClass}.
        When C{expr} is not C{None}, its whitespace, index-error, empty-match,
        save-as-list, pre-parse and ignore settings are copied onto this element.
        """
        super(ParseElementEnhance,self).__init__(savelist)
        if isinstance( expr, basestring ):
            if issubclass(ParserElement._literalStringClass, Token):
                expr = ParserElement._literalStringClass(expr)
            else:
                # the literal class is not a Token: hand it a Literal, not a raw string
                expr = ParserElement._literalStringClass(Literal(expr))
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars( expr.whiteChars )
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Delegate parsing to the wrapped expression.

        Raises C{ParseException} when no expression is set.
        """
        if self.expr is not None:
            return self.expr._parse( instring, loc, doActions, callPreParse=False )
        # report against the real input (previously "") so the exception
        # carries the text being parsed
        raise ParseException(instring,loc,self.errmsg,self)

    def leaveWhitespace( self ):
        """
        Disable whitespace skipping here and on a copy of the wrapped expression.

        Returns C{self}.
        """
        self.skipWhitespace = False
        # check for None before copying; the old order raised AttributeError
        # when no expression was set
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore( self, other ):
        """
        Register C{other} as ignorable on this element and on the wrapped
        expression.  A C{Suppress} that is already registered is not added again.
        """
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self
        super( ParseElementEnhance, self).ignore( other )
        if self.expr is not None:
            # propagate whatever the base class just appended
            self.expr.ignore( self.ignoreExprs[-1] )
        return self

    def streamline( self ):
        super(ParseElementEnhance,self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion( self, parseElementList ):
        """Raise C{RecursiveGrammarException} if this element is already in C{parseElementList}."""
        if self in parseElementList:
            raise RecursiveGrammarException( parseElementList+[self] )
        subRecCheckList = parseElementList[:] + [ self ]
        if self.expr is not None:
            self.expr.checkRecursion( subRecCheckList )

    def validate( self, validateTrace=[] ):
        """Validate the wrapped expression, then check this element for recursion."""
        # the default list is only sliced, never mutated
        tmp = validateTrace[:]+[self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion( [] )

    def __str__( self ):
        try:
            return super(ParseElementEnhance,self).__str__()
        except Exception:
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
        return self.strRepr

3790 

3791 

class FollowedBy(ParseElementEnhance):
    """
    Positive lookahead.  C{FollowedBy} verifies that the given expression
    matches at the current position but does I{not} advance the parsing
    position, and it always returns a null token list.

    Example::
        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))

        OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint()
    prints::
        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """
    def __init__(self, expr):
        super(FollowedBy, self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        # a failed lookahead propagates out of tryParse; on success the
        # position is left where it was
        self.expr.tryParse(instring, loc)
        no_tokens = []
        return loc, no_tokens

3816 

3817 

class NotAny(ParseElementEnhance):
    """
    Negative lookahead.  C{NotAny} verifies that the given expression does
    I{not} match at the current position; it does I{not} advance the parsing
    position and does I{not} skip over leading whitespace.  C{NotAny} always
    returns a null token list.  May be constructed using the '~' operator.

    Example::
        # the two forms below are equivalent
        not_end = NotAny(Literal("end"))
        not_end = ~Literal("end")
    """
    def __init__(self, expr):
        super(NotAny, self).__init__(expr)
        # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, "+_ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        unwanted_present = self.expr.canParseNext(instring, loc)
        if unwanted_present:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "~{" + _ustr(self.expr) + "}"

        return self.strRepr

3849 

class _MultipleMatch(ParseElementEnhance):
    """
    Shared implementation of repetition: matches the wrapped expression once,
    then keeps matching it until it fails or the optional C{stopOn} sentinel
    is found.
    """
    def __init__(self, expr, stopOn=None):
        super(_MultipleMatch, self).__init__(expr)
        self.saveAsList = True
        sentinel = stopOn
        if isinstance(sentinel, basestring):
            sentinel = ParserElement._literalStringClass(sentinel)
        # negated sentinel, or None when no stopOn was given
        if sentinel is None:
            self.not_ender = None
        else:
            self.not_ender = ~sentinel

    def parseImpl(self, instring, loc, doActions=True):
        """Return the location after the last repetition and the accumulated tokens."""
        parse_expr = self.expr._parse
        skip_ignorables = self._skipIgnorables
        has_sentinel = self.not_ender is not None
        if has_sentinel:
            reject_sentinel = self.not_ender.tryParse

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if has_sentinel:
            reject_sentinel(instring, loc)
        loc, tokens = parse_expr(instring, loc, doActions, callPreParse=False)
        try:
            has_ignorables = bool(self.ignoreExprs)
            while True:
                if has_sentinel:
                    reject_sentinel(instring, loc)
                start = skip_ignorables(instring, loc) if has_ignorables else loc
                loc, more_tokens = parse_expr(instring, start, doActions)
                if more_tokens or more_tokens.haskeys():
                    tokens += more_tokens
        except (ParseException, IndexError):
            # the first failed repetition (or a sentinel hit) ends the loop
            pass

        return loc, tokens

3887 

class OneOrMore(_MultipleMatch):
    """
    Repetition of one or more of the given expression.

    Parameters:
     - expr - expression that must match one or more times
     - stopOn - (default=C{None}) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)

    Example::
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))

        text = "shape: SQUARE posn: upper left color: BLACK"
        OneOrMore(attr_expr).parseString(text).pprint()  # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]

        # use stopOn attribute for OneOrMore to avoid reading label string as part of the data
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
        OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        (attr_expr * (1,)).parseString(text).pprint()
    """

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            inner = _ustr(self.expr)
            self.strRepr = "{" + inner + "}..."

        return self.strRepr

3922 

class ZeroOrMore(_MultipleMatch):
    """
    Optional repetition of zero or more of the given expression.

    Parameters:
     - expr - expression that must match zero or more times
     - stopOn - (default=C{None}) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)

    Example: similar to L{OneOrMore}
    """
    def __init__(self, expr, stopOn=None):
        super(ZeroOrMore, self).__init__(expr, stopOn=stopOn)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Parse like C{_MultipleMatch}; if that fails, match nothing at C{loc}."""
        try:
            outcome = super(ZeroOrMore, self).parseImpl(instring, loc, doActions)
        except (ParseException, IndexError):
            # zero repetitions is still a match
            outcome = (loc, [])
        return outcome

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            inner = _ustr(self.expr)
            self.strRepr = "[" + inner + "]..."

        return self.strRepr

3953 

3954class _NullToken(object): 

3955 def __bool__(self): 

3956 return False 

3957 __nonzero__ = __bool__ 

3958 def __str__(self): 

3959 return "" 

3960 

3961_optionalNotMatched = _NullToken() 

class Optional(ParseElementEnhance):
    """
    Optional matching of the given expression.

    Parameters:
     - expr - expression that may match at most once
     - default (optional) - value to be returned if the optional expression is not found.

    Example::
        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4)))
        zip.runTests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')
    prints::
        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """
    def __init__(self, expr, default=_optionalNotMatched):
        super(Optional, self).__init__(expr, savelist=False)
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """
        Parse the wrapped expression if it matches.  Otherwise return the
        default value as the only token (also stored under the wrapped
        expression's results name, if it has one), or no tokens at all when no
        default was supplied.
        """
        try:
            loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
        except (ParseException, IndexError):
            fallback = self.defaultValue
            if fallback is _optionalNotMatched:
                tokens = []
            elif self.expr.resultsName:
                tokens = ParseResults([fallback])
                tokens[self.expr.resultsName] = fallback
            else:
                tokens = [fallback]
        return loc, tokens

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            inner = _ustr(self.expr)
            self.strRepr = "[" + inner + "]"

        return self.strRepr

4025 

class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched expression is found.

    Parameters:
     - expr - target expression marking the end of the data to be skipped
     - include - (default=C{False}) if True, the target expression is also parsed 
          (the skipped text and target expression are returned as a 2-element list).
     - ignore - (default=C{None}) used to define grammars (typically quoted strings and 
          comments) that might contain false matches to the target expression
     - failOn - (default=C{None}) define expressions that are not allowed to be 
          included in the skipped text; if found before the target expression is found, 
          the SkipTo is not a match

    Example::
        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quotedString)
        string_data.setParseAction(tokenMap(str.strip))
        ticket_expr = (integer("issue_num") + SEP 
                      + string_data("sev") + SEP 
                      + string_data("desc") + SEP 
                      + integer("days_open"))
        
        for tkt in ticket_expr.searchString(report):
            print(tkt.dump())
    prints::
        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: 6
        - desc: Intermittent system crash
        - issue_num: 101
        - sev: Critical
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: 14
        - desc: Spelling error on Login ('log|n')
        - issue_num: 94
        - sev: Cosmetic
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: 47
        - desc: System slow when running too many reports
        - issue_num: 79
        - sev: Minor
    """
    def __init__( self, other, include=False, ignore=None, failOn=None ):
        """
        Store the target expression (C{other}) and the skipping options.

        A string C{failOn} is converted to an expression with
        C{ParserElement._literalStringClass}; any other value is stored as given.
        """
        super( SkipTo, self ).__init__( other )
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        if isinstance(failOn, basestring):
            self.failOn = ParserElement._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Scan forward from C{loc} one character at a time until the target
        expression parses, and return the text that was skipped.

        Returns C{(loc, ParseResults)}: the results hold the skipped text
        and, when C{includeMatch} is set, the target's tokens as well (with
        C{loc} advanced past the target).  Raises C{ParseException} when the
        scan runs past the end of C{instring} without a match.
        """
        startloc = loc
        instrlen = len(instring)
        # local alias; not referenced again in this method
        expr = self.expr
        # bind the methods used inside the loop to locals once
        expr_parse = self.expr._parse
        self_failOn_canParseNext = self.failOn.canParseNext if self.failOn is not None else None
        self_ignoreExpr_tryParse = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None
        
        tmploc = loc
        while tmploc <= instrlen:
            if self_failOn_canParseNext is not None:
                # break if failOn expression matches
                # NOTE(review): this break bypasses the while/else clause below, so
                # control continues at "build up return values" rather than raising
                # here - confirm this is the intended "not a match" behavior
                if self_failOn_canParseNext(instring, tmploc):
                    break
                    
            if self_ignoreExpr_tryParse is not None:
                # advance past ignore expressions
                while 1:
                    try:
                        tmploc = self_ignoreExpr_tryParse(instring, tmploc)
                    except ParseBaseException:
                        break
            
            try:
                # probe only: parse actions are disabled and the result is discarded
                expr_parse(instring, tmploc, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)
        
        if self.includeMatch:
            # parse the target for real this time, honoring doActions
            loc, mat = expr_parse(instring,loc,doActions,callPreParse=False)
            skipresult += mat

        return loc, skipresult

4140 

class Forward(ParseElementEnhance):
    """
    Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.

    Note: take care when assigning to C{Forward} not to overlook precedence of operators.
    Specifically, '|' has a lower precedence than '<<', so that::
        fwdExpr << a | b | c
    will actually be evaluated as::
        (fwdExpr << a) | b | c
    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the C{Forward}::
        fwdExpr << (a | b | c)
    Converting to use the '<<=' operator instead will avoid this problem.

    See L{ParseResults.pprint} for an example of a recursive parser created using
    C{Forward}.
    """
    def __init__( self, other=None ):
        """Create the placeholder, optionally already bound to C{other}."""
        super(Forward,self).__init__( other, savelist=False )

    def __lshift__( self, other ):
        """
        Bind C{other} as the real expression and return C{self}.

        A string is first converted with C{ParserElement._literalStringClass}.
        The parsing flags, whitespace settings and ignore expressions of the
        bound expression are copied onto this object.
        """
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass(other)
        self.expr = other
        # the cached string form belongs to the previous binding
        self.strRepr = None
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.setWhitespaceChars( self.expr.whiteChars )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        return self

    def __ilshift__(self, other):
        """In-place form of C{<<}; same effect, returns C{self}."""
        return self << other

    def leaveWhitespace( self ):
        """Turn off whitespace skipping on this object only and return C{self}."""
        self.skipWhitespace = False
        return self

    def streamline( self ):
        """Streamline the bound expression once; later calls are no-ops. Returns C{self}."""
        if not self.streamlined:
            # set the flag before descending so a recursive grammar terminates
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate( self, validateTrace=None ):
        """
        Validate the bound expression, then run the recursion check.

        C{validateTrace} is the list of elements already visited; C{None}
        (the default) means an empty trace.  The list passed in is never
        modified.
        """
        # a None default instead of a shared mutable list literal
        if validateTrace is None:
            validateTrace = []
        if self not in validateTrace:
            tmp = validateTrace[:]+[self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__( self ):
        """Return the explicit name if set, otherwise C{"Forward: ..."}."""
        if hasattr(self,"name"):
            return self.name
        # The bound expression is deliberately not rendered here; the code that
        # used to do so was stubbed out upstream for memory and performance
        # reasons and was unreachable after this return, so it is removed.
        return self.__class__.__name__ + ": ..."

    def copy(self):
        """
        Return a copy of this element.

        A bound C{Forward} is copied by the base class.  An unbound one is
        wrapped in a fresh C{Forward} that refers back to this object.
        """
        if self.expr is not None:
            return super(Forward,self).copy()
        else:
            ret = Forward()
            ret <<= self
            return ret

4221 

class _ForwardNoRecurse(Forward):
    """C{Forward} variant whose string form is always the literal C{"..."}."""
    def __str__(self):
        return "..."

4225 

class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of C{ParseElementEnhance}, for converting parsed results.
    """
    def __init__(self, expr, savelist=False):
        # savelist is accepted for signature compatibility but is not passed
        # on to the base class; saveAsList always starts out False here
        super(TokenConverter, self).__init__(expr)
        self.saveAsList = False

4233 

class Combine(TokenConverter):
    """
    Converter that joins all matched tokens into one string.
    The matched pieces must be contiguous in the input unless
    C{'adjacent=False'} is given to the constructor.

    Example::
        real = Word(nums) + '.' + Word(nums)
        print(real.parseString('3.1416')) # -> ['3', '.', '1416']
        # will also erroneously match the following
        print(real.parseString('3. 1416')) # -> ['3', '.', '1416']

        real = Combine(Word(nums) + '.' + Word(nums))
        print(real.parseString('3.1416')) # -> ['3.1416']
        # no match when there are internal spaces
        print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...)
    """
    def __init__(self, expr, joinString="", adjacent=True):
        super(Combine, self).__init__(expr)
        # turn off whitespace skipping inside the combined expression ...
        if adjacent:
            self.leaveWhitespace()
        # ... but keep it enabled for the Combine element itself
        self.skipWhitespace = True
        self.adjacent = adjacent
        self.joinString = joinString
        self.callPreparse = True

    def ignore(self, other):
        """
        Register an ignorable expression and return C{self}.  When
        C{adjacent} is set, C{ParserElement.ignore} is called directly;
        otherwise the inherited implementation is used.
        """
        if not self.adjacent:
            super(Combine, self).ignore(other)
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """
        Replace the token list with a single joined string, working on a
        copy of C{tokenlist}.  The result is wrapped in a list when this
        element has a results name and the copy still carries named results.
        """
        combined = tokenlist.copy()
        del combined[:]
        joined = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([joined], modal=self.modalResults)

        if self.resultsName and combined.haskeys():
            return [combined]
        return combined

4277 

class Group(TokenConverter):
    """
    Converter that returns the matched tokens nested in their own list - handy for
    keeping the tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions together.

    Example::
        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Optional(delimitedList(term))
        print(func.parseString("fn a,b,100"))  # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Optional(delimitedList(term)))
        print(func.parseString("fn a,b,100"))  # -> ['fn', ['a', 'b', '100']]
    """
    def __init__(self, expr):
        super(Group, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Wrap the matched tokens in an enclosing list."""
        return [tokenlist]

4298 

class Dict(TokenConverter):
    """
    Converter to return a repetitive expression as a list, but also as a dictionary.
    Each element can also be referenced using the first token in the expression as its key.
    Useful for tabular report scraping when the first column can be used as an item key.

    Example::
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))

        # print attributes as plain groups
        print(OneOrMore(attr_expr).parseString(text).dump())

        # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names
        result = Dict(OneOrMore(Group(attr_expr))).parseString(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.asDict())
    prints::
        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']

        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}
    See more examples at L{ParseResults} of accessing fields by results name.
    """
    def __init__(self, expr):
        super(Dict, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """
        Add a named entry to C{tokenlist} for every non-empty element, keyed
        by the element's first token (integer keys are converted to stripped
        strings).  The stored value depends on the element's length:
         - one token: the empty string
         - two tokens, the second not a C{ParseResults}: the second token
         - otherwise: a copy of the element without its first token, reduced
           to its single item when exactly one item remains and the copy
           carries no named results
        Returns C{tokenlist}, wrapped in a list when this element has a
        results name.
        """
        for offset, entry in enumerate(tokenlist):
            entry_len = len(entry)
            if entry_len == 0:
                continue

            key = entry[0]
            if isinstance(key, int):
                key = _ustr(key).strip()

            if entry_len == 1:
                value = ""
            elif entry_len == 2 and not isinstance(entry[1], ParseResults):
                value = entry[1]
            else:
                remainder = entry.copy()
                del remainder[0]
                keeps_names = isinstance(remainder, ParseResults) and remainder.haskeys()
                if len(remainder) == 1 and not keeps_names:
                    value = remainder[0]
                else:
                    value = remainder
            tokenlist[key] = _ParseResultsWithOffset(value, offset)

        if self.resultsName:
            return [tokenlist]
        return tokenlist

4362 

4363 

class Suppress(TokenConverter):
    """
    Converter that discards the tokens of a parsed expression.

    Example::
        source = "a, b, c,d"
        wd = Word(alphas)
        wd_list1 = wd + ZeroOrMore(',' + wd)
        print(wd_list1.parseString(source))

        # delimiters are often needed for parsing but unwanted in the
        # results - use Suppress to keep them out of the parsed output
        wd_list2 = wd + ZeroOrMore(Suppress(',') + wd)
        print(wd_list2.parseString(source))
    prints::
        ['a', ',', 'b', ',', 'c', ',', 'd']
        ['a', 'b', 'c', 'd']
    (See also L{delimitedList}.)
    """
    def postParse(self, instring, loc, tokenlist):
        """Drop whatever was matched."""
        return []

    def suppress(self):
        """Already suppressed; return C{self} unchanged."""
        return self

4388 

4389 

class OnlyOnce(object):
    """
    Wrapper for parse actions that lets the wrapped action complete only
    once; further calls raise C{ParseException} until L{reset} is called.
    """
    def __init__(self, methodCall):
        self.callable = _trim_arity(methodCall)
        self.called = False

    def __call__(self, s, l, t):
        if self.called:
            raise ParseException(s,l,"")
        results = self.callable(s, l, t)
        # only flagged after the action returned without raising
        self.called = True
        return results

    def reset(self):
        """Allow the wrapped action to be called again."""
        self.called = False

4405 

def traceParseAction(f):
    """
    Decorator for debugging parse actions.

    Before the parse action runs, C{">>entering I{method-name}(line: I{current_source_line}, I{parse_location}, I{matched_tokens})"}
    is written to C{sys.stderr}.  Afterwards C{"<<leaving"} is written together with the returned
    value, or with the exception the parse action raised (which is then re-raised).

    Example::
        wd = Word(alphas)

        @traceParseAction
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = OneOrMore(wd).setParseAction(remove_duplicate_chars)
        print(wds.parseString("slkdjs sld sldd sdlf sdljf"))
    prints::
        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']
    """
    f = _trim_arity(f)

    def z(*paArgs):
        label = f.__name__
        # the last three positional arguments are always (string, location, tokens)
        s, l, t = paArgs[-3:]
        if len(paArgs) > 3:
            # a leading extra argument is the owning instance: prefix its class name
            label = paArgs[0].__class__.__name__ + '.' + label
        write = sys.stderr.write
        write( ">>entering %s(line: '%s', %d, %r)\n" % (label,line(l,s),l,t) )
        try:
            outcome = f(*paArgs)
        except Exception as exc:
            write( "<<leaving %s (exception: %s)\n" % (label,exc) )
            raise
        write( "<<leaving %s (ret: %r)\n" % (label,outcome) )
        return outcome

    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z

4446 

4447# 

4448# global helpers 

4449# 

def delimitedList( expr, delim=",", combine=False ):
    """
    Helper to define a delimited list of expressions - the delimiter defaults to ','.
    Whitespace and comments may appear between list elements and delimiters unless
    C{combine=True} is passed.  With C{combine=True} the match is returned as one
    token string that includes the delimiters; otherwise it is returned as a list
    of tokens with the delimiters suppressed.

    Example::
        delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc']
        delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
    """
    element_text = _ustr(expr)
    dlName = "%s [%s %s]..." % (element_text, _ustr(delim), element_text)
    if not combine:
        return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName)
    return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName)

4468 

def countedArray( expr, intExpr=None ):
    """
    Helper to define a counted list of expressions.
    This helper defines a pattern of the form::
        integer expr expr expr...
    where the leading integer tells how many expr expressions follow.
    The matched tokens are returned as a list of expr tokens - the leading count token is suppressed.

    If C{intExpr} is specified, it should be a pyparsing expression that produces an integer value.

    Example::
        countedArray(Word(alphas)).parseString('2 ab cd ef')  # -> ['ab', 'cd']

        # in this parser, the leading integer value is given in binary,
        # '10' indicating that 2 values are in the array
        binaryConstant = Word('01').setParseAction(lambda t: int(t[0], 2))
        countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef')  # -> ['ab', 'cd']
    """
    arrayExpr = Forward()

    def countFieldParseAction(s,l,t):
        # rebuild the array expression to expect exactly n elements
        n = t[0]
        if n:
            arrayExpr << Group(And([expr]*n))
        else:
            arrayExpr << Group(empty)
        return []

    if intExpr is not None:
        # work on a copy so the caller's expression is left untouched
        intExpr = intExpr.copy()
    else:
        intExpr = Word(nums).setParseAction(lambda t:int(t[0]))
    intExpr.setName("arrayLen")
    intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
    return ( intExpr + arrayExpr ).setName('(len) ' + _ustr(expr) + '...')

4499 

def _flatten(L):
    """Return a flat list of the items of C{L}, expanding nested lists (and only lists) recursively."""
    flat = []
    for item in L:
        flat += _flatten(item) if isinstance(item, list) else [item]
    return flat

4508 

def matchPreviousLiteral(expr):
    """
    Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks
    for a 'repeat' of a previous expression.  For example::
        first = Word(nums)
        second = matchPreviousLiteral(first)
        matchExpr = first + ":" + second
    will match C{"1:1"}, but not C{"1:2"}.  Because this matches a
    previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
    If this is not desired, use C{matchPreviousExpr}.
    Do I{not} use with packrat parsing enabled.
    """
    rep = Forward()

    def copyTokenToRepeater(s,l,t):
        # re-point the repeater at whatever expr has just matched
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # several (possibly nested) tokens: require each one in order
            flat_tokens = _flatten(t.asList())
            rep << And(Literal(tok) for tok in flat_tokens)

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep

4536 

def matchPreviousExpr(expr):
    """
    Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks
    for a 'repeat' of a previous expression.  For example::
        first = Word(nums)
        second = matchPreviousExpr(first)
        matchExpr = first + ":" + second
    will match C{"1:1"}, but not C{"1:2"}.  Because this matches by
    expressions, will I{not} match the leading C{"1:1"} in C{"1:10"};
    the expressions are evaluated first, and then compared, so
    C{"1"} is compared with C{"10"}.
    Do I{not} use with packrat parsing enabled.
    """
    rep = Forward()
    rep <<= expr.copy()

    def copyTokenToRepeater(s,l,t):
        # remember what expr matched, then make the repeater insist on the same tokens
        matchTokens = _flatten(t.asList())

        def mustMatchTheseTokens(s,l,t):
            if _flatten(t.asList()) != matchTokens:
                raise ParseException("",0,"")

        rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep

4564 

def _escapeRegexRangeChars(s):
    """
    Backslash-escape the characters backslash, C{^}, C{-} and C{]} in C{s},
    and spell newline and tab characters as the two-character sequences
    C{\\n} and C{\\t}.
    """
    escaped = s
    # the backslash comes first so the backslashes added for the others are not doubled
    for special in r"\^-]":
        escaped = escaped.replace(special, _bslash+special)
    for raw, visible in (("\n", r"\n"), ("\t", r"\t")):
        escaped = escaped.replace(raw, visible)
    return _ustr(escaped)

4572 

def oneOf( strs, caseless=False, useRegex=True ):
    """
    Helper to quickly define a set of alternative Literals, and makes sure to do
    longest-first testing when there is a conflict, regardless of the input order,
    but returns a C{L{MatchFirst}} for best performance.

    Parameters:
     - strs - a string of space-delimited literals, or a collection of string literals
     - caseless - (default=C{False}) - treat all literals as caseless
     - useRegex - (default=C{True}) - as an optimization, will generate a Regex
          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
          if creating a C{Regex} raises an exception)

    Returns a C{NoMatch} expression when no symbols are given.

    Example::
        comp_oper = oneOf("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.searchString("B = 12  AA=23 B<=AA AA>12"))
    prints::
        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    # isequal(a, b): the two symbols are duplicates
    # masks(a, b):   a is a prefix of b, so testing a first would hide b
    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    symbols = []
    if isinstance(strs,basestring):
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        # symbols stays empty, so NoMatch() is returned just below
        warnings.warn("Invalid argument to oneOf, expected string or iterable",
                SyntaxWarning, stacklevel=2)
    if not symbols:
        return NoMatch()

    # Reorder/deduplicate symbols in place.  Position i is only advanced
    # (for/else) once nothing after it duplicates or is masked by symbols[i];
    # after any deletion or move the scan restarts from the same position.
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                # duplicate of cur: drop the later occurrence
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                # cur is a prefix of other: move other in front of cur
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            i += 1

    if not caseless and useRegex:
        #~ print (strs,"->", "|".join( [ _escapeRegexChars(sym) for sym in symbols] ))
        try:
            if len(symbols)==len("".join(symbols)):
                # every symbol is a single character: use a character class
                return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) ).setName(' | '.join(symbols))
            else:
                # alternation of escaped symbols, in the order established above
                return Regex( "|".join(re.escape(sym) for sym in symbols) ).setName(' | '.join(symbols))
        except Exception:
            # best effort: fall through to the MatchFirst construction below
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)


    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))

4645 

def dictOf( key, value ):
    """
    Helper to easily and clearly define a dictionary by specifying the respective patterns
    for the key and value.  Builds the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens
    in the proper order.  The key pattern can include delimiting markers or punctuation,
    as long as they are suppressed, thereby leaving the significant key text.  The value
    pattern can include named results, so that the C{Dict} results can include named token
    fields.

    Example::
        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
        print(OneOrMore(attr_expr).parseString(text).dump())

        attr_label = label
        attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)

        # similar to Dict, but simpler call format
        result = dictOf(attr_label, attr_value).parseString(text)
        print(result.dump())
        print(result['shape'])
        print(result.shape)  # object attribute access works too
        print(result.asDict())
    prints::
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        SQUARE
        {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
    """
    entry = Group( key + value )
    return Dict( ZeroOrMore( entry ) )

4680 

def originalTextFor(expr, asString=True):
    """
    Helper to return the original, untokenized text for a given expression.  Useful to
    restore the parsed fields of an HTML start tag into the raw tag text itself, or to
    revert separate tokens with intervening whitespace back to the original matching
    input text. By default, returns a string containing the original parsed text.

    If the optional C{asString} argument is passed as C{False}, then the return value is a
    C{L{ParseResults}} containing any results names that were originally matched, and a
    single token containing the original matched text from the input string.  So if
    the expression passed to C{L{originalTextFor}} contains expressions with defined
    results names, you must set C{asString} to C{False} if you want to preserve those
    results name values.

    Example::
        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b","i"):
            opener,closer = makeHTMLTags(tag)
            patt = originalTextFor(opener + SkipTo(closer) + closer)
            print(patt.searchString(src)[0])
    prints::
        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    # zero-width markers that record the parse location before and after expr
    start_marker = Empty().setParseAction(lambda s,loc,t: loc)
    end_marker = start_marker.copy()
    end_marker.callPreparse = False
    matchExpr = start_marker("_original_start") + expr + end_marker("_original_end")

    if not asString:
        def extractText(s,l,t):
            # replace the tokens in place, removing the two marker names
            t[:] = [s[t.pop('_original_start'):t.pop('_original_end')]]
    else:
        extractText = lambda s,l,t: s[t._original_start:t._original_end]

    matchExpr.setParseAction(extractText)
    matchExpr.ignoreExprs = expr.ignoreExprs
    return matchExpr

4717 

def ungroup(expr):
    """
    Helper to undo pyparsing's default grouping of And expressions, even
    if all but one are non-empty.  The returned expression yields the
    first token of C{expr}'s results.
    """
    converter = TokenConverter(expr)
    return converter.setParseAction(lambda t:t[0])

4724 

def locatedExpr(expr):
    """
    Helper to decorate a returned token with its starting and ending locations in the input string.
    This helper adds the following results names:
     - locn_start = location where matched expression begins
     - locn_end = location where matched expression ends
     - value = the actual parsed results

    Be careful if the input text contains C{<TAB>} characters, you may want to call
    C{L{ParserElement.parseWithTabs}}

    Example::
        wd = Word(alphas)
        for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
            print(match)
    prints::
        [[0, 'ljsdf', 5]]
        [[8, 'lksdjjf', 15]]
        [[18, 'lkkjj', 23]]
    """
    locator = Empty().setParseAction(lambda s,l,t: l)
    start = locator("locn_start")
    value = expr("value")
    # the end marker must not skip whitespace, so it reports where the match stopped
    end = locator.copy().leaveWhitespace()("locn_end")
    return Group(start + value + end)

4747 

4748 

# convenience constants for positional expressions
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")

# grammar for the regex-style bracket expressions accepted by srange() below
# backslash + punctuation character -> the punctuation character itself
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# \x## or \0x## (x in either case) -> the character with that hex code.
# The digits are taken as everything after the first 'x'/'X'.  The previous
# t[0].lstrip(r'\0x') treated its argument as a set of characters, so it also
# stripped leading zero digits (leaving '' for \x00) and never removed an
# upper-case 'X'; both cases made int() raise for input the regex accepts.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lower().split('x',1)[1],16)))
# \0 followed by octal digits -> the character with that octal code
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r'\]', exact=1)
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"

4762 

def srange(s):
    r"""
    Helper to easily define string ranges for use in Word construction.  Borrows
    syntax from regexp '[]' string range definitions::
        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
    The input string must be enclosed in []'s, and the returned string is the expanded
    character set joined into a single string.  An empty string is returned if the
    input cannot be processed.
    The values enclosed in the []'s may be:
     - a single character
     - an escaped character with a leading backslash (such as C{\-} or C{\]})
     - an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character)
         (C{\0x##} is also supported for backwards compatibility)
     - an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character)
     - a range of any of the above, separated by a dash (C{'a-z'}, etc.)
     - any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.)
    """
    def _expand(part):
        # a ParseResults part is a (first, last) range; anything else is one character
        if not isinstance(part, ParseResults):
            return part
        first, last = ord(part[0]), ord(part[1])
        return ''.join(unichr(code) for code in range(first, last+1))

    try:
        body = _reBracketExpr.parseString(s).body
        return "".join(_expand(part) for part in body)
    except Exception:
        return ""

4786 

def matchOnlyAtCol(n):
    """
    Helper method for defining parse actions that require matching at a specific
    column in the input text.  The returned parse action raises C{ParseException}
    when the match location is not at column C{n}.
    """
    def verifyCol(strg,locn,toks):
        if col(locn,strg) == n:
            return
        raise ParseException(strg,locn,"matched token not at column %d" % n)
    return verifyCol

4796 

def replaceWith(replStr):
    """
    Helper method for common parse actions that simply return a literal value.  Especially
    useful when used with C{L{transformString<ParserElement.transformString>}()}.
    The returned parse action ignores its arguments and yields C{[replStr]}.

    Example::
        num = Word(nums).setParseAction(lambda toks: int(toks[0]))
        na = oneOf("N/A NA").setParseAction(replaceWith(math.nan))
        term = na | num

        OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234]
    """
    return lambda _s, _loc, _toks: [replStr]

4810 

def removeQuotes(s,l,t):
    """
    Helper parse action for removing quotation marks from parsed quoted strings:
    returns the first token without its first and last characters.

    Example::
        # by default, quotation marks are included in parsed results
        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]

        # use removeQuotes to strip quotation marks from parsed results
        quotedString.setParseAction(removeQuotes)
        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
    """
    quoted = t[0]
    return quoted[1:-1]

4824 

def tokenMap(func, *args):
    """
    Helper to define a parse action by mapping a function to all elements of a ParseResults list.
    If any additional args are passed, they are forwarded to the given function as additional
    arguments after the token, as in C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))},
    which will convert the parsed data to an integer using base 16.

    Example (compare the last to example in L{ParserElement.transformString})::
        hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16))
        hex_ints.runTests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).setParseAction(tokenMap(str.upper))
        OneOrMore(upperword).runTests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).setParseAction(tokenMap(str.title))
        OneOrMore(wd).setParseAction(' '.join).runTests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')
    prints::
        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """
    def pa(s,l,t):
        mapped = []
        for tokn in t:
            mapped.append(func(tokn, *args))
        return mapped

    # name the parse action after the mapped function, falling back to the
    # name of its class and finally to its string form
    try:
        class_name = func.__class__.__name__
        func_name = getattr(func, '__name__', class_name)
    except Exception:
        func_name = str(func)
    pa.__name__ = func_name

    return pa

4868 

# Each token is first converted with _ustr, then case-folded.
upcaseTokens = tokenMap(lambda tok: _ustr(tok).upper())
"""(Deprecated) Helper parse action to convert tokens to upper case. Deprecated in favor of L{pyparsing_common.upcaseTokens}"""

downcaseTokens = tokenMap(lambda tok: _ustr(tok).lower())
"""(Deprecated) Helper parse action to convert tokens to lower case. Deprecated in favor of L{pyparsing_common.downcaseTokens}"""

4874 

def _makeTags(tagStr, xml):
    """
    Internal helper to construct opening and closing tag expressions, given a tag name.

    C{tagStr} may be a string (turned into a L{Keyword}, caseless unless C{xml}
    is true) or an existing expression, whose C{name} is then used for naming
    the results. Returns the 2-tuple C{(openTag, closeTag)}; both expressions
    also get a C{tag} attribute holding the tag name.
    """
    if not isinstance(tagStr, basestring):
        resname = tagStr.name
    else:
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)

    attrName = Word(alphas, alphanums + "_-:")
    if xml:
        # XML: every attribute must have a double-quoted value.
        attrValue = dblQuotedString.copy().setParseAction(removeQuotes)
        attrEntry = Group(attrName + Suppress("=") + attrValue)
    else:
        # HTML: attribute names are lower-cased, and values are optional and
        # may be quoted or any run of printables other than '>'.
        unquotedChars = "".join(ch for ch in printables if ch not in ">")
        attrValue = quotedString.copy().setParseAction(removeQuotes) | Word(unquotedChars)
        attrEntry = Group(attrName.setParseAction(downcaseTokens) +
                          Optional(Suppress("=") + attrValue))

    # "empty" is True when the tag is self-closing ("/>"), otherwise False.
    emptyFlag = Optional("/", default=[False]).setResultsName("empty").setParseAction(
        lambda s, l, t: t[0] == '/')
    openTag = Suppress("<") + tagStr("tag") + Dict(ZeroOrMore(attrEntry)) + emptyFlag + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # Results names are "start"/"end" + the tag name in title case with any
    # namespace colons removed.
    suffix = "".join(resname.replace(":", " ").title().split())
    openTag = openTag.setResultsName("start" + suffix).setName("<%s>" % resname)
    closeTag = closeTag.setResultsName("end" + suffix).setName("</%s>" % resname)
    openTag.tag = resname
    closeTag.tag = resname
    return openTag, closeTag

4903 

def makeHTMLTags(tagStr):
    """
    Helper to construct opening and closing tag expressions for HTML, given a
    tag name. Matches tags in either upper or lower case, attributes with
    namespaces and with quoted or unquoted values.

    Example::
        text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
        # makeHTMLTags returns pyparsing expressions for the opening and closing tags as a 2-tuple
        a,a_end = makeHTMLTags("A")
        link_expr = a + SkipTo(a_end)("link_text") + a_end

        for link in link_expr.searchString(text):
            # attributes in the <A> tag (like "href" shown here) are also accessible as named results
            print(link.link_text, '->', link.href)
    prints::
        pyparsing -> http://pyparsing.wikispaces.com
    """
    # HTML flavour: delegate with the xml flag switched off.
    return _makeTags(tagStr, xml=False)

4922 

def makeXMLTags(tagStr):
    """
    Helper to construct opening and closing tag expressions for XML, given a
    tag name. Matches tags only in the given upper/lower case.

    Example: similar to L{makeHTMLTags}
    """
    # XML flavour: delegate with the xml flag switched on.
    return _makeTags(tagStr, xml=True)

4931 

def withAttribute(*args,**attrDict):
    """
    Helper to create a validating parse action to be used with start tags created
    with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a
    starting tag with a required attribute value, to avoid false matches on common
    tags such as C{<TD>} or C{<DIV>}.

    Specify the required attribute names and values as:
     - keyword arguments, as in C{(align="right")}, or
     - an explicit dict with the C{**} operator, when an attribute name is also a
       Python reserved word, as in C{**{"class":"Customer", "align":"right"}}, or
     - positional name-value tuples, as in
       C{(("ns1:class", "Customer"), ("ns2:align","right"))}; positional
       arguments take precedence and keyword arguments are then ignored.
    For attribute names with a namespace prefix, you must use the dict or tuple form.

    Each name is looked up in the parsed tokens exactly as given. (Start tags
    built by C{makeHTMLTags} lower-case their attribute names, so use lower-case
    names with those.)

    If just testing for C{class} (with or without a namespace), use C{L{withClass}}.

    To verify that the attribute exists, but without specifying a value, pass
    C{withAttribute.ANY_VALUE} as the value.

    The returned parse action raises L{ParseException} when a required attribute
    is missing or has a different value.

    Example::
        html = '''
            <div>
            Some text
            <div type="grid">1 4 0 1 0</div>
            <div type="graph">1,3 2,3 1,1</div>
            <div>this has no type</div>
            </div>
        '''
        div,div_end = makeHTMLTags("div")

        # only match div tag having a type attribute with value "grid"
        div_grid = div().setParseAction(withAttribute(type="grid"))
        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        # construct a match with any div tag having a type attribute, regardless of the value
        div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)
    prints::
        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    required = args[:] if args else attrDict.items()
    # Snapshot as a list of pairs so the action can be run any number of times.
    attrs = [(name, value) for name, value in required]

    def pa(s,l,tokens):
        for attrName, attrValue in attrs:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            if attrValue != withAttribute.ANY_VALUE:
                found = tokens[attrName]
                if found != attrValue:
                    raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                                (attrName, found, attrValue))
    return pa
# Sentinel meaning "the attribute must be present, whatever its value".
withAttribute.ANY_VALUE = object()

4996 

def withClass(classname, namespace=''):
    """
    Simplified version of C{L{withAttribute}} when matching on a div class - made
    difficult because C{class} is a reserved word in Python.

    The attribute checked is C{"class"}, or C{"<namespace>:class"} when a
    non-empty C{namespace} is given.

    Example::
        html = '''
            <div>
            Some text
            <div class="grid">1 4 0 1 0</div>
            <div class="graph">1,3 2,3 1,1</div>
            <div>this &lt;div&gt; has no class</div>
            </div>
        '''
        div,div_end = makeHTMLTags("div")
        div_grid = div().setParseAction(withClass("grid"))

        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)
    prints::
        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    if namespace:
        classattr = "%s:class" % namespace
    else:
        classattr = "class"
    return withAttribute(**{classattr : classname})

5031 

# Associativity markers accepted in infixNotation operator definitions.
opAssoc = _Constants()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()

def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
    """
    Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy.  Operators may be unary or
    binary, left- or right-associative.  Parse actions can also be attached
    to operator expressions. The generated parser will also recognize the use
    of parentheses to override operator precedences (see example below).

    Note: if you define a deep operator list, you may see performance issues
    when using infixNotation. See L{ParserElement.enablePackrat} for a
    mechanism to potentially improve your parser performance.

    Parameters:
     - baseExpr - expression representing the most basic operand of the
       nested expression
     - opList - list of tuples (lists are accepted too), one for each operator
       precedence level in the expression grammar; each tuple is of the form
       (opExpr, numTerms, rightLeftAssoc, parseAction), where:
        - opExpr is the pyparsing expression for the operator;
          may also be a string, which will be converted to a Literal;
          if numTerms is 3, opExpr is a tuple or list of two expressions, for
          the two operators separating the 3 terms
        - numTerms is the number of terms for this operator (must
          be 1, 2, or 3)
        - rightLeftAssoc is the indicator whether the operator is
          right or left associative, using the pyparsing-defined
          constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
        - parseAction is the parse action to be associated with
          expressions matching this operator expression (the
          parse action tuple member may be omitted); if the parse action
          is passed a tuple or list of functions, this is equivalent to
          calling C{setParseAction(*fn)} (L{ParserElement.setParseAction})
     - lpar - expression for matching left-parentheses (default=C{Suppress('(')})
     - rpar - expression for matching right-parentheses (default=C{Suppress(')')})

    Raises C{ValueError} if a ternary operator is not given as a pair of
    expressions, if numTerms is not 1, 2 or 3, or if the associativity is
    neither C{opAssoc.LEFT} nor C{opAssoc.RIGHT}.

    Example::
        # simple example of four-function arithmetic with ints and variable names
        integer = pyparsing_common.signed_integer
        varname = pyparsing_common.identifier

        arith_expr = infixNotation(integer | varname,
            [
            ('-', 1, opAssoc.RIGHT),
            (oneOf('* /'), 2, opAssoc.LEFT),
            (oneOf('+ -'), 2, opAssoc.LEFT),
            ])

        arith_expr.runTests('''
            5+3*6
            (5+3)*6
            -2--11
            ''', fullDump=False)
    prints::
        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """
    ret = Forward()
    lastExpr = baseExpr | ( lpar + ret + rpar )
    for operDef in opList:
        # Pad with None so the optional parse-action member may be omitted.
        opExpr,arity,rightLeftAssoc,pa = (tuple(operDef) + (None,))[:4]
        if arity == 3:
            # Validate the operator pair BEFORE formatting the term name, so a
            # bad pair is reported as the documented ValueError rather than a
            # TypeError from the "%" operator (which also rejected list pairs).
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
            termName = "%s%s term" % (opExpr1, opExpr2)
        else:
            # Unsupported arities fall through to the ValueError checks below.
            termName = "%s term" % (opExpr,)
        thisExpr = Forward().setName(termName)
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
                else:
                    matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
                            Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
                else:
                    matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
                            Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            if isinstance(pa, (tuple, list)):
                matchExpr.setParseAction(*pa)
            else:
                matchExpr.setParseAction(pa)
        # Each level matches its own operator form or falls back to the
        # next-tighter level built on the previous iteration.
        thisExpr <<= ( matchExpr.setName(termName) | lastExpr )
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret

operatorPrecedence = infixNotation
"""(Deprecated) Former name of C{L{infixNotation}}, will be dropped in a future release."""

5150 

# Quoted-string expressions. Inside the quotes a string may contain any
# character other than the quote, a newline or a backslash; a doubled quote;
# or a backslash escape (\x followed by hex digits, or any other character).
dblQuotedString = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
).setName("string enclosed in double quotes")
sglQuotedString = Combine(
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).setName("string enclosed in single quotes")
quotedString = Combine(
    (Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"')
    | (Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'")
).setName("quotedString using single or double quotes")
unicodeString = Combine(
    _L('u') + quotedString.copy()
).setName("unicode string literal")

5156 

def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
    """
    Helper method for defining nested lists enclosed in opening and closing
    delimiters ("(" and ")" are the default).

    Parameters:
     - opener - opening character for a nested list (default=C{"("}); can also be a pyparsing expression
     - closer - closing character for a nested list (default=C{")"}); can also be a pyparsing expression
     - content - expression for items within the nested lists (default=C{None})
     - ignoreExpr - expression for ignoring opening and closing delimiters (default=C{quotedString})

    If an expression is not provided for the content argument, the nested
    expression will capture all whitespace-delimited content between delimiters
    as a list of separate values. In that case C{opener} and C{closer} must
    both be strings.

    Use the C{ignoreExpr} argument to define expressions that may contain
    opening or closing characters that should not be treated as opening
    or closing characters for nesting, such as quotedString or a comment
    expression.  Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
    The default is L{quotedString}, but if no expressions are to be ignored,
    then pass C{None} for this argument.

    Raises C{ValueError} if C{opener} equals C{closer}, or if no content
    expression is given and the delimiters are not both strings.

    Example::
        data_type = oneOf("void int short long char float double")
        decl_data_type = Combine(data_type + Optional(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        arg = Group(decl_data_type + ident)
        LPAR,RPAR = map(Suppress, "()")

        code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Optional(delimitedList(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(cStyleComment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }
        '''
        for func in c_function.searchString(source_code):
            print("%(name)s (%(type)s) args: %(args)s" % func)
    prints::
        is_odd (int) args: [['int', 'x']]
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        # A default content expression can only be derived from string delimiters.
        if not (isinstance(opener,basestring) and isinstance(closer,basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")

        stripToken = lambda t: t[0].strip()
        whiteChars = ParserElement.DEFAULT_WHITE_CHARS
        singleCharDelims = len(opener) == 1 and len(closer) == 1

        if singleCharDelims and ignoreExpr is not None:
            content = Combine(OneOrMore(
                ~ignoreExpr + CharsNotIn(opener + closer + whiteChars, exact=1)
            )).setParseAction(stripToken)
        elif singleCharDelims:
            content = (empty.copy() +
                       CharsNotIn(opener + closer + whiteChars).setParseAction(stripToken))
        elif ignoreExpr is not None:
            # Multi-character delimiters need negative lookaheads, because
            # CharsNotIn can only exclude individual characters.
            content = Combine(OneOrMore(
                ~ignoreExpr + ~Literal(opener) + ~Literal(closer) +
                CharsNotIn(whiteChars, exact=1)
            )).setParseAction(stripToken)
        else:
            content = Combine(OneOrMore(
                ~Literal(opener) + ~Literal(closer) +
                CharsNotIn(whiteChars, exact=1)
            )).setParseAction(stripToken)

    ret = Forward()
    if ignoreExpr is not None:
        body = ignoreExpr | ret | content
    else:
        body = ret | content
    ret <<= Group( Suppress(opener) + ZeroOrMore( body ) + Suppress(closer) )
    ret.setName('nested %s%s expression' % (opener,closer))
    return ret

5246 

def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """
    Helper method for defining space-delimited indentation blocks, such as
    those used to define block statements in Python source code.

    Parameters:
     - blockStatementExpr - expression defining syntax of statement that
       is repeated within the indented block
     - indentStack - list created by caller to manage indentation stack
       (multiple statementWithIndentedBlock expressions within a single grammar
       should share a common indentStack); it is modified in place while parsing
     - indent - boolean indicating whether block must be indented beyond the
       the current level; set to False for block of left-most statements
       (default=C{True})

    A valid block must contain at least one C{blockStatement}.

    Note that C{blockStatementExpr} is modified: backslash line continuations
    are added to its ignore expressions.

    Example::
        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group( "(" + Optional( delimitedList(identifier) ) + ")" ) + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group( funcDecl + func_body )

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << ( funcDef | assignment | identifier )

        module_body = OneOrMore(stmt)
        parseTree = module_body.parseString(data)
        parseTree.pprint()
    """
    def checkPeerIndent(s,l,t):
        # A statement at the same level must start at the current indent column.
        if l >= len(s): return
        curCol = col(l,s)
        if curCol != indentStack[-1]:
            if curCol > indentStack[-1]:
                raise ParseFatalException(s,l,"illegal nesting")
            raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        # Entering a block: the column must be deeper than the current level,
        # and becomes the new top of the stack.
        curCol = col(l,s)
        if curCol > indentStack[-1]:
            indentStack.append( curCol )
        else:
            raise ParseException(s,l,"not a subentry")

    def checkUnindent(s,l,t):
        # Leaving a block: the column must be left of the current level and
        # no deeper than the enclosing one.
        if l >= len(s): return
        curCol = col(l,s)
        # Require two stack entries before reading indentStack[-2]; a shorter
        # stack is reported as a normal parse failure rather than letting an
        # IndexError escape from the parse action.
        if not(len(indentStack) > 1 and curCol < indentStack[-1] and curCol <= indentStack[-2]):
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
    PEER   = Empty().setParseAction(checkPeerIndent).setName('')
    UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT')
    if indent:
        smExpr = Group( Optional(NL) +
            INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT)
    else:
        smExpr = Group( Optional(NL) +
            (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
    # Treat a backslash at end of line as a line continuation inside statements.
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.setName('indented block')

5360 

# Character sets built by srange from 8-bit code ranges.
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# Expressions matching any HTML opening / closing tag.
anyOpenTag,anyCloseTag = makeHTMLTags(
    Word(alphas, alphanums + "_:").setName('any tag'))

# Entity name -> replacement character, paired positionally.
_htmlEntityMap = dict(zip(("gt", "lt", "amp", "nbsp", "quot", "apos"), '><& "\''))
commonHTMLEntity = Regex(
    '&(?P<entity>' + '|'.join(_htmlEntityMap.keys()) + ");"
).setName("common HTML entity")

def replaceHTMLEntity(t):
    """Helper parser action to replace common HTML entities with their special characters"""
    entityName = t.entity
    return _htmlEntityMap.get(entityName)

5370 

# Comment expressions are very common and easy to get wrong, so ready-made
# versions are provided here.
cStyleComment = Combine(
    Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'
).setName("C style comment")
"Comment of the form C{/* ... */}"

htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
"Comment of the form C{<!-- ... -->}"

restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
"Comment of the form C{// ... (to end of line)}"

cppStyleComment = Combine(
    (Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/') | dblSlashComment
).setName("C++ style comment")
"Comment of either form C{L{cStyleComment}} or C{L{dblSlashComment}}"

javaStyleComment = cppStyleComment
"Same as C{L{cppStyleComment}}"

pythonStyleComment = Regex(r"#.*").setName("Python style comment")
"Comment of the form C{# ... (to end of line)}"

# One comma-separated item: printable words (excluding commas), joined by
# interior spaces/tabs that are not followed by a comma or the end of line.
_commasepitem = Combine(
    OneOrMore(
        Word(printables, excludeChars=',')
        + Optional(Word(" \t") + ~Literal(",") + ~LineEnd())
    )
).streamline().setName("commaItem")
commaSeparatedList = delimitedList(
    Optional(quotedString.copy() | _commasepitem, default="")
).setName("commaSeparatedList")
"""(Deprecated) Predefined expression of 1 or more printable words or quoted strings, separated by commas.
   This expression is deprecated in favor of L{pyparsing_common.comma_separated_list}."""

5397 

5398# some other useful expressions - using lower-case class name since we are really using this as a namespace 

5399class pyparsing_common: 

5400 """ 

5401 Here are some common low-level expressions that may be useful in jump-starting parser development: 

5402 - numeric forms (L{integers<integer>}, L{reals<real>}, L{scientific notation<sci_real>}) 

5403 - common L{programming identifiers<identifier>} 

5404 - network addresses (L{MAC<mac_address>}, L{IPv4<ipv4_address>}, L{IPv6<ipv6_address>}) 

5405 - ISO8601 L{dates<iso8601_date>} and L{datetime<iso8601_datetime>} 

5406 - L{UUID<uuid>} 

5407 - L{comma-separated list<comma_separated_list>} 

5408 Parse actions: 

5409 - C{L{convertToInteger}} 

5410 - C{L{convertToFloat}} 

5411 - C{L{convertToDate}} 

5412 - C{L{convertToDatetime}} 

5413 - C{L{stripHTMLTags}} 

5414 - C{L{upcaseTokens}} 

5415 - C{L{downcaseTokens}} 

5416 

5417 Example:: 

5418 pyparsing_common.number.runTests(''' 

5419 # any int or real number, returned as the appropriate type 

5420 100 

5421 -100 

5422 +100 

5423 3.14159 

5424 6.02e23 

5425 1e-12 

5426 ''') 

5427 

5428 pyparsing_common.fnumber.runTests(''' 

5429 # any int or real number, returned as float 

5430 100 

5431 -100 

5432 +100 

5433 3.14159 

5434 6.02e23 

5435 1e-12 

5436 ''') 

5437 

5438 pyparsing_common.hex_integer.runTests(''' 

5439 # hex numbers 

5440 100 

5441 FF 

5442 ''') 

5443 

5444 pyparsing_common.fraction.runTests(''' 

5445 # fractions 

5446 1/2 

5447 -3/4 

5448 ''') 

5449 

5450 pyparsing_common.mixed_integer.runTests(''' 

5451 # mixed fractions 

5452 1 

5453 1/2 

5454 -3/4 

5455 1-3/4 

5456 ''') 

5457 

5458 import uuid 

5459 pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID)) 

5460 pyparsing_common.uuid.runTests(''' 

5461 # uuid 

5462 12345678-1234-5678-1234-567812345678 

5463 ''') 

5464 prints:: 

5465 # any int or real number, returned as the appropriate type 

5466 100 

5467 [100] 

5468 

5469 -100 

5470 [-100] 

5471 

5472 +100 

5473 [100] 

5474 

5475 3.14159 

5476 [3.14159] 

5477 

5478 6.02e23 

5479 [6.02e+23] 

5480 

5481 1e-12 

5482 [1e-12] 

5483 

5484 # any int or real number, returned as float 

5485 100 

5486 [100.0] 

5487 

5488 -100 

5489 [-100.0] 

5490 

5491 +100 

5492 [100.0] 

5493 

5494 3.14159 

5495 [3.14159] 

5496 

5497 6.02e23 

5498 [6.02e+23] 

5499 

5500 1e-12 

5501 [1e-12] 

5502 

5503 # hex numbers 

5504 100 

5505 [256] 

5506 

5507 FF 

5508 [255] 

5509 

5510 # fractions 

5511 1/2 

5512 [0.5] 

5513 

5514 -3/4 

5515 [-0.75] 

5516 

5517 # mixed fractions 

5518 1 

5519 [1] 

5520 

5521 1/2 

5522 [0.5] 

5523 

5524 -3/4 

5525 [-0.75] 

5526 

5527 1-3/4 

5528 [1.75] 

5529 

5530 # uuid 

5531 12345678-1234-5678-1234-567812345678 

5532 [UUID('12345678-1234-5678-1234-567812345678')] 

5533 """ 

5534 

5535 convertToInteger = tokenMap(int) 

5536 """ 

5537 Parse action for converting parsed integers to Python int 

5538 """ 

5539 

5540 convertToFloat = tokenMap(float) 

5541 """ 

5542 Parse action for converting parsed numbers to Python float 

5543 """ 

5544 

5545 integer = Word(nums).setName("integer").setParseAction(convertToInteger) 

5546 """expression that parses an unsigned integer, returns an int""" 

5547 

5548 hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int,16)) 

5549 """expression that parses a hexadecimal integer, returns an int""" 

5550 

5551 signed_integer = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger) 

5552 """expression that parses an integer with optional leading sign, returns an int""" 

5553 

5554 fraction = (signed_integer().setParseAction(convertToFloat) + '/' + signed_integer().setParseAction(convertToFloat)).setName("fraction") 

5555 """fractional expression of an integer divided by an integer, returns a float""" 

5556 fraction.addParseAction(lambda t: t[0]/t[-1]) 5556 ↛ exitline 5556 didn't run the lambda on line 5556

5557 

5558 mixed_integer = (fraction | signed_integer + Optional(Optional('-').suppress() + fraction)).setName("fraction or mixed integer-fraction") 

5559 """mixed integer of the form 'integer - fraction', with optional leading integer, returns float""" 

5560 mixed_integer.addParseAction(sum) 

5561 

5562 real = Regex(r'[+-]?\d+\.\d*').setName("real number").setParseAction(convertToFloat) 

5563 """expression that parses a floating point number and returns a float""" 

5564 

5565 sci_real = Regex(r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)').setName("real number with scientific notation").setParseAction(convertToFloat) 

5566 """expression that parses a floating point number with optional scientific notation and returns a float""" 

5567 

5568 # streamlining this expression makes the docs nicer-looking 

5569 number = (sci_real | real | signed_integer).streamline() 

5570 """any numeric expression, returns the corresponding Python type""" 

5571 

5572 fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat) 

5573 """any int or real number, returned as float""" 

5574 

5575 identifier = Word(alphas+'_', alphanums+'_').setName("identifier") 

5576 """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')""" 

5577 

5578 ipv4_address = Regex(r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}').setName("IPv4 address") 

5579 "IPv4 address (C{0.0.0.0 - 255.255.255.255})" 

5580 

5581 _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer") 

5582 _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part)*7).setName("full IPv6 address") 

5583 _short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part)*(0,6)) + "::" + Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))).setName("short IPv6 address") 

5584 _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8) 5584 ↛ exitline 5584 didn't run the lambda on line 5584 or line 5584 didn't run the generator expression on line 5584

5585 _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address") 

5586 ipv6_address = Combine((_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")).setName("IPv6 address") 

5587 "IPv6 address (long, short, or mixed form)" 

5588 

5589 mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address") 

5590 "MAC address xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)" 

5591 

5592 @staticmethod 

5593 def convertToDate(fmt="%Y-%m-%d"): 

5594 """ 

5595 Helper to create a parse action for converting parsed date string to Python datetime.date 

5596 

5597 Params - 

5598 - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%d"}) 

5599 

5600 Example:: 

5601 date_expr = pyparsing_common.iso8601_date.copy() 

5602 date_expr.setParseAction(pyparsing_common.convertToDate()) 

5603 print(date_expr.parseString("1999-12-31")) 

5604 prints:: 

5605 [datetime.date(1999, 12, 31)] 

5606 """ 

5607 def cvt_fn(s,l,t): 

5608 try: 

5609 return datetime.strptime(t[0], fmt).date() 

5610 except ValueError as ve: 

5611 raise ParseException(s, l, str(ve)) 

5612 return cvt_fn 

5613 

5614 @staticmethod 

def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"):
    """
    Build a parse action that turns a matched datetime string into a Python
    C{datetime.datetime}.

    Params -
     - fmt - format string handed to C{datetime.strptime} (default=C{"%Y-%m-%dT%H:%M:%S.%f"})

    The returned parse action converts the first token only; a string that
    does not fit C{fmt} is reported as a C{ParseException} carrying the
    C{strptime} error text.

    Example::
        dt_expr = pyparsing_common.iso8601_datetime.copy()
        dt_expr.setParseAction(pyparsing_common.convertToDatetime())
        print(dt_expr.parseString("1999-12-31T23:59:59.999"))
    prints::
        [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
    """
    def cvt_fn(s, l, t):
        try:
            parsed = datetime.strptime(t[0], fmt)
        except ValueError as ve:
            raise ParseException(s, l, str(ve))
        return parsed

    return cvt_fn

5635 

# Year is mandatory; "-mm" and then "-dd" are each optional, in that order.
iso8601_date = Regex(
    r'(?P<year>\d{4})'
    r'(?:-(?P<month>\d\d)'
    r'(?:-(?P<day>\d\d))?)?'
).setName("ISO8601 date")
"ISO8601 date (C{yyyy-mm-dd})"

# Full date, a 'T' or ' ' separator, hh:mm, then optional seconds (with an
# optional fractional part) and an optional timezone designator.
iso8601_datetime = Regex(
    r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)'
    r'[T ]'
    r'(?P<hour>\d\d):(?P<minute>\d\d)'
    r'(:(?P<second>\d\d(\.\d*)?)?)?'
    r'(?P<tz>Z|[+-]\d\d:?\d\d)?'
).setName("ISO8601 datetime")
"ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)}) - trailing seconds, milliseconds, and timezone optional; accepts separating C{'T'} or C{' '}"

# Hex digits grouped 8-4-4-4-12, separated by hyphens.
uuid = Regex(
    r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}'
).setName("UUID")
"UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})"

# Matches any opening or closing HTML tag and suppresses it from the output;
# used by stripHTMLTags.
_html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress()

5646 @staticmethod 

def stripHTMLTags(s, l, tokens):
    """
    Parse action that removes HTML tags from the first matched token by
    running it through C{pyparsing_common._html_stripper.transformString}.

    Example::
        # strip HTML links from normal text
        text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
        td,td_end = makeHTMLTags("TD")
        table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end

        print(table_text.parseString(text).body) # -> 'More info at the pyparsing wiki page'
    """
    stripper = pyparsing_common._html_stripper
    html_text = tokens[0]
    return stripper.transformString(html_text)

5660 

# One unquoted list item: runs of printable characters other than ',',
# each optionally followed by spaces/tabs, stopping at a comma or end of line.
_commasepitem = Combine(
    OneOrMore(
        ~Literal(",")
        + ~LineEnd()
        + Word(printables, excludeChars=',')
        + Optional(White(" \t"))
    )
).streamline().setName("commaItem")
comma_separated_list = delimitedList(
    Optional(quotedString.copy() | _commasepitem, default="")
).setName("comma separated list")
"""Predefined expression of 1 or more printable words or quoted strings, separated by commas."""

upcaseTokens = staticmethod(
    tokenMap(lambda tok: _ustr(tok).upper())
)
"""Parse action to convert tokens to upper case."""

downcaseTokens = staticmethod(
    tokenMap(lambda tok: _ustr(tok).lower())
)
"""Parse action to convert tokens to lower case."""

5671 

5672 

if __name__ == "__main__":

    # Demo grammar: a minimal "SELECT <columns> FROM <tables>" statement.
    selectToken = CaselessLiteral("select")
    fromToken = CaselessLiteral("from")

    ident = Word(alphas, alphanums + "_$")

    # Column and table names are dotted identifiers, combined into one token
    # and upper-cased by the parse action.
    columnName = delimitedList(
        ident, ".", combine=True
    ).setParseAction(upcaseTokens)
    columnNameList = Group(delimitedList(columnName)).setName("columns")
    columnSpec = ('*' | columnNameList)

    tableName = delimitedList(
        ident, ".", combine=True
    ).setParseAction(upcaseTokens)
    tableNameList = Group(delimitedList(tableName)).setName("tables")

    simpleSQL = (
        selectToken("command")
        + columnSpec("columns")
        + fromToken
        + tableNameList("tables")
    )

    # demo runTests method, including embedded comments in test string
    simpleSQL.runTests("""
        # '*' as column list and dotted table name
        select * from SYS.XYZZY

        # caseless match on "SELECT", and casts back to "select"
        SELECT * from XYZZY, ABC

        # list of column names, and mixed case SELECT keyword
        Select AA,BB,CC from Sys.dual

        # multiple tables
        Select A, B, C from Sys.dual, Table2

        # invalid SELECT keyword - should fail
        Xelect A, B, C from Sys.dual

        # incomplete command - should fail
        Select

        # invalid column name - should fail
        Select ^^^ frox Sys.dual

        """)

    # The same numeric samples are fed to both number expressions below.
    numeric_samples = """
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """

    pyparsing_common.number.runTests(numeric_samples)

    # any int or real number, returned as float
    pyparsing_common.fnumber.runTests(numeric_samples)

    pyparsing_common.hex_integer.runTests("""
        100
        FF
        """)

    # Convert matched UUID text into uuid.UUID objects before running the test.
    import uuid
    pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
    pyparsing_common.uuid.runTests("""
        12345678-1234-5678-1234-567812345678
        """)

5741 12345678-1234-5678-1234-567812345678 

5742 """)