Docco'd the library. Updated the examples to use the new parse-inheritance.
This commit is contained in:
		
						commit
						bd77a5742d
					
				|  | @ -0,0 +1,7 @@ | |||
| .npm | ||||
| node_modules | ||||
| *~ | ||||
| #* | ||||
| .#* | ||||
| */#* | ||||
| docs | ||||
|  | @ -0,0 +1,29 @@ | |||
| ReParse = require('../src/reparse').ReParse | ||||
| util = require("util") | ||||
| 
 | ||||
| class Calc extends ReParse | ||||
|     # Have the base parser discard whitespace after every production. | ||||
|     ignorews: true | ||||
|     # Operator symbol -> the binary function that implements it. | ||||
|     OPS: | ||||
|         "+": (lhs, rhs) -> lhs + rhs | ||||
|         "-": (lhs, rhs) -> lhs - rhs | ||||
|         "*": (lhs, rhs) -> lhs * rhs | ||||
|         "/": (lhs, rhs) -> lhs / rhs | ||||
|     # Grammar.  `term` (multiplication, division) nests inside `expr` | ||||
|     # (addition, subtraction), so it binds more tightly. | ||||
|     expr:   => @chainl(@term, @addop) | ||||
|     term:   => @chainl1(@factor, @mulop) | ||||
|     factor: => @choice(@group, @number) | ||||
|     group:  => @between(/^\(/, /^\)/, @expr) | ||||
|     number: => parseFloat(@match(/^(\-?\d+(\.\d+)?)/)) | ||||
|     # Both operator productions yield the function looked up in OPS. | ||||
|     mulop:  => @OPS[@match(/^[\*\/]/)] | ||||
|     addop:  => @OPS[@match(/^[\+\-]/)] | ||||
|     # Store the input through the parent, then parse a full expression. | ||||
|     parse:  => | ||||
|         super | ||||
|         @start @expr | ||||
| 
 | ||||
| # Exactly one command-line argument (the expression) is required. | ||||
| if process.argv.length isnt 3 | ||||
|   util.puts("Usage: node " + process.argv[1] + " expression") | ||||
|   process.exit(1) | ||||
| # Evaluate the expression and print the result. | ||||
| calculator = new Calc | ||||
| util.puts calculator.parse(process.argv[2]) | ||||
|  | @ -0,0 +1,23 @@ | |||
| ReParse = require('../src/reparse').ReParse | ||||
| util = require("util") | ||||
| 
 | ||||
| class EmailAddress extends ReParse | ||||
|     # A comma-separated list of addresses; a trailing comma is allowed. | ||||
|     addressList:  =>  @sepEndBy(@address, /^\s*,\s*/) | ||||
|     address:      =>  @choice(@namedAddress, @bareAddress) | ||||
|     # `phrase <addr>`: keep only the address, the third item of the sequence. | ||||
|     namedAddress: => | ||||
|         parts = @seq(@phrase, /^\s*</m, @bareAddress, /^>/) | ||||
|         parts[2] | ||||
|     # word, "@", word -- glued back into a single string. | ||||
|     bareAddress:  =>  @seq(@word, /^@/, @word).join("") | ||||
|     phrase:       =>  @many(@word) | ||||
|     # Leading whitespace is skipped before each word. | ||||
|     word:         =>  @skip(/^\s+/).choice(@quoted, @dottedAtom) | ||||
|     quoted:       =>  @match(/^"(?:\\.|[^"\r\n])+"/m) | ||||
|     dottedAtom:   =>  @match(/^[!#\$%&'\*\+\-\/\w=\?\^`\{\|\}~]+(?:\.[!#\$%&'\*\+\-\/\w=\?\^`\{\|\}~]+)*/m) | ||||
|     # Store the input through the parent, then parse an address list. | ||||
|     parse:        => | ||||
|         super | ||||
|         @start @addressList | ||||
| 
 | ||||
| # Exactly one command-line argument (the address list) is required. | ||||
| if process.argv.length isnt 3 | ||||
|   util.puts("Usage: node " + process.argv[1] + " list-of-addresses") | ||||
|   process.exit(1) | ||||
| # Parse the list and print the resulting array. | ||||
| addresses = (new EmailAddress).parse(process.argv[2]) | ||||
| console.log util.inspect(addresses) | ||||
|  | @ -0,0 +1,51 @@ | |||
| ReParse = require('../src/reparse').ReParse | ||||
| util = require("util") | ||||
| peg = require('./pegjson').parser; | ||||
| require('./upgrades') | ||||
| 
 | ||||
| # A JSON parser assembled from ReParse combinators. | ||||
| class ReJSON extends ReParse | ||||
|     # Whitespace between tokens is skipped by the base parser. | ||||
|     ignorews: true | ||||
| 
 | ||||
|     # Keyword text -> the value it denotes (looked up in `literal`). | ||||
|     LITERAL = {'true': true, 'false': false, 'null': null} | ||||
|     # Escape letter -> character code (looked up in `string`). | ||||
|     STRING = {"\"": 34, "\\": 92, "/": 47, 'b': 8, 'f': 12, 'n': 10, 'r': 13, 't': 9} | ||||
| 
 | ||||
|     value:    => @choice @literal, @string, @number, @array, @object | ||||
|     # Each member is a [key, ":", value] triple from `pair`; fold them into one object. | ||||
|     object:   => @between(/^\{/, /^\}/, @members).reduce ((obj, pair) => obj[pair[0]] = pair[2]; obj), {} | ||||
|     members:  => @sepBy @pair, /^,/ | ||||
|     pair:     => @seq @string, /^:/, @value | ||||
|     array:    => @between /^\[/, /^\]/, @elements | ||||
|     elements: => @sepBy @value, /^,/ | ||||
|     literal:  => LITERAL[@match(/^(true|false|null)/)] | ||||
|     number:   => parseFloat @match(/^\-?\d+(?:\.\d+)?(?:[eE][\+\-]?\d+)?/) | ||||
| 
 | ||||
|     # `@match` returns the first capture group, i.e. the text between the | ||||
|     # quotes; escape sequences are then decoded to the characters they name. | ||||
|     string: => | ||||
|         chars = @match(/^"((?:\\["\\/bfnrt]|\\u[0-9a-fA-F]{4}|[^"\\])*)"/) | ||||
|         chars.replace /\\(["\\/bfnrt])|\\u([0-9a-fA-F]{4})/g, (_, $1, $2) => | ||||
|             # NOTE(review): the trailing `# "` presumably rebalances quotes for editor highlighting -- confirm. | ||||
|             String.fromCharCode (if $1 then STRING[$1] else parseInt($2, 16)) # " | ||||
| 
 | ||||
|     # Store the input through the parent, then parse a single JSON value. | ||||
|     parse:  => | ||||
|         super | ||||
|         @start(@value) | ||||
| 
 | ||||
| 
 | ||||
| # Accumulate every "data" chunk from `stream` (decoded with `encoding`) | ||||
| # and hand the concatenated text to `fn` once the stream emits "end". | ||||
| capture = (stream, encoding, fn) => | ||||
|     buffer = "" | ||||
|     stream.setEncoding(encoding) | ||||
|     stream.on "data", (chunk) => | ||||
|         buffer += chunk | ||||
|     stream.on "end", => | ||||
|         fn(buffer) | ||||
| 
 | ||||
| # Run `fn` exactly `reps` times and print "<label>: <elapsed ms>". | ||||
| time = (label, reps, fn) => | ||||
|     start = Date.now() | ||||
|     # Exclusive range: the inclusive `[0..reps]` ran one extra iteration. | ||||
|     for i in [0...reps] | ||||
|         fn() | ||||
|     util.puts label + ": " + (Date.now() - start) | ||||
| 
 | ||||
| input = "{\"a\": [1, \"foo\", [], {\"foo\": 1, \"bar\": [1, 2, 3]}] }" | ||||
| # Sanity check: parse the sample once and print it four levels deep. | ||||
| parsed = (new ReJSON).parse(input) | ||||
| console.log util.inspect(parsed, false, 4) | ||||
| # Time the built-in parser, the PEG.js parser and ReParse on the same input. | ||||
| jsonparse = new ReJSON | ||||
| time "JSON", 1000, => JSON.parse(input) | ||||
| time "PEG.js", 1000, => peg.parse(input) | ||||
| time "ReParse", 1000, => jsonparse.parse(input) | ||||
| 
 | ||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							|  | @ -0,0 +1,21 @@ | |||
| # Taken from https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects/Array/Reduce | ||||
| 
 | ||||
| # Install a fallback `reduce` only when the runtime lacks one. | ||||
| unless Array::reduce? | ||||
|     Array::reduce = -> | ||||
|         callback = arguments[0] | ||||
|         if typeof callback isnt 'function' | ||||
|             throw new TypeError "First argument is not callable" | ||||
|         if arguments.length >= 2 | ||||
|             # An explicit initial value was supplied: fold from index 0. | ||||
|             first = 0 | ||||
|             acc = arguments[1] | ||||
|         else | ||||
|             # No initial value: seed with element 0 and fold from index 1. | ||||
|             throw new TypeError "Array length is 0 and no second argument" if @length is 0 | ||||
|             first = 1 | ||||
|             acc = @[0] | ||||
|         for index in [first...@length] | ||||
|             acc = callback.call(undefined, acc, @[index], index, @) | ||||
|         acc | ||||
|  | @ -0,0 +1,13 @@ | |||
| { | ||||
|     "name": "reparse-coffee", | ||||
|     "description": "An implementation of a parser combinator in coffeescript.", | ||||
|     "author": "Elf M. Sternberg (elf.sternberg@gmail.com)", | ||||
|     "version": "0.0.1", | ||||
|     "dependencies": { | ||||
|         "coffee-script": "1.x.x" | ||||
|     }, | ||||
|     "devDependencies": { | ||||
|         "docco": "0.3.x" | ||||
|     }, | ||||
|     "engine": "node >= 0.6.0" | ||||
| } | ||||
|  | @ -0,0 +1,285 @@ | |||
| #; -*- mode: coffee -*- | ||||
| 
 | ||||
| #      ___     ___                     ___      __  __ | ||||
| #     | _ \___| _ \__ _ _ _ ___ ___   / __|___ / _|/ _|___ ___ | ||||
| #     |   / -_)  _/ _` | '_(_-</ -_) | (__/ _ \  _|  _/ -_) -_) | ||||
| #     |_|_\___|_| \__,_|_| /__/\___|  \___\___/_| |_| \___\___| | ||||
| # | ||||
| 
 | ||||
| # A hand-re-written implementation of Ben Weaver's ReParse, a parser | ||||
| # combinator, which in turn was heavily influenced by Haskell's | ||||
| # PARSEC.  In many cases, this code is almost exactly Ben's; in some, | ||||
| # I've lifted further ideas and commentary from the JSParsec project. | ||||
| 
 | ||||
| exports.ReParse = class ReParse | ||||
| 
 | ||||
|     # Extend from ReParse and set to true if you don't care about | ||||
|     # whitespace. | ||||
| 
 | ||||
|     ignorews: false | ||||
| 
 | ||||
|     # Where the parse phase begins.  The correct way to override this | ||||
|     # is to create a child method: | ||||
| 
 | ||||
|     #     parse: -> | ||||
|     #         super | ||||
|     #         @start(@your_top_level_production) | ||||
| 
 | ||||
|     # Remember the text to be parsed; subclasses call this via `super`. | ||||
|     parse: (input) => @input = input | ||||
| 
 | ||||
|     # Returns true when this parser has exhausted its input. | ||||
| 
 | ||||
|     # True once nothing of the input remains. | ||||
|     eof: => "" is @input | ||||
| 
 | ||||
|     # Indicate failure, optionally resetting the input to a previous | ||||
|     # state.  This is not an exceptional condition (in choice and | ||||
|     # maybes, for example). | ||||
| 
 | ||||
|     fail: (input) => | ||||
|         # Rewind only when a saved position was actually supplied. | ||||
|         unless input is undefined | ||||
|             @input = input | ||||
|         # The method itself is the sentinel that callers compare against. | ||||
|         throw @fail | ||||
| 
 | ||||
|     # Execute a production, which could be a function or a RegExp. | ||||
| 
 | ||||
|     produce: (method) => | ||||
|         # A RegExp is matched against the input; anything else is called as a production. | ||||
|         if method instanceof RegExp | ||||
|             produced = @match method | ||||
|         else | ||||
|             produced = method.call @ | ||||
|         if @ignorews then @skipWS() | ||||
|         produced | ||||
| 
 | ||||
|     # Begin parsing using the given production, return the result. | ||||
|     # All input must be consumed. | ||||
| 
 | ||||
|     start: (method) => | ||||
|         @skipWS() if @ignorews | ||||
|         try | ||||
|             parsed = @produce method | ||||
|             # Success only counts when the whole input was consumed. | ||||
|             if @eof() then return parsed | ||||
|         catch err | ||||
|             # Anything other than the parse-failure sentinel is a real error. | ||||
|             unless err is @fail then throw err | ||||
|         throw new Error("Could not parse '" + @input + "'.") | ||||
| 
 | ||||
| 
 | ||||
|     # Attempts to apply the method and produce a value.  If it fails, | ||||
|     # restores the input to the previous state. | ||||
| 
 | ||||
|     maybe: (method) => | ||||
|         saved = @input | ||||
|         try | ||||
|             return @produce(method) | ||||
|         catch err | ||||
|             unless err is @fail | ||||
|                 throw err | ||||
|         # The production failed: rewind to the saved position and fail again. | ||||
|         @fail saved | ||||
| 
 | ||||
|     # Try to run the production `method`.  If the production fails, | ||||
|     # don't fail, just return the otherwise. | ||||
| 
 | ||||
|     option: (method, otherwise) => | ||||
|         try | ||||
|             return @maybe(method) | ||||
|         catch err | ||||
|             throw err unless err is @fail | ||||
|         # The production failed (input already rewound by `maybe`): use the fallback. | ||||
|         otherwise | ||||
| 
 | ||||
|     # Given three parsers, return the value produced by `body`.  This | ||||
|     # is equivalent to seq(left, body, right)[1].  I'm not sure why | ||||
|     # Weaver created an alternative syntax, then.  Wishing JSParsec | ||||
|     # wasn't so damned unreadable. | ||||
| 
 | ||||
|     between: (left, right, body) => | ||||
|         saved = @input | ||||
|         try | ||||
|             @produce(left) | ||||
|             inner = @produce(body) | ||||
|             @produce(right) | ||||
|             return inner | ||||
|         catch err | ||||
|             throw err unless err is @fail | ||||
|         # Any of the three parts failed: rewind to where we started. | ||||
|         @fail saved | ||||
| 
 | ||||
|     # Match a regular expression against the input, returning the | ||||
|     # first captured group.  If no group is captured, return the | ||||
|     # matched string.  This can result in surprises, if you don't wrap | ||||
|     # your groups exactly right, which is common in ()? regexps. | ||||
| 
 | ||||
|     match: (pattern) => | ||||
|         hit = @input.match(pattern) | ||||
|         if not hit | ||||
|             return @fail() | ||||
|         [whole, group] = hit | ||||
|         # Consume the full match, whichever part ends up being returned. | ||||
|         @input = @input.substr(whole.length) | ||||
|         if group is undefined then whole else group | ||||
| 
 | ||||
|     # Returns the first production among arguments for which the | ||||
|     # production does not fail. | ||||
| 
 | ||||
|     choice: => | ||||
|         saved = @input | ||||
|         # Try the alternatives in the order given; the first success wins. | ||||
|         for alternative in arguments | ||||
|             try | ||||
|                 return @produce(alternative) | ||||
|             catch err | ||||
|                 throw err unless err is @fail | ||||
|         @fail saved | ||||
| 
 | ||||
|     # Match every production in a sequence, returning a list of the | ||||
|     # values produced.  Sometimes Coffeescript's parser surprises me, | ||||
|     # as in this case where the try-return pairing confused it, and it | ||||
|     # needed help isolating the element. | ||||
|     # | ||||
|     # I have yet to find a case where Weaver's unshift of the | ||||
|     # beginning of the input string to the front of the return value | ||||
|     # makes sense.  It's not a feature of Parsec's sequence primitive, | ||||
|     # for example. | ||||
|     # | ||||
|     # It could be useful if one needed the raw of a seq: for example, | ||||
|     # when processing XML entities for correctness, not value.  But in | ||||
|     # the short term, the productions can be as preservative as | ||||
|     # Weaver's technique, and for my needs that's my story, and I'm | ||||
|     # sticking to it. | ||||
| 
 | ||||
|     seq: => | ||||
|         saved = @input | ||||
|         try | ||||
|             values = [] | ||||
|             for step in arguments | ||||
|                 values.push @produce(step) | ||||
|             return values | ||||
|         catch err | ||||
|             throw err unless err is @fail | ||||
|         # One step failed: rewind past everything the earlier steps consumed. | ||||
|         @fail saved | ||||
| 
 | ||||
|     # Applies the production `method` `min` or more times.  Returns | ||||
|     # the parser object as a chainable convenience if it does not | ||||
|     # fail.  Will fail if it skips less than `min` times. | ||||
| 
 | ||||
|     skip: (method, min = null) => | ||||
|         saved = @input | ||||
|         count = 0 | ||||
|         while not @eof() | ||||
|             try | ||||
|                 @maybe(method) | ||||
|                 count += 1 | ||||
|             catch err | ||||
|                 throw err unless err is @fail | ||||
|                 # First failed attempt ends the run. | ||||
|                 break | ||||
|         return @fail(saved) if min and count < min | ||||
|         @ | ||||
| 
 | ||||
|     # Applies the production `method` one or more times. | ||||
| 
 | ||||
|     skip1: (method) => @skip method, 1 | ||||
| 
 | ||||
|     # Skip whitespace.  Returns the parser object for chainable | ||||
|     # convenience.  Note that `\s` matches every whitespace character, | ||||
|     # so carriage returns and linefeeds are skipped as well. | ||||
| 
 | ||||
|     skipWS: => | ||||
|         # `\s*` also matches the empty string, so this match never fails. | ||||
|         @match /^\s*/ | ||||
|         this | ||||
| 
 | ||||
|     # Returns an array of `min` or more values produced by `method`. | ||||
| 
 | ||||
|     many: (method, min = null) => | ||||
|         input = @input | ||||
|         # Collect explicitly: the old `break` sat at loop-body level, so the | ||||
|         # loop always stopped after one attempt and nothing was collected. | ||||
|         result = [] | ||||
|         until @eof() | ||||
|             try | ||||
|                 result.push @maybe(method) | ||||
|             catch err | ||||
|                 throw err if err isnt @fail | ||||
|                 # Stop at the first failed attempt; `maybe` has already rewound it. | ||||
|                 break | ||||
|         # Too few matches: rewind to the entry position and fail. | ||||
|         if min and (result.length < min) then @fail input else result | ||||
| 
 | ||||
|     # Returns an array of at least one value produced by `method`. | ||||
|     # Fails if zero values are produced. | ||||
| 
 | ||||
|     many1: (method) => @many(method, 1) | ||||
| 
 | ||||
|     # Return the array of values produced by `method` with `sep` | ||||
|     # between each value.  A trailing `sep` is left unconsumed; use | ||||
|     # `sepEndBy` to accept one. | ||||
| 
 | ||||
|     sepBy: (method, sep, min = 0) => | ||||
|         # `orig` is the entry position; `input` tracks the position just | ||||
|         # before the separator currently being attempted. | ||||
|         orig = @input | ||||
|         input = undefined | ||||
|         result = [] | ||||
|         try | ||||
|             result.push @produce method | ||||
|             until @eof() | ||||
|                 try | ||||
|                     input = @input | ||||
|                     @produce sep | ||||
|                     result.push @produce method | ||||
|                 catch err | ||||
|                     throw err if err isnt @fail | ||||
|                     # Rewind to before the separator and leave the loop by | ||||
|                     # re-raising the failure, which the outer catch absorbs. | ||||
|                     @fail input | ||||
|         catch err | ||||
|             throw err if err isnt @fail | ||||
|         # Too few values: rewind to the entry position and fail. | ||||
|         if min and (result.length < min) then @fail orig else result | ||||
| 
 | ||||
|     # Like `sepBy`, but at least one value is required. | ||||
|     sepBy1: (method, sep) => @sepBy(method, sep, 1) | ||||
| 
 | ||||
|     # parses `min` or more productions of `method` (zero by default), | ||||
|     # which must be terminated with the `end` production.  RESOLVE: | ||||
|     # There is no alternative production being given to `@option` in | ||||
|     # Weaver's code.  I've changed this to @produce for the time | ||||
|     # being, which seems to be in line with the JSParsec | ||||
|     # implementation. | ||||
| 
 | ||||
|     endBy: (method, end, min = 0) => | ||||
|         items = @many(method, min) | ||||
|         # NOTE(review): the terminator goes through `@option`, so a missing | ||||
|         # `end` is tolerated even though the prose above says `@produce`. | ||||
|         @option(end) | ||||
|         items | ||||
| 
 | ||||
|     # Parses 1 or more productions of method, which must be terminated | ||||
|     # with the end production | ||||
| 
 | ||||
|     endBy1: (method, end) => @endBy(method, end, 1) | ||||
| 
 | ||||
|     # Returns an array of `min` or more values produced by `method`, | ||||
|     # separated by `sep`, and optionally terminated by `sep`. | ||||
|     # Defaults to zero productions. | ||||
| 
 | ||||
|     sepEndBy: (method, sep, min = 0) => | ||||
|         items = @sepBy(method, sep, min) | ||||
|         # Consume one trailing separator if present; its absence is fine. | ||||
|         @option(sep) | ||||
|         items | ||||
| 
 | ||||
|     # Returns an array of one or more values produced by `method`, | ||||
|     # separated by `sep`, and optionally terminated by `sep`.  Fails | ||||
|     # if there are zero productions. | ||||
| 
 | ||||
|     sepEndBy1: (method, sep) => @sepEndBy(method, sep, 1) | ||||
| 
 | ||||
|     # Process `min` or more occurrences of `method`, separated by `op`. | ||||
|     # Return a value obtained by the repeated application of the return | ||||
|     # of `op` to the return of `method`.  If fewer than `min` | ||||
|     # occurrences are found, the parse fails; otherwise, when `method` | ||||
|     # never matches, `otherwise` is returned.  Used, for example, to | ||||
|     # process a collection of mathematical productions of the same | ||||
|     # precedence. | ||||
| 
 | ||||
|     chainl: (method, op, otherwise = null, min = null) => | ||||
|         found = 0 | ||||
|         result = otherwise | ||||
|         # `orig` is the entry position; `input` tracks the position just | ||||
|         # before the operator currently being attempted. | ||||
|         orig = @input | ||||
|         input = undefined | ||||
|         try | ||||
|             result = @maybe(method) | ||||
|             found++ | ||||
|             until @eof() | ||||
|                 try | ||||
|                     input = @input | ||||
|                     # `op` yields a binary function; fold the next operand into the running value. | ||||
|                     result = @produce(op)(result, @produce(method)) | ||||
|                     found++ | ||||
|                 catch err | ||||
|                     throw err  if err isnt @fail | ||||
|                     # Rewind to before the operator and leave the loop by | ||||
|                     # re-raising the failure, which the outer catch absorbs. | ||||
|                     @fail input | ||||
|         catch err | ||||
|             throw err  if err isnt @fail | ||||
|         # Too few operands: rewind to the entry position (previously `input`, | ||||
|         # which could be undefined or a mid-chain position) and fail. | ||||
|         if min and (found < min) then @fail orig else result | ||||
| 
 | ||||
|     # Like `chainl`, but must produce at least one production.  Fails | ||||
|     # if there are zero productions. | ||||
| 
 | ||||
|     chainl1: (method, op) => @chainl(method, op, null, 1) | ||||
| 
 | ||||
		Loading…
	
		Reference in New Issue