Docco'd the library. Updated the examples to use the new parse-inheritance.
This commit is contained in:
commit
bd77a5742d
|
@ -0,0 +1,7 @@
|
||||||
|
.npm
|
||||||
|
node_modules
|
||||||
|
*~
|
||||||
|
#*
|
||||||
|
.#*
|
||||||
|
*/#*
|
||||||
|
docs
|
|
@ -0,0 +1,29 @@
|
||||||
|
ReParse = require('../src/reparse').ReParse
|
||||||
|
util = require("util")
|
||||||
|
|
||||||
|
# A four-function calculator grammar.  `expr` chains terms with the
# additive operators and `term` chains factors with the multiplicative
# ones, so `*` and `/` bind tighter than `+` and `-`; `group` lets a
# parenthesised expression stand in for a number.
class Calc extends ReParse

    # Whitespace between tokens carries no meaning in this grammar.
    ignorews: true

    # Operator token -> the binary function that applies it.
    OPS:
        '+': (a, b) -> a + b
        '-': (a, b) -> a - b
        '*': (a, b) -> a * b
        '/': (a, b) -> a / b

    expr:   => @chainl(@term, @addop)
    term:   => @chainl1(@factor, @mulop)
    factor: => @choice(@group, @number)
    group:  => @between(/^\(/, /^\)/, @expr)

    # An optionally negative integer or decimal literal.
    number: =>
        digits = @match(/^(\-?\d+(\.\d+)?)/)
        parseFloat digits

    # Both operator productions consume one operator character and
    # return the matching function out of OPS.
    mulop: =>
        token = @match(/^[\*\/]/)
        @OPS[token]

    addop: =>
        token = @match(/^[\+\-]/)
        @OPS[token]

    # Store the input through the parent class, then evaluate it
    # starting from the `expr` production.
    parse: =>
        super
        @start(@expr)
|
||||||
|
|
||||||
|
# Command-line entry point: expects exactly one argument, the
# expression to evaluate.  `console.log` replaces `util.puts`, which
# is deprecated and no longer available in current Node releases.
unless process.argv.length is 3
    console.log "Usage: node " + process.argv[1] + " expression"
    process.exit 1

# Evaluate the expression and print the result.
console.log (new Calc).parse(process.argv[2])
|
|
@ -0,0 +1,23 @@
|
||||||
|
ReParse = require('../src/reparse').ReParse
|
||||||
|
util = require("util")
|
||||||
|
|
||||||
|
# Parses a comma-separated list of e-mail addresses, each either bare
# (`user@host`) or named (`Some Name <user@host>`), and returns the
# bare addresses as an array of strings.
#
# None of the patterns carries the `m` flag.  ReParse consumes input by
# chopping `probe[0].length` characters off the front, which is only
# correct when `^` means "start of the remaining input"; with `m`, `^`
# also matches after any newline, so a pattern could match further
# along and the wrong text would be discarded.
class EmailAddress extends ReParse

    # Addresses separated by commas; a trailing comma is tolerated.
    addressList: => @sepEndBy(@address, /^\s*,\s*/)

    address: => @choice(@namedAddress, @bareAddress)

    # `phrase <bare>`: index 2 of the sequence is the bare address.
    namedAddress: => @seq(@phrase, /^\s*</, @bareAddress, /^>/)[2]

    # `word@word`, rejoined into a single string.
    bareAddress: => @seq(@word, /^@/, @word).join ""

    phrase: => @many(@word)

    # Leading whitespace is skipped before each word.
    word: => @skip(/^\s+/).choice(@quoted, @dottedAtom)

    # A double-quoted string; backslash escapes are allowed inside it.
    quoted: => @match(/^"(?:\\.|[^"\r\n])+"/)

    # One or more atoms separated by single dots.
    dottedAtom: => @match(/^[!#\$%&'\*\+\-\/\w=\?\^`\{\|\}~]+(?:\.[!#\$%&'\*\+\-\/\w=\?\^`\{\|\}~]+)*/)

    # Store the input through the parent class, then parse it as an
    # address list.
    parse: =>
        super
        @start(@addressList)
|
||||||
|
|
||||||
|
# Command-line entry point: expects exactly one argument, the list of
# addresses.  `console.log` replaces `util.puts`, which is deprecated
# and no longer available in current Node releases.
unless process.argv.length is 3
    console.log "Usage: node " + process.argv[1] + " list-of-addresses"
    process.exit 1

# Parse the list and print the resulting array.
console.log util.inspect (new EmailAddress).parse(process.argv[2])
|
|
@ -0,0 +1,51 @@
|
||||||
|
ReParse = require('../src/reparse').ReParse
|
||||||
|
util = require("util")
|
||||||
|
peg = require('./pegjson').parser;
|
||||||
|
require('./upgrades')
|
||||||
|
|
||||||
|
# A JSON parser written on top of ReParse.  Each production returns
# the JavaScript value it denotes.
class ReJSON extends ReParse

    # Whitespace between JSON tokens is insignificant.
    ignorews: true

    # Keyword text -> value, for the three JSON literals.
    LITERAL = {'true': true, 'false': false, 'null': null}

    # Single-character escape -> character code it stands for.
    STRING = {"\"": 34, "\\": 92, "/": 47, 'b': 8, 'f': 12, 'n': 10, 'r': 13, 't': 9}

    value: => @choice(@literal, @string, @number, @array, @object)

    # `members` yields [key, ":", value] triples; fold them into an
    # object, later keys overwriting earlier ones.
    object: =>
        triples = @between(/^\{/, /^\}/, @members)
        built = {}
        for triple in triples
            built[triple[0]] = triple[2]
        built

    members: => @sepBy(@pair, /^,/)

    pair: => @seq(@string, /^:/, @value)

    array: => @between(/^\[/, /^\]/, @elements)

    elements: => @sepBy(@value, /^,/)

    literal: =>
        keyword = @match(/^(true|false|null)/)
        LITERAL[keyword]

    number: =>
        text = @match(/^\-?\d+(?:\.\d+)?(?:[eE][\+\-]?\d+)?/)
        parseFloat text

    # Match a quoted string (the capture group excludes the quotes),
    # then replace every escape sequence with the character it encodes.
    string: =>
        raw = @match(/^"((?:\\["\\/bfnrt]|\\u[0-9a-fA-F]{4}|[^"\\])*)"/)
        decode = (whole, simple, hex) ->
            code = if simple then STRING[simple] else parseInt(hex, 16)
            String.fromCharCode code
        raw.replace /\\(["\\/bfnrt])|\\u([0-9a-fA-F]{4})/g, decode

    # Store the input through the parent class, then parse one value.
    parse: =>
        super
        @start(@value)
|
||||||
|
|
||||||
|
|
||||||
|
# Collect everything `stream` emits as text in the given `encoding`,
# and pass the concatenated result to `fn` when the stream ends.
capture = (stream, encoding, fn) ->
    collected = ""
    stream.setEncoding encoding
    stream.on "data", (piece) ->
        collected += piece
    stream.on "end", ->
        fn collected
|
||||||
|
|
||||||
|
# Call `fn` exactly `reps` times and print the elapsed wall-clock
# milliseconds, prefixed with `label`.
#
# The range is exclusive (`...`): the inclusive form `[0..reps]` ran
# the function `reps + 1` times.  `by 1` keeps the loop ascending, so
# a zero or negative `reps` runs nothing.  Output goes through
# `console.log` because `util.puts` is deprecated and no longer
# available in current Node releases.
time = (label, reps, fn) ->
    start = Date.now()
    for i in [0...reps] by 1
        fn()
    console.log label + ": " + (Date.now() - start)
|
||||||
|
|
||||||
|
# Sample document used for both the demonstration and the timings.
input = "{\"a\": [1, \"foo\", [], {\"foo\": 1, \"bar\": [1, 2, 3]}] }"

# Show what the ReParse-based parser produces, inspected to depth 4.
parsed = (new ReJSON).parse(input)
console.log util.inspect(parsed, false, 4)

# One parser instance is reused for the whole ReParse timing run.
jsonparse = new ReJSON()

# Time the same input through each parser, in this order.
benchmarks = [
    ["JSON",    -> JSON.parse input]
    ["PEG.js",  -> peg.parse input]
    ["ReParse", -> jsonparse.parse(input)]
]

for [label, run] in benchmarks
    time label, 1000, run
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,21 @@
|
||||||
|
# Taken from https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects/Array/Reduce
|
||||||
|
|
||||||
|
# Install `Array::reduce` only on engines that lack it.
#
# reduce(callback[, initialValue]) folds the array from left to right:
# `callback(previous, current, index, array)` is called for every
# element that exists, and its return value becomes `previous` for the
# next call.  Without `initialValue` the first existing element seeds
# the fold.  Holes in sparse arrays are skipped, as the native method
# does.
#
# Throws TypeError when `callback` is not a function, or when there is
# no existing element and no `initialValue`.
unless Array::reduce?
    Array::reduce = (callback) ->
        if typeof callback isnt 'function'
            throw new TypeError "First argument is not callable"

        length = @length >>> 0
        index = 0

        if arguments.length >= 2
            result = arguments[1]
        else
            # No seed supplied: use the first element that actually exists.
            index++ while index < length and not (index of this)
            if index >= length
                throw new TypeError "Array length is 0 and no second argument"
            result = @[index++]

        while index < length
            if index of this
                result = callback.call(undefined, result, @[index], index, this)
            index++

        result
|
|
@ -0,0 +1,13 @@
|
||||||
|
{
|
||||||
|
"name": "reparse-coffee",
|
||||||
|
"description": "An implementation of a parser combinator in coffeescript.",
|
||||||
|
"author": "Elf M. Sternberg (elf.sternberg@gmail.com)",
|
||||||
|
"version": "0.0.1",
|
||||||
|
"dependencies": {
|
||||||
|
"coffee-script": "1.x.x"
|
||||||
|
},
|
||||||
|
"devDependencies": {
|
||||||
|
"docco": "0.3.x"
|
||||||
|
},
|
||||||
|
"engines": { "node": ">= 0.6.0" }
|
||||||
|
}
|
|
@ -0,0 +1,285 @@
|
||||||
|
#; -*- mode: coffee -*-
|
||||||
|
|
||||||
|
# ___ ___ ___ __ __
|
||||||
|
# | _ \___| _ \__ _ _ _ ___ ___ / __|___ / _|/ _|___ ___
|
||||||
|
# | / -_) _/ _` | '_(_-</ -_) | (__/ _ \ _| _/ -_) -_)
|
||||||
|
# |_|_\___|_| \__,_|_| /__/\___| \___\___/_| |_| \___\___|
|
||||||
|
#
|
||||||
|
|
||||||
|
# A hand-re-written implementation of Ben Weaver's ReParse, a parser
|
||||||
|
# combinator, which in turn was heavily influenced by Haskell's
|
||||||
|
# PARSEC. In many cases, this code is almost exactly Ben's; in some,
|
||||||
|
# I've lifted further ideas and commentary from the JSParsec project.
|
||||||
|
|
||||||
|
exports.ReParse = class ReParse

    # Extend from ReParse and set to true if you don't care about
    # whitespace.  When true, `start` skips leading whitespace and
    # `produce` skips whitespace after every production.

    ignorews: false

    # Where the parse phase begins.  The base method only stores the
    # input.  The correct way to override this is to create a child
    # method:
    #
    #     parse: ->
    #         super
    #         @start(@your_top_level_production)

    parse: (input) =>
        @input = input

    # Returns true when this parser has exhausted its input.

    eof: =>
        @input is ""

    # Indicate failure, optionally resetting the input to a previous
    # state.  The bound `fail` function itself is what gets thrown, so
    # every combinator recognises a parse failure with `err is @fail`
    # and rethrows anything else.  This is not an exceptional
    # condition (in choice and maybes, for example).

    fail: (input) =>
        @input = input if input isnt undefined
        throw @fail

    # Execute a production, which could be a function or a RegExp.
    # Functions are called with the parser as `this`.  Returns the
    # value produced.

    produce: (method) =>
        val = if method instanceof RegExp then @match(method) else method.call(this)
        @skipWS() if @ignorews
        val

    # Begin parsing using the given production, return the result.
    # All input must be consumed: an Error quoting the remaining input
    # is thrown when the production fails or leaves input behind.

    start: (method) =>
        @skipWS() if @ignorews
        try
            val = @produce method
            return val if @eof()
        catch err
            throw err if err isnt @fail
        throw new Error("Could not parse '" + @input + "'.")

    # Attempts to apply the method and produce a value.  If it fails,
    # restores the input to the previous state and fails in turn.

    maybe: (method) =>
        input = @input
        try
            return @produce method
        catch err
            throw err if err isnt @fail
        @fail input

    # Try to run the production `method`.  If the production fails,
    # don't fail, just return `otherwise`.

    option: (method, otherwise) =>
        try
            return @maybe method
        catch err
            throw err if err isnt @fail
        otherwise

    # Given three parsers, return the value produced by `body`.  This
    # is equivalent to seq(left, body, right)[1].  On failure the input
    # is restored to where `left` started.

    between: (left, right, body) =>
        input = @input
        try
            @produce left
            val = @produce body
            @produce right
            return val
        catch err
            throw err if err isnt @fail
        @fail input

    # Match a regular expression against the input and consume the
    # matched text, returning the first captured group.  If no group
    # is captured, return the matched string.  This can result in
    # surprises, if you don't wrap your groups exactly right, which is
    # common in ()? regexps.
    #
    # The consumed text is always taken from the front of the input,
    # so patterns must be anchored with `^` and must not use the `m`
    # flag; otherwise a match further along would discard the wrong
    # characters.

    match: (pattern) =>
        probe = @input.match pattern
        return @fail() unless probe
        @input = @input.substr probe[0].length
        if probe[1] is undefined then probe[0] else probe[1]

    # Returns the value of the first production among the arguments
    # that does not fail.  The input is rewound before each further
    # alternative, so an alternative that consumed something before
    # failing cannot affect the next one.  Fails, with the input
    # restored, when every alternative fails.

    choice: (productions...) =>
        input = @input
        for production in productions
            try
                return @produce production
            catch err
                throw err if err isnt @fail
                @input = input
        @fail input

    # Match every production in a sequence, returning a list of the
    # values produced, one per production and in order.  If any
    # production fails, the input is restored and the sequence fails.
    #
    # I have yet to find a case where Weaver's unshift of the
    # beginning of the input string to the front of the return value
    # makes sense.  It's not a feature of Parsec's sequence primitive,
    # for example.
    #
    # It could be useful if one needed the raw of a seq: for example,
    # when processing XML entities for correctness, not value.  But in
    # the short term, the productions can be as preservative as
    # Weaver's technique, and for my needs that's my story, and I'm
    # sticking to it.

    seq: (productions...) =>
        input = @input
        try
            values = (@produce(production) for production in productions)
            return values
        catch err
            throw err if err isnt @fail
        @fail input

    # Applies the production `method` repeatedly until it fails or the
    # input runs out, discarding the values.  Returns the parser
    # object as a chainable convenience if it does not fail.  Will
    # fail, with the input restored, if it skips fewer than `min`
    # times.

    skip: (method, min = null) =>
        found = 0
        input = @input
        until @eof()
            try
                @maybe method
                found++
            catch err
                throw err if err isnt @fail
                break
        if min and (found < min) then @fail input else this

    # Applies the production `method` one or more times.

    skip1: (method) => @skip(method, 1)

    # Skip whitespace.  Returns the parser object for chainable
    # convenience.  Note that `\s` matches every whitespace character,
    # carriage returns and linefeeds included.

    skipWS: =>
        @match(/^\s*/)
        this

    # Returns an array of the values produced by applying `method`
    # until it fails or the input runs out.  Fails, with the input
    # restored, if fewer than `min` values are produced.

    many: (method, min = null) =>
        input = @input
        result = []
        until @eof()
            try
                result.push @maybe(method)
            catch err
                throw err if err isnt @fail
                break
        if min and (result.length < min) then @fail input else result

    # Returns an array of at least one value produced by `method`.
    # Fails if zero values are produced.

    many1: (method) => @many method, 1

    # Return the array of values produced by `method` with `sep`
    # between each value.  A `sep` that is not followed by another
    # value is not consumed (use `sepEndBy` to accept a trailing
    # separator).  Fails, with the input restored, if fewer than `min`
    # values are produced.

    sepBy: (method, sep, min = 0) =>
        orig = @input
        input = undefined
        result = []
        try
            result.push @produce method
            until @eof()
                try
                    input = @input
                    @produce sep
                    result.push @produce method
                catch err
                    throw err if err isnt @fail
                    # Rewind to just before the separator; the failure
                    # thrown here ends the loop via the outer catch.
                    @fail input
        catch err
            throw err if err isnt @fail
        if min and (result.length < min) then @fail orig else result

    # Like `sepBy`, but must produce at least one value.

    sepBy1: (method, sep) => @sepBy method, sep, 1

    # Parses `min` or more productions of `method` (zero by default),
    # then consumes one trailing `end` production if it is present.
    # RESOLVE: the terminator is optional as written, because `@option`
    # is used (as in Weaver's code, which gives it no alternative
    # value).  JSParsec requires the terminator, which would mean
    # `@produce end` here.

    endBy: (method, end, min = 0) =>
        val = @many method, min
        @option end
        val

    # Parses 1 or more productions of `method`, followed by an
    # optional `end` production (see `endBy`).

    endBy1: (method, end) =>
        @endBy method, end, 1

    # Returns an array of `min` or more values produced by `method`,
    # separated by `sep`, and optionally terminated by `sep`.
    # Defaults to zero productions.

    sepEndBy: (method, sep, min = 0) =>
        val = @sepBy method, sep, min
        @option sep
        val

    # Returns an array of one or more values produced by `method`,
    # separated by `sep`, and optionally terminated by `sep`.  Fails
    # if there are zero productions.

    sepEndBy1: (method, sep) => @sepEndBy method, sep, 1

    # Process occurrences of `method`, separated by `op`.  Return a
    # value obtained by the repeated, left-associative application of
    # the function returned by `op` to the values returned by `method`.
    # If `method` cannot be produced even once, `otherwise` is
    # returned.  If `min` is given and fewer than `min` occurrences
    # are found, the production fails with the input restored to where
    # the chain began.  Used, for example, to process a collection of
    # mathematical productions of the same precedence.

    chainl: (method, op, otherwise = null, min = null) =>
        found = 0
        result = otherwise
        orig = @input
        input = undefined
        try
            result = @maybe(method)
            found++
            until @eof()
                try
                    input = @input
                    result = @produce(op)(result, @produce(method))
                    found++
                catch err
                    throw err if err isnt @fail
                    # Rewind to just before the operator; the failure
                    # thrown here ends the loop via the outer catch.
                    @fail input
        catch err
            throw err if err isnt @fail
        if min and (found < min) then @fail orig else result

    # Like `chainl`, but must produce at least one production.  Fails
    # if there are zero productions.

    chainl1: (method, op) => @chainl method, op, null, 1
|
||||||
|
|
Loading…
Reference in New Issue