Docco'd the library. Updated the examples to use the new parse-inheritance.
This commit is contained in:
commit
bd77a5742d
|
@ -0,0 +1,7 @@
|
|||
.npm
|
||||
node_modules
|
||||
*~
|
||||
#*
|
||||
.#*
|
||||
*/#*
|
||||
docs
|
|
@ -0,0 +1,29 @@
|
|||
ReParse = require('../src/reparse').ReParse
|
||||
util = require("util")
|
||||
|
||||
# Calc: a four-function infix calculator written as a set of ReParse
# productions.  Whitespace around tokens is skipped automatically.
class Calc extends ReParse
    ignorews: true

    # Operator token -> function combining the left and right operands.
    OPS:
        '+': (lhs, rhs) -> lhs + rhs
        "-": (lhs, rhs) -> lhs - rhs
        "*": (lhs, rhs) -> lhs * rhs
        "/": (lhs, rhs) -> lhs / rhs

    # Terms joined by additive operators, folded left to right.
    expr: => @chainl(@term, @addop)

    # One or more factors joined by multiplicative operators.
    term: => @chainl1(@factor, @mulop)

    # Either a parenthesised expression or a bare number.
    factor: => @choice(@group, @number)

    # An expression wrapped in parentheses; yields the inner value.
    group: => @between(/^\(/, /^\)/, @expr)

    # An optionally negative decimal number, converted with parseFloat.
    number: =>
        digits = @match(/^(\-?\d+(\.\d+)?)/)
        parseFloat(digits)

    # Consume `*` or `/` and return the matching function from OPS.
    mulop: =>
        symbol = @match(/^[\*\/]/)
        @OPS[symbol]

    # Consume `+` or `-` and return the matching function from OPS.
    addop: =>
        symbol = @match(/^[\+\-]/)
        @OPS[symbol]

    # Store the input via the parent class, then evaluate it starting
    # from the `expr` production.
    parse: =>
        super
        @start(@expr)
|
||||
|
||||
# Command-line driver: expects exactly one argument, the expression to
# evaluate, and prints the computed value.
if process.argv.length isnt 3
    util.puts "Usage: node " + process.argv[1] + " expression"
    process.exit 1

answer = (new Calc).parse(process.argv[2])
util.puts answer
|
|
@ -0,0 +1,23 @@
|
|||
ReParse = require('../src/reparse').ReParse
|
||||
util = require("util")
|
||||
|
||||
# EmailAddress: parses a comma-separated list of e-mail addresses, each
# either bare (`user@host`) or named (`Some Name <user@host>`), and
# returns the list of bare addresses.
#
# None of the patterns below carry the `m` (multiline) flag.  With that
# flag `^` may also match just after a newline in the middle of the
# remaining input, while ReParse's `match` always removes the matched
# length from the *front* of the input, so a multiline match could
# consume the wrong characters.
class EmailAddress extends ReParse

    # Addresses separated by commas, with an optional trailing comma.
    addressList: => @sepEndBy @address, /^\s*,\s*/

    # A named address is tried first, then a bare one.
    address: => @choice @namedAddress, @bareAddress

    # `phrase <addr>`; element 2 of the sequence is the bare address.
    namedAddress: => @seq(@phrase, /^\s*</, @bareAddress, /^>/)[2]

    # `word@word`, re-joined into a single string.
    bareAddress: => @seq(@word, /^@/, @word).join ""

    # Zero or more words (the display name).
    phrase: => @many @word

    # Skip leading whitespace, then read a quoted string or dotted atom.
    word: => @skip(/^\s+/).choice @quoted, @dottedAtom

    # A double-quoted string; backslash escapes are allowed, raw CR/LF
    # are not.
    quoted: => @match /^"(?:\\.|[^"\r\n])+"/

    # One or more atoms separated by single dots.
    dottedAtom: => @match /^[!#\$%&'\*\+\-\/\w=\?\^`\{\|\}~]+(?:\.[!#\$%&'\*\+\-\/\w=\?\^`\{\|\}~]+)*/

    # Store the input via the parent class, then parse it as an
    # address list.
    parse: =>
        super
        @start(@addressList)
|
||||
|
||||
# Command-line driver: expects exactly one argument, the address list,
# and prints the parsed result.
if process.argv.length isnt 3
    util.puts "Usage: node " + process.argv[1] + " list-of-addresses"
    process.exit 1

addresses = (new EmailAddress).parse(process.argv[2])
console.log util.inspect(addresses)
|
|
@ -0,0 +1,51 @@
|
|||
ReParse = require('../src/reparse').ReParse
|
||||
util = require("util")
|
||||
peg = require('./pegjson').parser;
|
||||
require('./upgrades')
|
||||
|
||||
# ReJSON: a JSON parser written as ReParse productions.  Whitespace
# between tokens is skipped automatically.
class ReJSON extends ReParse
    ignorews: true

    # Keyword -> value for the three JSON literals.
    LITERAL = {'true': true, 'false': false, 'null': null}

    # Single-character escape -> character code it stands for.
    STRING = {"\"": 34, "\\": 92, "/": 47, 'b': 8, 'f': 12, 'n': 10, 'r': 13, 't': 9}

    # Any JSON value; the alternatives are tried in this order.
    value: => @choice(@literal, @string, @number, @array, @object)

    # `{ members }`, folded into a plain object.  Each pair is the
    # three-element sequence produced by `pair`: key, colon, value.
    object: =>
        pairs = @between(/^\{/, /^\}/, @members)
        collect = (obj, pair) =>
            obj[pair[0]] = pair[2]
            obj
        pairs.reduce(collect, {})

    # Comma-separated key/value pairs.
    members: => @sepBy(@pair, /^,/)

    # `string : value`
    pair: => @seq(@string, /^:/, @value)

    # `[ elements ]`
    array: => @between(/^\[/, /^\]/, @elements)

    # Comma-separated values.
    elements: => @sepBy(@value, /^,/)

    # true / false / null, mapped through LITERAL.
    literal: =>
        word = @match(/^(true|false|null)/)
        LITERAL[word]

    # A JSON number, converted with parseFloat.
    number: =>
        digits = @match(/^\-?\d+(?:\.\d+)?(?:[eE][\+\-]?\d+)?/)
        parseFloat(digits)

    # A double-quoted string with its escape sequences decoded.
    string: =>
        raw = @match(/^"((?:\\["\\/bfnrt]|\\u[0-9a-fA-F]{4}|[^"\\])*)"/)
        raw.replace /\\(["\\/bfnrt])|\\u([0-9a-fA-F]{4})/g, (whole, simple, hex) =>
            code = if simple then STRING[simple] else parseInt(hex, 16)
            String.fromCharCode(code)

    # Store the input via the parent class, then parse a single value.
    parse: =>
        super
        @start(@value)
|
||||
|
||||
|
||||
# Collect everything a stream emits and hand it to `fn` in one piece.
#
# stream   - object exposing `setEncoding` and `on`
# encoding - passed straight to `stream.setEncoding`
# fn       - called with the concatenated chunks when "end" fires
capture = (stream, encoding, fn) =>
    received = ""
    append = (chunk) => received += chunk
    finish = => fn received
    stream.setEncoding encoding
    stream.on "data", append
    stream.on "end", finish
|
||||
|
||||
# Run `fn` exactly `reps` times and print the elapsed wall-clock time
# in milliseconds, prefixed with `label`.
#
# The range is exclusive (`...`): an inclusive `[0..reps]` would call
# `fn` reps + 1 times.  `by 1` pins the direction so that a zero or
# negative `reps` runs nothing instead of counting downwards.
time = (label, reps, fn) =>
    start = Date.now()
    for i in [0...reps] by 1
        fn()
    util.puts label + ": " + (Date.now() - start)
|
||||
|
||||
# Demo and micro-benchmark: parse a sample document once with ReJSON
# and print it, then time the native JSON parser, the PEG.js parser and
# ReJSON over the same input.
input = "{\"a\": [1, \"foo\", [], {\"foo\": 1, \"bar\": [1, 2, 3]}] }"
parsed = (new ReJSON).parse(input)
console.log util.inspect(parsed, false, 4)

jsonparse = new ReJSON()

time("JSON", 1000, => JSON.parse(input))
time("PEG.js", 1000, => peg.parse(input))
time("ReParse", 1000, => jsonparse.parse(input))
|
||||
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,21 @@
|
|||
# Taken from https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects/Array/Reduce
|
||||
|
||||
# Install Array::reduce only when the runtime does not already provide
# one.
unless Array::reduce?
    # reduce(callback[, initialValue])
    #
    # Folds the array from left to right.  `callback` is invoked as
    # callback(previous, current, index, array) with `this` undefined.
    # Without an initial value the first element seeds the fold and
    # iteration starts at index 1.  Throws TypeError when `callback` is
    # not a function, or when the array is empty and no initial value
    # was supplied.
    Array::reduce = ->
        callback = arguments[0]

        unless typeof callback is 'function'
            throw new TypeError "First argument is not callable"

        if arguments.length >= 2
            memo = arguments[1]
            index = 0
        else
            if @length is 0
                throw new TypeError "Array length is 0 and no second argument"
            memo = @[0]
            index = 1

        # The length is read once, before the first callback runs.
        count = @length
        while index < count
            memo = callback.call(undefined, memo, @[index], index, @)
            index++

        memo
|
|
@ -0,0 +1,13 @@
|
|||
{
|
||||
"name": "reparse-coffee",
|
||||
"description": "An implementation of a parser combinator in coffeescript.",
|
||||
"author": "Elf M. Sternberg (elf.sternberg@gmail.com)",
|
||||
"version": "0.0.1",
|
||||
"dependencies": {
|
||||
"coffee-script": "1.x.x"
|
||||
},
|
||||
"devDependencies": {
|
||||
"docco": "0.3.x"
|
||||
},
|
||||
"engine": "node >= 0.6.0"
|
||||
}
|
|
@ -0,0 +1,285 @@
|
|||
#; -*- mode: coffee -*-
|
||||
|
||||
# ___ ___ ___ __ __
|
||||
# | _ \___| _ \__ _ _ _ ___ ___ / __|___ / _|/ _|___ ___
|
||||
# | / -_) _/ _` | '_(_-</ -_) | (__/ _ \ _| _/ -_) -_)
|
||||
# |_|_\___|_| \__,_|_| /__/\___| \___\___/_| |_| \___\___|
|
||||
#
|
||||
|
||||
# A hand-re-written implementation of Ben Weaver's ReParse, a parser
|
||||
# combinator, which in turn was heavily influenced by Haskell's
|
||||
# PARSEC. In many cases, this code is almost exactly Ben's; in some,
|
||||
# I've lifted further ideas and commentary from the JSParsec project.
|
||||
|
||||
# ReParse: a small parser-combinator base class.  Subclasses define
# productions as methods and combine them with the helpers below.
#
# A production is either a RegExp (handled by `match`) or a function
# (called with the parser as `this`).  Failure is signalled by throwing
# the parser's own `fail` method as a sentinel; every combinator that
# backtracks catches it, rethrows anything else, and restores `@input`
# to where it started.
exports.ReParse = class ReParse

    # Extend from ReParse and set to true if you don't care about
    # whitespace.  When true, whitespace is skipped once before parsing
    # starts and again after every production run through `produce`.

    ignorews: false

    # Where the parse phase begins.  Stores the text to be parsed.  The
    # correct way to override this is to create a child method:
    #
    #     parse: ->
    #         super
    #         @start(@your_top_level_production)

    parse: (input) =>
        @input = input

    # Returns true when this parser has exhausted its input.

    eof: =>
        @input is ""

    # Indicate failure, optionally resetting the input to a previous
    # state.  This is not an exceptional condition (in choice and
    # maybes, for example).  The `fail` method itself is what gets
    # thrown; callers recognise it with `err isnt @fail`.

    fail: (input) =>
        @input = input if input isnt undefined
        throw @fail

    # Execute a production, which could be a function or a RegExp, and
    # return its value.  Trailing whitespace is skipped afterwards when
    # `ignorews` is set.

    produce: (method) =>
        val = if (method instanceof RegExp) then @match(method) else method.call(this)
        @skipWS() if @ignorews
        val

    # Begin parsing using the given production, return the result.
    # All input must be consumed: if the production fails, or succeeds
    # with input left over, an Error naming the remaining input is
    # thrown.

    start: (method) =>
        @skipWS() if @ignorews
        try
            val = @produce method
            return val if @eof()
        catch err
            throw err if err isnt @fail
        throw new Error("Could not parse '" + @input + "'.")


    # Attempts to apply the method and produce a value.  If it fails,
    # restores the input to the previous state and fails in turn.

    maybe: (method) =>
        input = @input
        try
            return @produce method
        catch err
            throw err if err isnt @fail
            @fail input

    # Try to run the production `method`.  If the production fails,
    # don't fail, just return `otherwise`.

    option: (method, otherwise) =>
        try
            return @maybe method
        catch err
            throw err if err isnt @fail
            return otherwise

    # Given three parsers, return the value produced by `body`.  This
    # is equivalent to seq(left, body, right)[1].

    between: (left, right, body) =>
        input = @input
        try
            @produce left
            val = @produce body
            @produce right
            return val
        catch err
            throw err if err isnt @fail
            @fail input

    # Match a regular expression against the input, returning the
    # first captured group.  If no group is captured, return the
    # matched string.  This can result in surprises, if you don't wrap
    # your groups exactly right, which is common in ()? regexps.
    #
    # Patterns should be anchored with `^` (and should not use the `m`
    # flag): the length of the match is removed from the *front* of the
    # input regardless of where in the input the match was found.

    match: (pattern) =>
        probe = @input.match pattern
        return @fail() unless probe
        @input = @input.substr probe[0].length
        if probe[1] is undefined then probe[0] else probe[1]

    # Returns the value of the first production among the arguments
    # that does not fail.  Fails, restoring the input, if none succeed.

    choice: =>
        input = @input
        for arg in arguments
            try
                return @produce arg
            catch err
                throw err if err isnt @fail
        @fail input

    # Match every production in a sequence, returning a list of the
    # values produced.  The comprehension is wrapped in parentheses
    # because the try/return pairing otherwise confuses CoffeeScript's
    # parser.
    #
    # Unlike Weaver's original, the matched source text is not
    # unshifted onto the front of the result; Parsec's sequence
    # primitive does not do that either.  If the raw text of a
    # sequence is needed, the individual productions can preserve it.

    seq: =>
        input = @input
        try
            return (for arg in arguments
                @produce(arg))
        catch err
            throw err if err isnt @fail
            @fail input

    # Applies the production `method` `min` or more times, discarding
    # the values.  Returns the parser object as a chainable convenience
    # if it does not fail.  Will fail if it skips less than `min` times.
    #
    # NOTE(review): a `method` that can succeed without consuming input
    # never lets this loop reach eof or fail; confirm callers only pass
    # productions that consume something.

    skip: (method, min = null) =>
        found = 0
        input = @input
        until @eof()
            try
                @maybe method
                found++
            catch err
                throw err if err isnt @fail
                break
        if min and (found < min) then @fail input else @

    # Applies the production `method` one or more times.

    skip1: (method) => @skip(method, 1)

    # Skip whitespace.  Returns the parser object for chainable
    # convenience.  The pattern is `\s*`, so every whitespace
    # character is skipped, including carriage returns and linefeeds.

    skipWS: =>
        @match(/^\s*/)
        @

    # Returns an array of `min` or more values produced by `method`
    # (zero or more when `min` is not given).  Fails, restoring the
    # input, if fewer than `min` values are produced.

    many: (method, min = null) =>
        input = @input
        result = until @eof()
            try
                @maybe(method)
            catch err
                throw err if err isnt @fail
                break

        if min and (result.length < min) then @fail input else result

    # Returns an array of at least one values produced by `method`.
    # Fails if zero values are produced.

    many1: (method) => @many method, 1

    # Return the array of values produced by `method` with `sep`
    # between each value.  A `sep` that is not followed by another
    # value is left unconsumed; use `sepEndBy` to accept a trailing
    # separator.  Fails, restoring the input, if fewer than `min`
    # values are produced.

    sepBy: (method, sep, min = 0) =>
        orig = @input
        result = []
        try
            result.push @produce method
            until @eof()
                try
                    # Remember the position before the separator so a
                    # dangling separator can be handed back.
                    input = @input
                    @produce sep
                    result.push @produce method
                catch err
                    throw err if err isnt @fail
                    @fail input
        catch err
            throw err if err isnt @fail
        if min and (result.length < min) then @fail orig else result

    # Like `sepBy`, but must produce at least one value.

    sepBy1: (method, sep) => @sepBy method, sep, 1

    # Parses `min` or more productions of `method` (zero by default),
    # then consumes a single `end` production if one is present.
    #
    # NOTE(review): `end` is applied through `@option`, as in Weaver's
    # code, so a missing terminator is tolerated rather than treated as
    # a failure, and `end` is tried once after the whole run rather
    # than after each item.  Switching to `@produce end` would make the
    # terminator mandatory, which seems to be in line with the JSParsec
    # implementation.

    endBy: (method, end, min = 0) =>
        val = @many method, min
        @option end
        val

    # Parses 1 or more productions of `method`, followed by an `end`
    # production handled exactly as in `endBy`.

    endBy1: (method, end) =>
        @endBy method, end, 1

    # Returns an array of `min` or more values produced by `method`,
    # separated by `sep`, and optionally terminated by `sep`.
    # Defaults to zero productions.

    sepEndBy: (method, sep, min = 0) =>
        val = @sepBy method, sep, min
        @option sep
        val

    # Like `sepEndBy`, but must return at least one production; fails
    # if there are zero productions.

    sepEndBy1: (method, sep) => @sepEndBy method, sep, 1

    # Process occurrences of `method`, separated by `op`.  `op` must
    # produce a two-argument function; the result is obtained by
    # applying it, left to right, to the running value and the next
    # `method` value.  If `method` never matches, `otherwise` is the
    # result.  If `min` is given and fewer than `min` occurrences of
    # `method` were found, the input is restored to where `chainl`
    # began and the parse fails.  Used, for example, to process a
    # collection of mathematical productions of the same precedence.

    chainl: (method, op, otherwise = null, min = null) =>
        found = 0
        result = otherwise
        orig = @input
        try
            result = @maybe(method)
            found++
            until @eof()
                try
                    # Position before the operator, so a dangling
                    # operator can be handed back.
                    input = @input
                    result = @produce(op)(result, @produce(method))
                    found++
                catch err
                    throw err if err isnt @fail
                    @fail input
        catch err
            throw err if err isnt @fail
        # Fail from the original position, not from the last operator.
        if min and (found < min) then @fail orig else result

    # Like `chainl`, but must produce at least one production.  Fails
    # if there are zero productions.

    chainl1: (method, op) => @chainl method, op, null, 1
|
||||
|
Loading…
Reference in New Issue