Docco'd the library. Updated the examples to use the new parse-inheritance.

This commit is contained in:
Elf M. Sternberg 2012-05-03 16:41:00 -07:00
commit bd77a5742d
8 changed files with 2176 additions and 0 deletions

7
.gitignore vendored Normal file
View File

@ -0,0 +1,7 @@
.npm
node_modules
*~
#*
.#*
*/#*
docs

29
examples/calc.coffee Normal file
View File

@ -0,0 +1,29 @@
ReParse = require('../src/reparse').ReParse
util = require("util")
# A four-function calculator grammar built on ReParse.  Whitespace
# around tokens is discarded because `ignorews` is switched on.
class Calc extends ReParse
    ignorews: true

    # Binary operators, keyed by the symbol matched in the input.
    OPS:
        '+': (lhs, rhs) -> lhs + rhs
        "-": (lhs, rhs) -> lhs - rhs
        "*": (lhs, rhs) -> lhs * rhs
        "/": (lhs, rhs) -> lhs / rhs

    # Terms folded left-to-right with the additive operators.
    expr: =>
        @chainl(@term, @addop)

    # One or more factors folded left-to-right with the
    # multiplicative operators.
    term: =>
        @chainl1(@factor, @mulop)

    # Either a parenthesised expression or a literal number.
    factor: =>
        @choice(@group, @number)

    # An expression wrapped in parentheses.
    group: =>
        @between(/^\(/, /^\)/, @expr)

    # An optionally negative decimal literal, converted to a float.
    number: =>
        digits = @match(/^(\-?\d+(\.\d+)?)/)
        parseFloat(digits)

    # Consume `*` or `/` and hand back the function that applies it.
    mulop: =>
        symbol = @match(/^[\*\/]/)
        @OPS[symbol]

    # Consume `+` or `-` and hand back the function that applies it.
    addop: =>
        symbol = @match(/^[\+\-]/)
        @OPS[symbol]

    # Store the input via the parent class, then parse it as an `expr`.
    parse: =>
        super
        @start(@expr)
# Command-line entry point: expects exactly one argument, the
# expression to evaluate, and prints the result.
#
# `util.puts` has been removed from Node.js; `console.log` writes the
# same newline-terminated text to stdout on every Node version.
unless process.argv.length is 3
    console.log "Usage: node " + process.argv[1] + " expression"
    process.exit 1
console.log (new Calc).parse(process.argv[2])

View File

@ -0,0 +1,23 @@
ReParse = require('../src/reparse').ReParse
util = require("util")
# Grammar for a comma-separated list of e-mail addresses, each either
# bare (`user@host`) or named (`Some Name <user@host>`).
class EmailAddress extends ReParse

    # Addresses separated by commas; a trailing comma is tolerated.
    addressList: =>
        @sepEndBy(@address, /^\s*,\s*/)

    # A named address is tried first, then a bare one.
    address: =>
        @choice(@namedAddress, @bareAddress)

    # `phrase <bare-address>`; only the bare address (the third element
    # of the sequence) is kept.
    namedAddress: =>
        parts = @seq(@phrase, /^\s*</m, @bareAddress, /^>/)
        parts[2]

    # `word@word`, glued back together into a single string.
    bareAddress: =>
        pieces = @seq(@word, /^@/, @word)
        pieces.join("")

    # A run of words (the display name).
    phrase: =>
        @many(@word)

    # Drop leading whitespace, then read a quoted string or a dotted atom.
    word: =>
        @skip(/^\s+/)
        @choice(@quoted, @dottedAtom)

    # A double-quoted string; backslash escapes are allowed inside.
    quoted: =>
        @match(/^"(?:\\.|[^"\r\n])+"/m)

    # One or more atoms joined by single dots.
    dottedAtom: =>
        @match(/^[!#\$%&'\*\+\-\/\w=\?\^`\{\|\}~]+(?:\.[!#\$%&'\*\+\-\/\w=\?\^`\{\|\}~]+)*/m)

    # Store the input via the parent class, then parse it as an
    # address list.
    parse: =>
        super
        @start(@addressList)
# Command-line entry point: expects exactly one argument, the address
# list, and prints the parsed addresses.
#
# `util.puts` has been removed from Node.js; `console.log` writes the
# same newline-terminated text to stdout on every Node version.
unless process.argv.length is 3
    console.log "Usage: node " + process.argv[1] + " list-of-addresses"
    process.exit 1
console.log util.inspect (new EmailAddress).parse(process.argv[2])

51
examples/json.coffee Normal file
View File

@ -0,0 +1,51 @@
ReParse = require('../src/reparse').ReParse
util = require("util")
peg = require('./pegjson').parser;
require('./upgrades')
# A JSON grammar built on ReParse.  Whitespace around tokens is
# discarded because `ignorews` is switched on.
class ReJSON extends ReParse
    ignorews: true

    # Keyword text -> the value it denotes.
    LITERAL =
        'true': true
        'false': false
        'null': null

    # Single-character escape -> character code it stands for.
    STRING =
        "\"": 34
        "\\": 92
        "/": 47
        'b': 8
        'f': 12
        'n': 10
        'r': 13
        't': 9

    # Any JSON value; alternatives are tried in this order.
    value: =>
        @choice(@literal, @string, @number, @array, @object)

    # `{ members }`, folded into a plain object.  Each member is the
    # three-element sequence produced by `pair`: key, colon, value.
    object: =>
        built = {}
        for member in @between(/^\{/, /^\}/, @members)
            built[member[0]] = member[2]
        built

    # Comma-separated key/value pairs.
    members: =>
        @sepBy(@pair, /^,/)

    # `string : value`
    pair: =>
        @seq(@string, /^:/, @value)

    # `[ elements ]`
    array: =>
        @between(/^\[/, /^\]/, @elements)

    # Comma-separated values.
    elements: =>
        @sepBy(@value, /^,/)

    # `true`, `false` or `null`.
    literal: =>
        keyword = @match(/^(true|false|null)/)
        LITERAL[keyword]

    # A JSON number, converted to a float.
    number: =>
        parseFloat(@match(/^\-?\d+(?:\.\d+)?(?:[eE][\+\-]?\d+)?/))

    # A double-quoted string with its escape sequences decoded.
    string: =>
        raw = @match(/^"((?:\\["\\/bfnrt]|\\u[0-9a-fA-F]{4}|[^"\\])*)"/)
        raw.replace /\\(["\\/bfnrt])|\\u([0-9a-fA-F]{4})/g, (whole, simple, hex) =>
            # Either a single-character escape or a \uXXXX code unit.
            code = if simple then STRING[simple] else parseInt(hex, 16)
            String.fromCharCode(code)

    # Store the input via the parent class, then parse it as a value.
    parse: =>
        super
        @start(@value)
# Accumulate everything `stream` emits as text in `encoding`, then hand
# the complete string to `fn` once the stream signals "end".
capture = (stream, encoding, fn) =>
    buffered = ""
    onData = (chunk) => buffered += chunk
    onEnd = => fn(buffered)
    stream.setEncoding(encoding)
    stream.on("data", onData)
    stream.on("end", onEnd)
# Call `fn` exactly `reps` times and print "<label>: <elapsed ms>".
#
#   label - text printed in front of the elapsed time
#   reps  - number of times to call `fn`
#   fn    - zero-argument function to be timed
time = (label, reps, fn) =>
    start = Date.now()
    # Exclusive range: an inclusive `[0..reps]` calls `fn` reps + 1 times.
    for i in [0...reps]
        fn()
    # `util.puts` has been removed from Node.js; `console.log` prints
    # the same newline-terminated line to stdout.
    console.log label + ": " + (Date.now() - start)
# Sample document used for both the demonstration parse and the timings.
input = "{\"a\": [1, \"foo\", [], {\"foo\": 1, \"bar\": [1, 2, 3]}] }"

# Show the structure ReJSON builds, inspected four levels deep.
parsed = (new ReJSON).parse(input)
console.log util.inspect(parsed, false, 4)

# Time the native parser, the PEG.js parser and a reused ReJSON
# instance on the same input, in that order.
jsonparse = new ReJSON()
contenders = [
    ["JSON", => JSON.parse(input)],
    ["PEG.js", => peg.parse(input)],
    ["ReParse", => jsonparse.parse(input)]
]
for [label, run] in contenders
    time label, 1000, run

1747
examples/pegjson.js Normal file

File diff suppressed because it is too large Load Diff

21
examples/upgrades.coffee Normal file
View File

@ -0,0 +1,21 @@
# Taken from https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects/Array/Reduce
# Install a fallback `reduce` only when the runtime lacks one.
unless Array::reduce?
    # reduce(callback[, initialValue]) -- fold the array left to right.
    # The callback receives (accumulated, element, index, array).
    Array::reduce = ->
        callback = arguments[0]
        unless typeof callback is 'function'
            throw new TypeError "First argument is not callable"
        if arguments.length >= 2
            # An explicit initial value: fold over every element.
            memo = arguments[1]
            index = 0
        else
            # No initial value: seed with the first element, which
            # therefore has to exist.
            throw new TypeError "Array length is 0 and no second argument" if @length is 0
            memo = @[0]
            index = 1
        # Length is read once, before the callback gets a chance to
        # change the array.
        last = @length
        while index < last
            memo = callback.call(undefined, memo, @[index], index, @)
            index++
        memo

13
package.json Normal file
View File

@ -0,0 +1,13 @@
{
"name": "reparse-coffee",
"description": "An implementation of a parser combinator in coffeescript.",
"author": "Elf M. Sternberg (elf.sternberg@gmail.com)",
"version": "0.0.1",
"dependencies": {
"coffee-script": "1.x.x"
},
"devDependencies": {
"docco": "0.3.x"
},
"engines": { "node": ">= 0.6.0" }
}

285
src/reparse.coffee Normal file
View File

@ -0,0 +1,285 @@
#; -*- mode: coffee -*-
# ___ ___ ___ __ __
# | _ \___| _ \__ _ _ _ ___ ___ / __|___ / _|/ _|___ ___
# | / -_) _/ _` | '_(_-</ -_) | (__/ _ \ _| _/ -_) -_)
# |_|_\___|_| \__,_|_| /__/\___| \___\___/_| |_| \___\___|
#
# A hand-re-written implementation of Ben Weaver's ReParse, a parser
# combinator, which in turn was heavily influenced by Haskell's
# PARSEC. In many cases, this code is almost exactly Ben's; in some,
# I've lifted further ideas and commentary from the JSParsec project.
# ReParse: a small parser-combinator base class.  A grammar is written
# as a subclass whose methods are productions; each production either
# returns a value or signals failure by calling `@fail`.  A production
# may also be a RegExp, which is matched against the remaining input.
exports.ReParse = class ReParse

    # Extend from ReParse and set to true if you don't care about
    # whitespace.  When true, `produce` skips whitespace after every
    # production and `start` skips it before the first one.
    ignorews: false

    # Where the parse phase begins.  The correct way to override this
    # is to create a child method:
    #
    #     parse: ->
    #         super
    #         @start(@your_top_level_production)
    parse: (input) =>
        @input = input

    # Returns true when this parser has exhausted its input.
    eof: =>
        @input is ""

    # Indicate failure, optionally resetting the input to a previous
    # state.  The bound `fail` function itself is thrown and serves as
    # the sentinel every combinator compares against; anything else
    # caught is a real error and is re-thrown.  Failure is not an
    # exceptional condition (in `choice` and `option`, for example).
    fail: (input) =>
        @input = input if input isnt undefined
        throw @fail

    # Execute a production, which could be a function or a RegExp, and
    # return its value.
    produce: (method) =>
        val = if method instanceof RegExp then @match(method) else method.call(this)
        @skipWS() if @ignorews
        val

    # Begin parsing using the given production and return the result.
    # All input must be consumed: throws an Error if the production
    # fails or if input is left over.
    start: (method) =>
        @skipWS() if @ignorews
        try
            val = @produce method
            return val if @eof()
        catch err
            throw err if err isnt @fail
        throw new Error("Could not parse '" + @input + "'.")

    # Apply the production and return its value.  If it fails, restore
    # the input to what it was on entry and fail again.
    maybe: (method) =>
        input = @input
        try
            return @produce method
        catch err
            throw err if err isnt @fail
        @fail input

    # Try to run the production `method`.  If the production fails,
    # don't fail, just return `otherwise`.
    option: (method, otherwise) =>
        try
            return @maybe method
        catch err
            throw err if err isnt @fail
        return otherwise

    # Given three parsers, return the value produced by `body`.  This
    # is equivalent to seq(left, body, right)[1].  On failure the input
    # is restored to what it was on entry.
    between: (left, right, body) =>
        input = @input
        try
            @produce left
            val = @produce body
            @produce right
            return val
        catch err
            throw err if err isnt @fail
        @fail input

    # Match a regular expression against the input, returning the
    # first captured group.  If the first group captured nothing,
    # return the whole matched string.  This can result in surprises
    # if you don't wrap your groups exactly right, which is common in
    # ()? regexps.  Fails, consuming nothing, when there is no match.
    match: (pattern) =>
        probe = @input.match pattern
        return @fail() unless probe
        @input = @input.substr probe[0].length
        if probe[1] is undefined then probe[0] else probe[1]

    # Returns the value of the first production among the arguments
    # that does not fail.  The input is rewound before each further
    # attempt, so an alternative that consumed some input before
    # failing cannot affect the alternatives after it.
    choice: (alternatives...) =>
        input = @input
        for alternative in alternatives
            try
                return @produce alternative
            catch err
                throw err if err isnt @fail
                @input = input
        @fail input

    # Match every production in sequence, returning a list of the
    # values produced, one per argument.  Unlike Weaver's original,
    # the consumed text is NOT unshifted onto the front of the result.
    # On failure the input is restored to what it was on entry.
    seq: (productions...) =>
        input = @input
        values = []
        try
            values.push @produce(production) for production in productions
            return values
        catch err
            throw err if err isnt @fail
        @fail input

    # Applies the production `method` repeatedly, discarding its
    # values.  Returns the parser object as a chainable convenience.
    # Fails, restoring the input, if `min` is given and fewer than
    # `min` applications succeeded.  A success that consumes no input
    # ends the repetition and is not counted; otherwise a production
    # able to match the empty string would loop forever.
    skip: (method, min = null) =>
        found = 0
        input = @input
        until @eof()
            before = @input.length
            try
                @maybe method
            catch err
                throw err if err isnt @fail
                break
            break if @input.length is before
            found++
        if min and (found < min) then @fail input else @

    # Applies the production `method` one or more times.
    skip1: (method) => @skip(method, 1)

    # Skip whitespace.  Returns the parser object for chainable
    # convenience.  The pattern is `\s*`, so carriage returns and
    # linefeeds are skipped along with spaces and tabs.
    skipWS: =>
        @match(/^\s*/)
        @

    # Returns an array of the values produced by applying `method`
    # repeatedly.  Fails, restoring the input, if `min` is given and
    # fewer than `min` values were produced.  A success that consumes
    # no input ends the repetition and its value is dropped; otherwise
    # a production able to match the empty string would loop forever.
    many: (method, min = null) =>
        input = @input
        result = []
        until @eof()
            before = @input.length
            try
                val = @maybe method
            catch err
                throw err if err isnt @fail
                break
            break if @input.length is before
            result.push val
        if min and (result.length < min) then @fail input else result

    # Returns an array of at least one value produced by `method`.
    # Fails if zero values are produced.
    many1: (method) => @many method, 1

    # Return the array of values produced by `method` with `sep`
    # between each value.  A `sep` that is not followed by another
    # `method` is left unconsumed (see `sepEndBy` for trailing
    # separators).  Fails, restoring the input, if fewer than `min`
    # values were produced.
    sepBy: (method, sep, min = 0) =>
        orig = @input
        result = []
        try
            # `maybe` rewinds if the first element fails part-way.
            result.push @maybe method
            until @eof()
                input = @input
                try
                    @produce sep
                    result.push @produce method
                catch err
                    throw err if err isnt @fail
                    # Rewind to just before the separator and stop.
                    @fail input
        catch err
            throw err if err isnt @fail
        if min and (result.length < min) then @fail orig else result

    # Like `sepBy`, but must produce at least one value.
    sepBy1: (method, sep) => @sepBy method, sep, 1

    # Parses `min` or more productions of `method` (zero by default),
    # then consumes one trailing `end` if it is present.
    # NOTE(review): earlier commentary said the series *must* be
    # terminated by `end` and that `@produce` was used; the code uses
    # `@option`, which makes the terminator optional.  Confirm which
    # behaviour is intended before relying on either.
    endBy: (method, end, min = 0) =>
        val = @many method, min
        @option end
        val

    # Like `endBy`, but must produce at least one value.
    endBy1: (method, end) =>
        @endBy method, end, 1

    # Returns an array of `min` or more values produced by `method`,
    # separated by `sep`, and optionally terminated by `sep`.
    # Defaults to zero productions.
    sepEndBy: (method, sep, min = 0) =>
        val = @sepBy method, sep, min
        @option sep
        val

    # Like `sepEndBy`, but fails if there are zero productions.
    sepEndBy1: (method, sep) => @sepEndBy method, sep, 1

    # Process occurrences of `method` separated by `op`.  Return the
    # value obtained by repeatedly applying the function returned by
    # `op` to the value so far and the next `method` value (left
    # associative).  If no `method` is found at all, the result is
    # `otherwise`.  Fails, restoring the input to what it was on
    # entry, if `min` is given and fewer than `min` occurrences were
    # found.  Used, for example, to process a run of mathematical
    # productions of the same precedence.
    chainl: (method, op, otherwise = null, min = null) =>
        found = 0
        result = otherwise
        orig = @input
        try
            result = @maybe(method)
            found++
            until @eof()
                input = @input
                try
                    result = @produce(op)(result, @produce(method))
                    found++
                catch err
                    throw err if err isnt @fail
                    # Rewind to just before the operator and stop.
                    @fail input
        catch err
            throw err if err isnt @fail
        if min and (found < min) then @fail orig else result

    # Like `chainl`, but must produce at least one production.  Fails
    # if there are zero productions.
    chainl1: (method, op) => @chainl method, op, null, 1