Add peg.el as a built-in library
* lisp/progmodes/peg.el: New file, taken from ELPA package. * test/lisp/peg-tests.el: Package tests. * doc/lispref/peg.texi: Documentation.
This commit is contained in:
parent
0df8dadde2
commit
8bee4060ea
6 changed files with 1679 additions and 0 deletions
|
@ -112,6 +112,7 @@ srcs = \
|
|||
$(srcdir)/os.texi \
|
||||
$(srcdir)/package.texi \
|
||||
$(srcdir)/parsing.texi \
|
||||
$(srcdir)/peg.texi \
|
||||
$(srcdir)/positions.texi \
|
||||
$(srcdir)/processes.texi \
|
||||
$(srcdir)/records.texi \
|
||||
|
|
|
@ -222,6 +222,7 @@ To view this manual in other formats, click
|
|||
* Non-ASCII Characters:: Non-ASCII text in buffers and strings.
|
||||
* Searching and Matching:: Searching buffers for strings or regexps.
|
||||
* Syntax Tables:: The syntax table controls word and list parsing.
|
||||
* Parsing Expression Grammars:: Parsing structured buffer text.
|
||||
* Parsing Program Source:: Generate syntax tree for program sources.
|
||||
* Abbrevs:: How Abbrev mode works, and its data structures.
|
||||
|
||||
|
@ -1365,6 +1366,12 @@ Syntax Tables
|
|||
* Syntax Table Internals:: How syntax table information is stored.
|
||||
* Categories:: Another way of classifying character syntax.
|
||||
|
||||
Parsing Expression Grammars
|
||||
|
||||
* PEX Definitions:: The syntax of PEX rules
|
||||
* Parsing Actions:: Running actions upon successful parsing.
|
||||
* Writing PEG Rules:: Tips for writing parsing rules.
|
||||
|
||||
Parsing Program Source
|
||||
|
||||
* Language Grammar:: Loading tree-sitter language grammar.
|
||||
|
@ -1720,6 +1727,7 @@ Object Internals
|
|||
|
||||
@include searching.texi
|
||||
@include syntax.texi
|
||||
@include peg.texi
|
||||
@include parsing.texi
|
||||
@include abbrevs.texi
|
||||
@include threads.texi
|
||||
|
|
351
doc/lispref/peg.texi
Normal file
351
doc/lispref/peg.texi
Normal file
|
@ -0,0 +1,351 @@
|
|||
@c -*-texinfo-*-
|
||||
@c This is part of the GNU Emacs Lisp Reference Manual.
|
||||
@c Copyright (C) 1990--1995, 1998--1999, 2001--2023 Free Software
|
||||
@c Foundation, Inc.
|
||||
@c See the file elisp.texi for copying conditions.
|
||||
@node Parsing Expression Grammars
|
||||
@chapter Parsing Expression Grammars
|
||||
@cindex text parsing
|
||||
@cindex parsing expression grammar
|
||||
|
||||
Emacs Lisp provides several tools for parsing and matching text,
|
||||
from regular expressions (@pxref{Regular Expressions}) to full
|
||||
@acronym{LL} grammar parsers (@pxref{Top,, Bovine parser
|
||||
development,bovine}). @dfn{Parsing Expression Grammars}
|
||||
(@acronym{PEG}) are another approach to text parsing that offer more
|
||||
structure and composibility than regular expressions, but less
|
||||
complexity than context-free grammars.
|
||||
|
||||
A @acronym{PEG} parser is defined as a list of named rules, each of
|
||||
which matches text patterns, and/or contains references to other
|
||||
rules. Parsing is initiated with the function @code{peg-run} or the
|
||||
macro @code{peg-parse} (see below), and parses text after point in the
|
||||
current buffer, using a given set of rules.
|
||||
|
||||
@cindex parsing expression
|
||||
The definition of each rule is referred to as a @dfn{parsing
|
||||
expression} (@acronym{PEX}), and can consist of a literal string, a
|
||||
regexp-like character range or set, a peg-specific construct
|
||||
resembling an elisp function call, a reference to another rule, or a
|
||||
combination of any of these. A grammar is expressed as a tree of
|
||||
rules in which one rule is typically treated as a ``root'' or
|
||||
``entry-point'' rule. For instance:
|
||||
|
||||
@example
|
||||
@group
|
||||
((number sign digit (* digit))
|
||||
(sign (or "+" "-" ""))
|
||||
(digit [0-9]))
|
||||
@end group
|
||||
@end example
|
||||
|
||||
Once defined, grammars can be used to parse text after point in the
|
||||
current buffer, in the following ways:
|
||||
|
||||
@defmac peg-parse &rest pexs
|
||||
Match @var{pexs} at point. If @var{pexs} is a list of PEG rules, the
|
||||
first rule is considered the ``entry-point'':
|
||||
@end defmac
|
||||
|
||||
@example
|
||||
@group
|
||||
(peg-parse
|
||||
((number sign digit (* digit))
|
||||
(sign (or "+" "-" ""))
|
||||
(digit [0-9])))
|
||||
@end group
|
||||
@end example
|
||||
|
||||
This macro represents the simplest use of the @acronym{PEG} library,
|
||||
but also the least flexible, as the rules must be written directly
|
||||
into the source code. A more flexible approach involves use of three
|
||||
macros in conjunction: @code{with-peg-rules}, a @code{let}-like
|
||||
construct that makes a set of rules available within the macro body;
|
||||
@code{peg-run}, which initiates parsing given a single rule; and
|
||||
@code{peg}, which is used to wrap the entry-point rule name. In fact,
|
||||
a call to @code{peg-parse} expands to just this set of calls. The
|
||||
above example could be written as:
|
||||
|
||||
@example
|
||||
@group
|
||||
(with-peg-rules
|
||||
((number sign digit (* digit))
|
||||
(sign (or "+" "-" ""))
|
||||
(digit [0-9]))
|
||||
(peg-run (peg number)))
|
||||
@end group
|
||||
@end example
|
||||
|
||||
This allows more explicit control over the ``entry-point'' of parsing,
|
||||
and allows the combination of rules from different sources.
|
||||
|
||||
Individual rules can also be defined using a more @code{defun}-like
|
||||
syntax, using the macro @code{define-peg-rule}:
|
||||
|
||||
@example
|
||||
(define-peg-rule digit ()
|
||||
[0-9])
|
||||
@end example
|
||||
|
||||
This also allows for rules that accept an argument (supplied by the
|
||||
@code{funcall} PEG rule).
|
||||
|
||||
Another possibility is to define a named set of rules with
|
||||
@code{define-peg-ruleset}:
|
||||
|
||||
@example
|
||||
(define-peg-ruleset number-grammar
|
||||
'((number sign digit (* digit))
|
||||
digit ;; A reference to the definition above.
|
||||
(sign (or "+" "-" ""))))
|
||||
@end example
|
||||
|
||||
Rules and rulesets defined this way can be referred to by name in
|
||||
later calls to @code{peg-run} or @code{with-peg-rules}:
|
||||
|
||||
@example
|
||||
(with-peg-rules number-grammar
|
||||
(peg-run (peg number)))
|
||||
@end example
|
||||
|
||||
By default, calls to @code{peg-run} or @code{peg-parse} produce no
|
||||
output: parsing simply moves point. In order to return or otherwise
|
||||
act upon parsed strings, rules can include @dfn{actions}, see
|
||||
@ref{Parsing Actions}.
|
||||
|
||||
@menu
|
||||
* PEX Definitions:: The syntax of PEX rules.
|
||||
* Parsing Actions:: Running actions upon successful parsing.
|
||||
* Writing PEG Rules:: Tips for writing parsing rules.
|
||||
@end menu
|
||||
|
||||
@node PEX Definitions
|
||||
@section PEX Definitions
|
||||
|
||||
Parsing expressions can be defined using the following syntax:
|
||||
|
||||
@table @code
|
||||
@item (and E1 E2 ...)
|
||||
A sequence of @acronym{PEX}s that must all be matched. The @code{and} form is
|
||||
optional and implicit.
|
||||
|
||||
@item (or E1 E2 ...)
|
||||
Prioritized choices, meaning that, as in Elisp, the choices are tried
|
||||
in order, and the first successful match is used. Note that this is
|
||||
distinct from context-free grammars, in which selection between
|
||||
multiple matches is indeterminate.
|
||||
|
||||
@item (any)
|
||||
Matches any single character, as the regexp ``.''.
|
||||
|
||||
@item @var{string}
|
||||
A literal string.
|
||||
|
||||
@item (char @var{C})
|
||||
A single character @var{C}, as an Elisp character literal.
|
||||
|
||||
@item (* @var{E})
|
||||
Zero or more instances of expression @var{E}, as the regexp @samp{*}.
|
||||
Matching is always ``greedy''.
|
||||
|
||||
@item (+ @var{E})
|
||||
One or more instances of expression @var{E}, as the regexp @samp{+}.
|
||||
Matching is always ``greedy''.
|
||||
|
||||
@item (opt @var{E})
|
||||
Zero or one instance of expression @var{E}, as the regexp @samp{?}.
|
||||
|
||||
@item SYMBOL
|
||||
A symbol representing a previously-defined PEG rule.
|
||||
|
||||
@item (range CH1 CH2)
|
||||
The character range between CH1 and CH2, as the regexp @samp{[CH1-CH2]}.
|
||||
|
||||
@item [CH1-CH2 "+*" ?x]
|
||||
A character set, which can include ranges, character literals, or
|
||||
strings of characters.
|
||||
|
||||
@item [ascii cntrl]
|
||||
A list of named character classes.
|
||||
|
||||
@item (syntax-class @var{NAME})
|
||||
A single syntax class.
|
||||
|
||||
@item (funcall E ARGS...)
|
||||
Call @acronym{PEX} E (previously defined with @code{define-peg-rule})
|
||||
with arguments @var{ARGS}.
|
||||
|
||||
@item (null)
|
||||
The empty string.
|
||||
|
||||
@end table
|
||||
|
||||
The following expressions are used as anchors or tests -- they do not
|
||||
move point, but return a boolean value which can be used to constrain
|
||||
matches as a way of controlling the parsing process (@pxref{Writing
|
||||
PEG Rules}).
|
||||
|
||||
@table @code
|
||||
@item (bob)
|
||||
Beginning of buffer.
|
||||
|
||||
@item (eob)
|
||||
End of buffer.
|
||||
|
||||
@item (bol)
|
||||
Beginning of line.
|
||||
|
||||
@item (eol)
|
||||
End of line.
|
||||
|
||||
@item (bow)
|
||||
Beginning of word.
|
||||
|
||||
@item (eow)
|
||||
End of word.
|
||||
|
||||
@item (bos)
|
||||
Beginning of symbol.
|
||||
|
||||
@item (eos)
|
||||
End of symbol.
|
||||
|
||||
@item (if E)
|
||||
Returns non-@code{nil} if parsing @acronym{PEX} E from point succeeds (point
|
||||
is not moved).
|
||||
|
||||
@item (not E)
|
||||
Returns non-@code{nil} if parsing @acronym{PEX} E from point fails (point
|
||||
is not moved).
|
||||
|
||||
@item (guard EXP)
|
||||
Treats the value of the Lisp expression EXP as a boolean.
|
||||
|
||||
@end table
|
||||
|
||||
@vindex peg-char-classes
|
||||
Character class matching can use the same named character classes as
|
||||
in regular expressions (@pxref{Top,, Character Classes,elisp})
|
||||
|
||||
@node Parsing Actions
|
||||
@section Parsing Actions
|
||||
|
||||
@cindex parsing actions
|
||||
@cindex parsing stack
|
||||
By default the process of parsing simply moves point in the current
|
||||
buffer, ultimately returning @code{t} if the parsing succeeds, and
|
||||
@code{nil} if it doesn't. It's also possible to define ``actions''
|
||||
that can run arbitrary Elisp at certain points in the parsed text.
|
||||
These actions can optionally affect something called the @dfn{parsing
|
||||
stack}, which is a list of values returned by the parsing process.
|
||||
These actions only run (and only return values) if the parsing process
|
||||
ultimately succeeds; if it fails the action code is not run at all.
|
||||
|
||||
Actions can be added anywhere in the definition of a rule. They are
|
||||
distinguished from parsing expressions by an initial backquote
|
||||
(@samp{`}), followed by a parenthetical form that must contain a pair
|
||||
of hyphens (@samp{--}) somewhere within it. Symbols to the left of
|
||||
the hyphens are bound to values popped from the stack (they are
|
||||
somewhat analogous to the argument list of a lambda form). Values
|
||||
produced by code to the right are pushed to the stack (analogous to
|
||||
the return value of the lambda). For instance, the previous grammar
|
||||
can be augmented with actions to return the parsed number as an actual
|
||||
integer:
|
||||
|
||||
@example
|
||||
(with-peg-rules ((number sign digit (* digit
|
||||
`(a b -- (+ (* a 10) b)))
|
||||
`(sign val -- (* sign val)))
|
||||
(sign (or (and "+" `(-- 1))
|
||||
(and "-" `(-- -1))
|
||||
(and "" `(-- 1))))
|
||||
(digit [0-9] `(-- (- (char-before) ?0))))
|
||||
(peg-run (peg number)))
|
||||
@end example
|
||||
|
||||
There must be values on the stack before they can be popped and
|
||||
returned -- if there aren't enough stack values to bind to an action's
|
||||
left-hand terms, they will be bound to @code{nil}. An action with
|
||||
only right-hand terms will push values to the stack; an action with
|
||||
only left-hand terms will consume (and discard) values from the stack.
|
||||
At the end of parsing, stack values are returned as a flat list.
|
||||
|
||||
To return the string matched by a @acronym{PEX} (instead of simply
|
||||
moving point over it), a rule like this can be used:
|
||||
|
||||
@example
|
||||
(one-word
|
||||
`(-- (point))
|
||||
(+ [word])
|
||||
`(start -- (buffer-substring start (point))))
|
||||
@end example
|
||||
|
||||
The first action pushes the initial value of point to the stack. The
|
||||
intervening @acronym{PEX} moves point over the next word. The second
|
||||
action pops the previous value from the stack (binding it to the
|
||||
variable @code{start}), and uses that value to extract a substring
|
||||
from the buffer and push it to the stack. This pattern is so common
|
||||
that @acronym{PEG} provides a shorthand function that does exactly the
|
||||
above, along with a few other shorthands for common scenarios:
|
||||
|
||||
@table @code
|
||||
@item (substring @var{E})
|
||||
Match @acronym{PEX} @var{E} and push the matched string to the stack.
|
||||
|
||||
@item (region @var{E})
|
||||
Match @var{E} and push the start and end positions of the matched
|
||||
region to the stack.
|
||||
|
||||
@item (replace @var{E} @var{replacement})
|
||||
Match @var{E} and replaced the matched region with the string @var{replacement}.
|
||||
|
||||
@item (list @var{E})
|
||||
Match @var{E}, collect all values produced by @var{E} (and its
|
||||
sub-expressions) into a list, and push that list to the stack. Stack
|
||||
values are typically returned as a flat list; this is a way of
|
||||
``grouping'' values together.
|
||||
@end table
|
||||
|
||||
@node Writing PEG Rules
|
||||
@section Writing PEG Rules
|
||||
|
||||
Something to be aware of when writing PEG rules is that they are
|
||||
greedy. Rules which can consume a variable amount of text will always
|
||||
consume the maximum amount possible, even if that causes a rule that
|
||||
might otherwise have matched to fail later on -- there is no
|
||||
backtracking. For instance, this rule will never succeed:
|
||||
|
||||
@example
|
||||
(forest (+ "tree" (* [blank])) "tree" (eol))
|
||||
@end example
|
||||
|
||||
The @acronym{PEX} @code{(+ "tree" (* [blank]))} will consume all
|
||||
repetitions of the word ``tree'', leaving none to match the final
|
||||
@code{"tree"}.
|
||||
|
||||
In these situations, the desired result can be obtained by using
|
||||
predicates and guards -- namely the @code{not}, @code{if} and
|
||||
@code{guard} expressions -- to constrain behavior. For instance:
|
||||
|
||||
@example
|
||||
(forest (+ "tree" (* [blank])) (not (eol)) "tree" (eol))
|
||||
@end example
|
||||
|
||||
The @code{if} and @code{not} operators accept a parsing expression and
|
||||
interpret it as a boolean, without moving point. The contents of a
|
||||
@code{guard} operator are evaluated as regular Lisp (not a
|
||||
@acronym{PEX}) and should return a boolean value. A @code{nil} value
|
||||
causes the match to fail.
|
||||
|
||||
Another potentially unexpected behavior is that parsing will move
|
||||
point as far as possible, even if the parsing ultimately fails. This
|
||||
rule:
|
||||
|
||||
@example
|
||||
(end-game "game" (eob))
|
||||
@end example
|
||||
|
||||
when run in a buffer containing the text ``game over'' after point,
|
||||
will move point to just after ``game'' then halt parsing, returning
|
||||
@code{nil}. Successful parsing will always return @code{t}, or the
|
||||
contexts of the parsing stack.
|
8
etc/NEWS
8
etc/NEWS
|
@ -1585,6 +1585,14 @@ forwards-compatibility Compat package from GNU ELPA. This allows
|
|||
built-in packages to use the library more effectively, and helps
|
||||
preventing the installation of Compat if unnecessary.
|
||||
|
||||
+++
|
||||
** New package PEG.
|
||||
Emacs now includes a library for writing (P)arsing (E)xpression
|
||||
(G)rammars, an approach to text parsing that provides more structure
|
||||
than regular expressions, but less complexity than context-free
|
||||
grammars. The Info manual "(elisp) Parsing Expression Grammars" has
|
||||
documentation and examples.
|
||||
|
||||
|
||||
* Incompatible Lisp Changes in Emacs 30.1
|
||||
|
||||
|
|
944
lisp/progmodes/peg.el
Normal file
944
lisp/progmodes/peg.el
Normal file
|
@ -0,0 +1,944 @@
|
|||
;;; peg.el --- Parsing Expression Grammars in Emacs Lisp -*- lexical-binding:t -*-
|
||||
|
||||
;; Copyright (C) 2008-2023 Free Software Foundation, Inc.
|
||||
;;
|
||||
;; Author: Helmut Eller <eller.helmut@gmail.com>
|
||||
;; Maintainer: Stefan Monnier <monnier@iro.umontreal.ca>
|
||||
;; Version: 1.0.1
|
||||
;;
|
||||
;; This program is free software: you can redistribute it and/or modify
|
||||
;; it under the terms of the GNU General Public License as published by
|
||||
;; the Free Software Foundation, either version 3 of the License, or
|
||||
;; (at your option) any later version.
|
||||
;;
|
||||
;; This program is distributed in the hope that it will be useful,
|
||||
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
;; GNU General Public License for more details.
|
||||
;;
|
||||
;; You should have received a copy of the GNU General Public License
|
||||
;; along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
;;
|
||||
;;; Commentary:
|
||||
;;
|
||||
;; This package implements Parsing Expression Grammars for Emacs Lisp.
|
||||
|
||||
;; Parsing Expression Grammars (PEG) are a formalism in the spirit of
|
||||
;; Context Free Grammars (CFG) with some simplifications which makes
|
||||
;; the implementation of PEGs as recursive descent parsers particularly
|
||||
;; simple and easy to understand [Ford, Baker].
|
||||
;; PEGs are more expressive than regexps and potentially easier to use.
|
||||
;;
|
||||
;; This file implements the macros `define-peg-rule', `with-peg-rules', and
|
||||
;; `peg-parse' which parses the current buffer according to a PEG.
|
||||
;; E.g. we can match integers with:
|
||||
;;
|
||||
;; (with-peg-rules
|
||||
;; ((number sign digit (* digit))
|
||||
;; (sign (or "+" "-" ""))
|
||||
;; (digit [0-9]))
|
||||
;; (peg-run (peg number)))
|
||||
;; or
|
||||
;; (define-peg-rule digit ()
|
||||
;; [0-9])
|
||||
;; (peg-parse (number sign digit (* digit))
|
||||
;; (sign (or "+" "-" "")))
|
||||
;;
|
||||
;; In contrast to regexps, PEGs allow us to define recursive "rules".
|
||||
;; A "grammar" is a set of rules. A rule is written as (NAME PEX...)
|
||||
;; E.g. (sign (or "+" "-" "")) is a rule with the name "sign".
|
||||
;; The syntax for PEX (Parsing Expression) is a follows:
|
||||
;;
|
||||
;; Description Lisp Traditional, as in Ford's paper
|
||||
;; =========== ==== ===========
|
||||
;; Sequence (and E1 E2) e1 e2
|
||||
;; Prioritized Choice (or E1 E2) e1 / e2
|
||||
;; Not-predicate (not E) !e
|
||||
;; And-predicate (if E) &e
|
||||
;; Any character (any) .
|
||||
;; Literal string "abc" "abc"
|
||||
;; Character C (char C) 'c'
|
||||
;; Zero-or-more (* E) e*
|
||||
;; One-or-more (+ E) e+
|
||||
;; Optional (opt E) e?
|
||||
;; Non-terminal SYMBOL A
|
||||
;; Character range (range A B) [a-b]
|
||||
;; Character set [a-b "+*" ?x] [a-b+*x] ;Note: it's a vector
|
||||
;; Character classes [ascii cntrl]
|
||||
;; Boolean-guard (guard EXP)
|
||||
;; Syntax-Class (syntax-class NAME)
|
||||
;; Local definitions (with RULES PEX...)
|
||||
;; Indirect call (funcall EXP ARGS...)
|
||||
;; and
|
||||
;; Empty-string (null) ε
|
||||
;; Beginning-of-Buffer (bob)
|
||||
;; End-of-Buffer (eob)
|
||||
;; Beginning-of-Line (bol)
|
||||
;; End-of-Line (eol)
|
||||
;; Beginning-of-Word (bow)
|
||||
;; End-of-Word (eow)
|
||||
;; Beginning-of-Symbol (bos)
|
||||
;; End-of-Symbol (eos)
|
||||
;;
|
||||
;; Rules can refer to other rules, and a grammar is often structured
|
||||
;; as a tree, with a root rule referring to one or more "branch
|
||||
;; rules", all the way down to the "leaf rules" that deal with actual
|
||||
;; buffer text. Rules can be recursive or mutually referential,
|
||||
;; though care must be taken not to create infinite loops.
|
||||
;;
|
||||
;;;; Named rulesets:
|
||||
;;
|
||||
;; You can define a set of rules for later use with:
|
||||
;;
|
||||
;; (define-peg-ruleset myrules
|
||||
;; (sign () (or "+" "-" ""))
|
||||
;; (digit () [0-9])
|
||||
;; (nat () digit (* digit))
|
||||
;; (int () sign digit (* digit))
|
||||
;; (float () int "." nat))
|
||||
;;
|
||||
;; and later refer to it:
|
||||
;;
|
||||
;; (with-peg-rules
|
||||
;; (myrules
|
||||
;; (complex float "+i" float))
|
||||
;; ... (peg-parse nat "," nat "," complex) ...)
|
||||
;;
|
||||
;;;; Parsing actions:
|
||||
;;
|
||||
;; PEXs also support parsing actions, i.e. Lisp snippets which are
|
||||
;; executed when a pex matches. This can be used to construct syntax
|
||||
;; trees or for similar tasks. The most basic form of action is
|
||||
;; written as:
|
||||
;;
|
||||
;; (action FORM) ; evaluate FORM for its side-effects
|
||||
;;
|
||||
;; Actions don't consume input, but are executed at the point of
|
||||
;; match. Another kind of action is called a "stack action", and
|
||||
;; looks like this:
|
||||
;;
|
||||
;; `(VAR... -- FORM...) ; stack action
|
||||
;;
|
||||
;; A stack action takes VARs from the "value stack" and pushes the
|
||||
;; results of evaluating FORMs to that stack.
|
||||
|
||||
;; The value stack is created during the course of parsing. Certain
|
||||
;; operators (see below) that match buffer text can push values onto
|
||||
;; this stack. "Upstream" rules can then draw values from the stack,
|
||||
;; and optionally push new ones back. For instance, consider this
|
||||
;; very simple grammar:
|
||||
;;
|
||||
;; (with-peg-rules
|
||||
;; ((query (+ term) (eol))
|
||||
;; (term key ":" value (opt (+ [space]))
|
||||
;; `(k v -- (cons (intern k) v)))
|
||||
;; (key (substring (and (not ":") (+ [word]))))
|
||||
;; (value (or string-value number-value))
|
||||
;; (string-value (substring (+ [alpha])))
|
||||
;; (number-value (substring (+ [digit]))
|
||||
;; `(val -- (string-to-number val))))
|
||||
;; (peg-run (peg query)))
|
||||
;;
|
||||
;; This invocation of `peg-run' would parse this buffer text:
|
||||
;;
|
||||
;; name:Jane age:30
|
||||
;;
|
||||
;; And return this Elisp sexp:
|
||||
;;
|
||||
;; ((age . 30) (name . "Jane"))
|
||||
;;
|
||||
;; Note that, in complex grammars, some care must be taken to make
|
||||
;; sure that the number and type of values drawn from the stack always
|
||||
;; match those pushed. In the example above, both `string-value' and
|
||||
;; `number-value' push a single value to the stack. Since the `value'
|
||||
;; rule only includes these two sub-rules, any upstream rule that
|
||||
;; makes use of `value' can be confident it will always and only push
|
||||
;; a single value to the stack.
|
||||
;;
|
||||
;; Stack action forms are in a sense analogous to lambda forms: the
|
||||
;; symbols before the "--" are the equivalent of lambda arguments,
|
||||
;; while the forms after the "--" are return values. The difference
|
||||
;; being that a lambda form can only return a single value, while a
|
||||
;; stack action can push multiple values onto the stack. It's also
|
||||
;; perfectly valid to use `(-- FORM...)' or `(VAR... --)': the former
|
||||
;; pushes values to the stack without consuming any, and the latter
|
||||
;; pops values from the stack and discards them.
|
||||
;;
|
||||
;;;; Derived Operators:
|
||||
;;
|
||||
;; The following operators are implemented as combinations of
|
||||
;; primitive expressions:
|
||||
;;
|
||||
;; (substring E) ; Match E and push the substring for the matched region.
|
||||
;; (region E) ; Match E and push the start and end positions.
|
||||
;; (replace E RPL); Match E and replace the matched region with RPL.
|
||||
;; (list E) ; Match E and push a list of the items that E produced.
|
||||
;;
|
||||
;; See `peg-ex-parse-int' in `peg-tests.el' for further examples.
|
||||
;;
|
||||
;; Regexp equivalents:
|
||||
;;
|
||||
;; Here a some examples for regexps and how those could be written as pex.
|
||||
;; [Most are taken from rx.el]
|
||||
;;
|
||||
;; "^[a-z]*"
|
||||
;; (and (bol) (* [a-z]))
|
||||
;;
|
||||
;; "\n[^ \t]"
|
||||
;; (and "\n" (not [" \t"]) (any))
|
||||
;;
|
||||
;; "\\*\\*\\* EOOH \\*\\*\\*\n"
|
||||
;; "*** EOOH ***\n"
|
||||
;;
|
||||
;; "\\<\\(catch\\|finally\\)\\>[^_]"
|
||||
;; (and (bow) (or "catch" "finally") (eow) (not "_") (any))
|
||||
;;
|
||||
;; "[ \t\n]*:\\([^:]+\\|$\\)"
|
||||
;; (and (* [" \t\n"]) ":" (or (+ (not ":") (any)) (eol)))
|
||||
;;
|
||||
;; "^content-transfer-encoding:\\(\n?[\t ]\\)*quoted-printable\\(\n?[\t ]\\)*"
|
||||
;; (and (bol)
|
||||
;; "content-transfer-encoding:"
|
||||
;; (* (opt "\n") ["\t "])
|
||||
;; "quoted-printable"
|
||||
;; (* (opt "\n") ["\t "]))
|
||||
;;
|
||||
;; "\\$[I]d: [^ ]+ \\([^ ]+\\) "
|
||||
;; (and "$Id: " (+ (not " ") (any)) " " (+ (not " ") (any)) " ")
|
||||
;;
|
||||
;; "^;;\\s-*\n\\|^\n"
|
||||
;; (or (and (bol) ";;" (* (syntax-class whitespace)) "\n")
|
||||
;; (and (bol) "\n"))
|
||||
;;
|
||||
;; "\\\\\\\\\\[\\w+"
|
||||
;; (and "\\\\[" (+ (syntax-class word)))
|
||||
;;
|
||||
;; See ";;; Examples" in `peg-tests.el' for other examples.
|
||||
;;
|
||||
;;;; Rule argument and indirect calls:
|
||||
;;
|
||||
;; Rules can take arguments and those arguments can themselves be PEGs.
|
||||
;; For example:
|
||||
;;
|
||||
;; (define-peg-rule 2-or-more (peg)
|
||||
;; (funcall peg)
|
||||
;; (funcall peg)
|
||||
;; (* (funcall peg)))
|
||||
;;
|
||||
;; ... (peg-parse
|
||||
;; ...
|
||||
;; (2-or-more (peg foo))
|
||||
;; ...
|
||||
;; (2-or-more (peg bar))
|
||||
;; ...)
|
||||
;;
|
||||
;;;; References:
|
||||
;;
|
||||
;; [Ford] Bryan Ford. Parsing Expression Grammars: a Recognition-Based
|
||||
;; Syntactic Foundation. In POPL'04: Proceedings of the 31st ACM
|
||||
;; SIGPLAN-SIGACT symposium on Principles of Programming Languages,
|
||||
;; pages 111-122, New York, NY, USA, 2004. ACM Press.
|
||||
;; http://pdos.csail.mit.edu/~baford/packrat/
|
||||
;;
|
||||
;; [Baker] Baker, Henry G. "Pragmatic Parsing in Common Lisp". ACM Lisp
|
||||
;; Pointers 4(2), April--June 1991, pp. 3--15.
|
||||
;; http://home.pipeline.com/~hbaker1/Prag-Parse.html
|
||||
;;
|
||||
;; Roman Redziejowski does good PEG related research
|
||||
;; http://www.romanredz.se/pubs.htm
|
||||
|
||||
;;;; Todo:
|
||||
|
||||
;; - Fix the exponential blowup in `peg-translate-exp'.
|
||||
;; - Add a proper debug-spec for PEXs.
|
||||
|
||||
;;; News:
|
||||
|
||||
;; Since 1.0.1:
|
||||
;; - Use OClosures to represent PEG rules when available, and let cl-print
|
||||
;; display their source code.
|
||||
;; - New PEX form (with RULES PEX...).
|
||||
;; - Named rulesets.
|
||||
;; - You can pass arguments to rules.
|
||||
;; - New `funcall' rule to call rules indirectly (e.g. a peg you received
|
||||
;; as argument).
|
||||
|
||||
;; Version 1.0:
|
||||
;; - New official entry points `peg` and `peg-run`.
|
||||
|
||||
;;; Code:
|
||||
|
||||
(eval-when-compile (require 'cl-lib))
|
||||
|
||||
(defvar peg--actions nil
|
||||
"Actions collected along the current parse.
|
||||
Used at runtime for backtracking. It's a list ((POS . THUNK)...).
|
||||
Each THUNK is executed at the corresponding POS. Thunks are
|
||||
executed in a postprocessing step, not during parsing.")
|
||||
|
||||
(defvar peg--errors nil
|
||||
"Data keeping track of the rightmost parse failure location.
|
||||
It's a pair (POSITION . EXPS ...). POSITION is the buffer position and
|
||||
EXPS is a list of rules/expressions that failed.")
|
||||
|
||||
;;;; Main entry points
|
||||
|
||||
(defmacro peg--when-fboundp (f &rest body)
|
||||
(declare (indent 1) (debug (sexp body)))
|
||||
(when (fboundp f)
|
||||
(macroexp-progn body)))
|
||||
|
||||
(peg--when-fboundp oclosure-define
|
||||
(oclosure-define peg-function
|
||||
"Parsing function built from PEG rule."
|
||||
pexs)
|
||||
|
||||
(cl-defmethod cl-print-object ((peg peg-function) stream)
|
||||
(princ "#f<peg " stream)
|
||||
(let ((args (help-function-arglist peg 'preserve-names)))
|
||||
(if args
|
||||
(prin1 args stream)
|
||||
(princ "()" stream)))
|
||||
(princ " " stream)
|
||||
(prin1 (peg-function--pexs peg) stream)
|
||||
(princ ">" stream)))
|
||||
|
||||
(defmacro peg--lambda (pexs args &rest body)
|
||||
(declare (indent 2)
|
||||
(debug (&define form lambda-list def-body)))
|
||||
(if (fboundp 'oclosure-lambda)
|
||||
`(oclosure-lambda (peg-function (pexs ,pexs)) ,args . ,body)
|
||||
`(lambda ,args . ,body)))
|
||||
|
||||
;; Sometimes (with-peg-rules ... (peg-run (peg ...))) is too
|
||||
;; longwinded for the task at hand, so `peg-parse' comes in handy.
|
||||
(defmacro peg-parse (&rest pexs)
|
||||
"Match PEXS at point.
|
||||
PEXS is a sequence of PEG expressions, implicitly combined with `and'.
|
||||
Returns STACK if the match succeed and signals an error on failure,
|
||||
moving point along the way.
|
||||
PEXS can also be a list of PEG rules, in which case the first rule is used."
|
||||
(if (and (consp (car pexs))
|
||||
(symbolp (caar pexs))
|
||||
(not (ignore-errors (peg-normalize (car pexs)))))
|
||||
;; `pexs' is a list of rules: use the first rule as entry point.
|
||||
`(with-peg-rules ,pexs (peg-run (peg ,(caar pexs)) #'peg-signal-failure))
|
||||
`(peg-run (peg ,@pexs) #'peg-signal-failure)))
|
||||
|
||||
(defmacro peg (&rest pexs)
|
||||
"Return a PEG-matcher that matches PEXS."
|
||||
(pcase (peg-normalize `(and . ,pexs))
|
||||
(`(call ,name) `#',(peg--rule-id name)) ;Optimize this case by η-reduction!
|
||||
(exp `(peg--lambda ',pexs () ,(peg-translate-exp exp)))))
|
||||
|
||||
;; There are several "infos we want to return" when parsing a given PEX:
|
||||
;; 1- We want to return the success/failure of the parse.
|
||||
;; 2- We want to return the data of the successful parse (the stack).
|
||||
;; 3- We want to return the diagnostic of the failures.
|
||||
;; 4- We want to perform the actions (upon parse success)!
|
||||
;; `peg-parse' used an error signal to encode the (1) boolean, which
|
||||
;; lets it return all the info conveniently but the error signal was sometimes
|
||||
;; inconvenient. Other times one wants to just know (1) maybe without even
|
||||
;; performing (4).
|
||||
;; `peg-run' lets you choose all that, and by default gives you
|
||||
;; (1) as a simple boolean, while also doing (2), and (4).
|
||||
|
||||
(defun peg-run (peg-matcher &optional failure-function success-function)
|
||||
"Parse with PEG-MATCHER at point and run the success/failure function.
|
||||
If a match was found, move to the end of the match and call SUCCESS-FUNCTION
|
||||
with one argument: a function which will perform all the actions collected
|
||||
during the parse and then return the resulting stack (or t if empty).
|
||||
If no match was found, move to the (rightmost) point of parse failure and call
|
||||
FAILURE-FUNCTION with one argument, which is a list of PEG expressions that
|
||||
failed at this point.
|
||||
SUCCESS-FUNCTION defaults to `funcall' and FAILURE-FUNCTION
|
||||
defaults to `ignore'."
|
||||
(let ((peg--actions '()) (peg--errors '(-1)))
|
||||
(if (funcall peg-matcher)
|
||||
;; Found a parse: run the actions collected along the way.
|
||||
(funcall (or success-function #'funcall)
|
||||
(lambda ()
|
||||
(save-excursion (peg-postprocess peg--actions))))
|
||||
(goto-char (car peg--errors))
|
||||
(when failure-function
|
||||
(funcall failure-function (peg-merge-errors (cdr peg--errors)))))))
|
||||
|
||||
(defmacro define-peg-rule (name args &rest pexs)
|
||||
"Define PEG rule NAME as equivalent to PEXS.
|
||||
The PEG expressions in PEXS are implicitly combined with the
|
||||
sequencing `and' operator of PEG grammars."
|
||||
(declare (indent 1))
|
||||
(let ((inline nil))
|
||||
(while (keywordp (car pexs))
|
||||
(pcase (pop pexs)
|
||||
(:inline (setq inline (car pexs))))
|
||||
(setq pexs (cdr pexs)))
|
||||
(let ((id (peg--rule-id name))
|
||||
(exp (peg-normalize `(and . ,pexs))))
|
||||
`(progn
|
||||
(defalias ',id
|
||||
(peg--lambda ',pexs ,args
|
||||
,(if inline
|
||||
;; Short-circuit to peg--translate in order to skip
|
||||
;; the extra failure-recording of `peg-translate-exp'.
|
||||
;; It also skips the cycle detection of
|
||||
;; `peg--translate-rule-body', which is not the main
|
||||
;; purpose but we can live with it.
|
||||
(apply #'peg--translate exp)
|
||||
(peg--translate-rule-body name exp))))
|
||||
(eval-and-compile
|
||||
;; FIXME: We shouldn't need this any more since the info is now
|
||||
;; stored in the function, but sadly we need to find a name's EXP
|
||||
;; during compilation (i.e. before the `defalias' is executed)
|
||||
;; as part of cycle-detection!
|
||||
(put ',id 'peg--rule-definition ',exp)
|
||||
,@(when inline
|
||||
;; FIXME: Copied from `defsubst'.
|
||||
`(;; Never native-compile defsubsts as we need the byte
|
||||
;; definition in `byte-compile-unfold-bcf' to perform the
|
||||
;; inlining (Bug#42664, Bug#43280, Bug#44209).
|
||||
,(byte-run--set-speed id nil -1)
|
||||
(put ',id 'byte-optimizer #'byte-compile-inline-expand))))))))
|
||||
|
||||
(defmacro define-peg-ruleset (name &rest rules)
|
||||
"Define a set of PEG rules for later use, e.g., in `with-peg-rules'."
|
||||
(declare (indent 1))
|
||||
(let ((defs ())
|
||||
(aliases ()))
|
||||
(dolist (rule rules)
|
||||
(let* ((rname (car rule))
|
||||
(full-rname (format "%s %s" name rname)))
|
||||
(push `(define-peg-rule ,full-rname . ,(cdr rule)) defs)
|
||||
(push `(,(peg--rule-id rname) #',(peg--rule-id full-rname)) aliases)))
|
||||
`(cl-flet ,aliases
|
||||
,@defs
|
||||
(eval-and-compile (put ',name 'peg--rules ',aliases)))))
|
||||
|
||||
(defmacro with-peg-rules (rules &rest body)
|
||||
"Make PEG rules RULES available within the scope of BODY.
|
||||
RULES is a list of rules of the form (NAME . PEXS), where PEXS is a sequence
|
||||
of PEG expressions, implicitly combined with `and'.
|
||||
RULES can also contain symbols in which case these must name
|
||||
rulesets defined previously with `define-peg-ruleset'."
|
||||
(declare (indent 1) (debug (sexp form))) ;FIXME: `sexp' is not good enough!
|
||||
(let* ((rulesets nil)
|
||||
(rules
|
||||
;; First, macroexpand the rules.
|
||||
(delq nil
|
||||
(mapcar (lambda (rule)
|
||||
(if (symbolp rule)
|
||||
(progn (push rule rulesets) nil)
|
||||
(cons (car rule) (peg-normalize `(and . ,(cdr rule))))))
|
||||
rules)))
|
||||
(ctx (assq :peg-rules macroexpand-all-environment)))
|
||||
(macroexpand-all
|
||||
`(cl-labels
|
||||
,(mapcar (lambda (rule)
|
||||
;; FIXME: Use `peg--lambda' as well.
|
||||
`(,(peg--rule-id (car rule))
|
||||
()
|
||||
,(peg--translate-rule-body (car rule) (cdr rule))))
|
||||
rules)
|
||||
,@body)
|
||||
`((:peg-rules ,@(append rules (cdr ctx)))
|
||||
,@macroexpand-all-environment))))
|
||||
|
||||
;;;;; Old entry points
|
||||
|
||||
(defmacro peg-parse-exp (exp)
|
||||
"Match the parsing expression EXP at point."
|
||||
(declare (obsolete peg-parse "peg-0.9"))
|
||||
`(peg-run (peg ,exp)))
|
||||
|
||||
;;;; The actual implementation
|
||||
|
||||
(defun peg--lookup-rule (name)
|
||||
(or (cdr (assq name (cdr (assq :peg-rules macroexpand-all-environment))))
|
||||
;; With `peg-function' objects, we can recover the PEG from which it was
|
||||
;; defined, but this info is not yet available at compile-time. :-(
|
||||
;;(let ((id (peg--rule-id name)))
|
||||
;; (peg-function--pexs (symbol-function id)))
|
||||
(get (peg--rule-id name) 'peg--rule-definition)))
|
||||
|
||||
(defun peg--rule-id (name)
|
||||
(intern (format "peg-rule %s" name)))
|
||||
|
||||
(define-error 'peg-search-failed "Parse error at %d (expecting %S)")
|
||||
|
||||
(defun peg-signal-failure (failures)
|
||||
(signal 'peg-search-failed (list (point) failures)))
|
||||
|
||||
(defun peg-parse-at-point (peg-matcher)
|
||||
"Parse text at point according to the PEG rule PEG-MATCHER."
|
||||
(declare (obsolete peg-run "peg-1.0"))
|
||||
(peg-run peg-matcher
|
||||
#'peg-signal-failure
|
||||
(lambda (f) (let ((r (funcall f))) (if (listp r) r)))))
|
||||
|
||||
;; Internally we use a regularized syntax, e.g. we only have binary OR
|
||||
;; nodes. Regularized nodes are lists of the form (OP ARGS...).
|
||||
(cl-defgeneric peg-normalize (exp)
|
||||
"Return a \"normalized\" form of EXP."
|
||||
(error "Invalid parsing expression: %S" exp))
|
||||
|
||||
(cl-defmethod peg-normalize ((exp string))
|
||||
(let ((len (length exp)))
|
||||
(cond ((zerop len) '(guard t))
|
||||
((= len 1) `(char ,(aref exp 0)))
|
||||
(t `(str ,exp)))))
|
||||
|
||||
(cl-defmethod peg-normalize ((exp symbol))
|
||||
;; (peg--lookup-rule exp)
|
||||
`(call ,exp))
|
||||
|
||||
(cl-defmethod peg-normalize ((exp vector))
|
||||
(peg-normalize `(set . ,(append exp '()))))
|
||||
|
||||
(cl-defmethod peg-normalize ((exp cons))
|
||||
(apply #'peg--macroexpand exp))
|
||||
|
||||
(defconst peg-leaf-types '(any call action char range str set
|
||||
guard syntax-class = funcall))
|
||||
|
||||
(cl-defgeneric peg--macroexpand (head &rest args)
|
||||
(cond
|
||||
((memq head peg-leaf-types) (cons head args))
|
||||
(t `(call ,head ,@args))))
|
||||
|
||||
(cl-defmethod peg--macroexpand ((_ (eql or)) &rest args)
|
||||
(cond ((null args) '(guard nil))
|
||||
((null (cdr args)) (peg-normalize (car args)))
|
||||
(t `(or ,(peg-normalize (car args))
|
||||
,(peg-normalize `(or . ,(cdr args)))))))
|
||||
|
||||
(cl-defmethod peg--macroexpand ((_ (eql and)) &rest args)
|
||||
(cond ((null args) '(guard t))
|
||||
((null (cdr args)) (peg-normalize (car args)))
|
||||
(t `(and ,(peg-normalize (car args))
|
||||
,(peg-normalize `(and . ,(cdr args)))))))
|
||||
|
||||
(cl-defmethod peg--macroexpand ((_ (eql *)) &rest args)
|
||||
`(* ,(peg-normalize `(and . ,args))))
|
||||
|
||||
;; FIXME: this duplicates code; could use some loop to avoid that
|
||||
(cl-defmethod peg--macroexpand ((_ (eql +)) &rest args)
|
||||
(let ((e (peg-normalize `(and . ,args))))
|
||||
`(and ,e (* ,e))))
|
||||
|
||||
(cl-defmethod peg--macroexpand ((_ (eql opt)) &rest args)
|
||||
(let ((e (peg-normalize `(and . ,args))))
|
||||
`(or ,e (guard t))))
|
||||
|
||||
(cl-defmethod peg--macroexpand ((_ (eql if)) &rest args)
|
||||
`(if ,(peg-normalize `(and . ,args))))
|
||||
|
||||
(cl-defmethod peg--macroexpand ((_ (eql not)) &rest args)
|
||||
`(not ,(peg-normalize `(and . ,args))))
|
||||
|
||||
(cl-defmethod peg--macroexpand ((_ (eql \`)) form)
|
||||
(peg-normalize `(stack-action ,form)))
|
||||
|
||||
(cl-defmethod peg--macroexpand ((_ (eql stack-action)) form)
|
||||
(unless (member '-- form)
|
||||
(error "Malformed stack action: %S" form))
|
||||
(let ((args (cdr (member '-- (reverse form))))
|
||||
(values (cdr (member '-- form))))
|
||||
(let ((form `(let ,(mapcar (lambda (var) `(,var (pop peg--stack))) args)
|
||||
,@(mapcar (lambda (val) `(push ,val peg--stack)) values))))
|
||||
`(action ,form))))
|
||||
|
||||
(defvar peg-char-classes
|
||||
'(ascii alnum alpha blank cntrl digit graph lower multibyte nonascii print
|
||||
punct space unibyte upper word xdigit))
|
||||
|
||||
(cl-defmethod peg--macroexpand ((_ (eql set)) &rest specs)
|
||||
(cond ((null specs) '(guard nil))
|
||||
((and (null (cdr specs))
|
||||
(let ((range (peg-range-designator (car specs))))
|
||||
(and range `(range ,(car range) ,(cdr range))))))
|
||||
(t
|
||||
(let ((chars '()) (ranges '()) (classes '()))
|
||||
(while specs
|
||||
(let* ((spec (pop specs))
|
||||
(range (peg-range-designator spec)))
|
||||
(cond (range
|
||||
(push range ranges))
|
||||
((peg-characterp spec)
|
||||
(push spec chars))
|
||||
((stringp spec)
|
||||
(setq chars (append (reverse (append spec ())) chars)))
|
||||
((memq spec peg-char-classes)
|
||||
(push spec classes))
|
||||
(t (error "Invalid set specifier: %S" spec)))))
|
||||
(setq ranges (reverse ranges))
|
||||
(setq chars (delete-dups (reverse chars)))
|
||||
(setq classes (reverse classes))
|
||||
(cond ((and (null ranges)
|
||||
(null classes)
|
||||
(cond ((null chars) '(guard nil))
|
||||
((null (cdr chars)) `(char ,(car chars))))))
|
||||
(t `(set ,ranges ,chars ,classes)))))))
|
||||
|
||||
(defun peg-range-designator (x)
|
||||
(and (symbolp x)
|
||||
(let ((str (symbol-name x)))
|
||||
(and (= (length str) 3)
|
||||
(eq (aref str 1) ?-)
|
||||
(< (aref str 0) (aref str 2))
|
||||
(cons (aref str 0) (aref str 2))))))
|
||||
|
||||
;; characterp is new in Emacs 23.
|
||||
(defun peg-characterp (x)
|
||||
(if (fboundp 'characterp)
|
||||
(characterp x)
|
||||
(integerp x)))
|
||||
|
||||
(cl-defmethod peg--macroexpand ((_ (eql list)) &rest args)
|
||||
(peg-normalize
|
||||
(let ((marker (make-symbol "magic-marker")))
|
||||
`(and (stack-action (-- ',marker))
|
||||
,@args
|
||||
(stack-action (--
|
||||
(let ((l '()))
|
||||
(while
|
||||
(let ((e (pop peg--stack)))
|
||||
(cond ((eq e ',marker) nil)
|
||||
((null peg--stack)
|
||||
(error "No marker on stack"))
|
||||
(t (push e l) t))))
|
||||
l)))))))
|
||||
|
||||
(cl-defmethod peg--macroexpand ((_ (eql substring)) &rest args)
|
||||
(peg-normalize
|
||||
`(and `(-- (point))
|
||||
,@args
|
||||
`(start -- (buffer-substring-no-properties start (point))))))
|
||||
|
||||
(cl-defmethod peg--macroexpand ((_ (eql region)) &rest args)
|
||||
(peg-normalize
|
||||
`(and `(-- (point))
|
||||
,@args
|
||||
`(-- (point)))))
|
||||
|
||||
(cl-defmethod peg--macroexpand ((_ (eql replace)) pe replacement)
|
||||
(peg-normalize
|
||||
`(and (stack-action (-- (point)))
|
||||
,pe
|
||||
(stack-action (start -- (progn
|
||||
(delete-region start (point))
|
||||
(insert-before-markers ,replacement))))
|
||||
(stack-action (_ --)))))
|
||||
|
||||
(cl-defmethod peg--macroexpand ((_ (eql quote)) _form)
|
||||
(error "quote is reserved for future use"))
|
||||
|
||||
(cl-defgeneric peg--translate (head &rest args)
|
||||
(error "No translator for: %S" (cons head args)))
|
||||
|
||||
(defun peg--translate-rule-body (name exp)
|
||||
(let ((msg (condition-case err
|
||||
(progn (peg-detect-cycles exp (list name)) nil)
|
||||
(error (error-message-string err))))
|
||||
(code (peg-translate-exp exp)))
|
||||
(cond
|
||||
((null msg) code)
|
||||
((fboundp 'macroexp--warn-and-return)
|
||||
(macroexp--warn-and-return msg code))
|
||||
(t
|
||||
(message "%s" msg)
|
||||
code))))
|
||||
|
||||
;; This is the main translation function.
|
||||
(defun peg-translate-exp (exp)
|
||||
"Return the ELisp code to match the PE EXP."
|
||||
;; FIXME: This expansion basically duplicates `exp' in the output, which is
|
||||
;; a serious problem because it's done recursively, so it makes the output
|
||||
;; code's size exponentially larger than the input!
|
||||
`(or ,(apply #'peg--translate exp)
|
||||
(peg--record-failure ',exp))) ; for error reporting
|
||||
|
||||
(define-obsolete-function-alias 'peg-record-failure
|
||||
#'peg--record-failure "peg-1.0")
|
||||
(defun peg--record-failure (exp)
|
||||
(cond ((= (point) (car peg--errors))
|
||||
(setcdr peg--errors (cons exp (cdr peg--errors))))
|
||||
((> (point) (car peg--errors))
|
||||
(setq peg--errors (list (point) exp))))
|
||||
nil)
|
||||
|
||||
(cl-defmethod peg--translate ((_ (eql and)) e1 e2)
|
||||
`(and ,(peg-translate-exp e1)
|
||||
,(peg-translate-exp e2)))
|
||||
|
||||
;; Choicepoints are used for backtracking. At a choicepoint we save
|
||||
;; enough state, so that we can continue from there if needed.
|
||||
(defun peg--choicepoint-moved-p (choicepoint)
|
||||
`(/= ,(car choicepoint) (point)))
|
||||
|
||||
(defun peg--choicepoint-restore (choicepoint)
|
||||
`(progn
|
||||
(goto-char ,(car choicepoint))
|
||||
(setq peg--actions ,(cdr choicepoint))))
|
||||
|
||||
(defmacro peg--with-choicepoint (var &rest body)
|
||||
(declare (indent 1) (debug (symbolp form)))
|
||||
`(let ((,var (cons (make-symbol "point") (make-symbol "actions"))))
|
||||
`(let ((,(car ,var) (point))
|
||||
(,(cdr ,var) peg--actions))
|
||||
,@(list ,@body))))
|
||||
|
||||
(cl-defmethod peg--translate ((_ (eql or)) e1 e2)
|
||||
(peg--with-choicepoint cp
|
||||
`(or ,(peg-translate-exp e1)
|
||||
(,@(peg--choicepoint-restore cp)
|
||||
,(peg-translate-exp e2)))))
|
||||
|
||||
(cl-defmethod peg--translate ((_ (eql with)) rules &rest exps)
|
||||
`(with-peg-rules ,rules ,(peg--translate `(and . ,exps))))
|
||||
|
||||
(cl-defmethod peg--translate ((_ (eql guard)) exp) exp)
|
||||
|
||||
(defvar peg-syntax-classes
|
||||
'((whitespace ?-) (word ?w) (symbol ?s) (punctuation ?.)
|
||||
(open ?\() (close ?\)) (string ?\") (escape ?\\) (charquote ?/)
|
||||
(math ?$) (prefix ?') (comment ?<) (endcomment ?>)
|
||||
(comment-fence ?!) (string-fence ?|)))
|
||||
|
||||
(cl-defmethod peg--translate ((_ (eql syntax-class)) class)
|
||||
(let ((probe (assoc class peg-syntax-classes)))
|
||||
(cond (probe `(when (looking-at ,(format "\\s%c" (cadr probe)))
|
||||
(forward-char)
|
||||
t))
|
||||
(t (error "Invalid syntax class: %S\nMust be one of: %s" class
|
||||
(mapcar #'car peg-syntax-classes))))))
|
||||
|
||||
(cl-defmethod peg--translate ((_ (eql =)) string)
|
||||
`(let ((str ,string))
|
||||
(when (zerop (length str))
|
||||
(error "Empty strings not allowed for ="))
|
||||
(search-forward str (+ (point) (length str)) t)))
|
||||
|
||||
(cl-defmethod peg--translate ((_ (eql *)) e)
|
||||
`(progn (while ,(peg--with-choicepoint cp
|
||||
`(if ,(peg-translate-exp e)
|
||||
;; Just as regexps do for the `*' operator,
|
||||
;; we allow the body of `*' loops to match
|
||||
;; the empty string, but we don't repeat the loop if
|
||||
;; we haven't moved, to avoid inf-loops.
|
||||
,(peg--choicepoint-moved-p cp)
|
||||
,(peg--choicepoint-restore cp)
|
||||
nil)))
|
||||
t))
|
||||
|
||||
(cl-defmethod peg--translate ((_ (eql if)) e)
|
||||
(peg--with-choicepoint cp
|
||||
`(when ,(peg-translate-exp e)
|
||||
,(peg--choicepoint-restore cp)
|
||||
t)))
|
||||
|
||||
(cl-defmethod peg--translate ((_ (eql not)) e)
|
||||
(peg--with-choicepoint cp
|
||||
`(unless ,(peg-translate-exp e)
|
||||
,(peg--choicepoint-restore cp)
|
||||
t)))
|
||||
|
||||
(cl-defmethod peg--translate ((_ (eql any)) )
|
||||
'(when (not (eobp))
|
||||
(forward-char)
|
||||
t))
|
||||
|
||||
(cl-defmethod peg--translate ((_ (eql char)) c)
|
||||
`(when (eq (char-after) ',c)
|
||||
(forward-char)
|
||||
t))
|
||||
|
||||
(cl-defmethod peg--translate ((_ (eql set)) ranges chars classes)
|
||||
`(when (looking-at ',(peg-make-charset-regexp ranges chars classes))
|
||||
(forward-char)
|
||||
t))
|
||||
|
||||
(defun peg-make-charset-regexp (ranges chars classes)
|
||||
(when (and (not ranges) (not classes) (<= (length chars) 1))
|
||||
(error "Bug"))
|
||||
(let ((rbracket (member ?\] chars))
|
||||
(minus (member ?- chars))
|
||||
(hat (member ?^ chars)))
|
||||
(dolist (c '(?\] ?- ?^))
|
||||
(setq chars (remove c chars)))
|
||||
(format "[%s%s%s%s%s%s]"
|
||||
(if rbracket "]" "")
|
||||
(if minus "-" "")
|
||||
(mapconcat (lambda (x) (format "%c-%c" (car x) (cdr x))) ranges "")
|
||||
(mapconcat (lambda (c) (format "[:%s:]" c)) classes "")
|
||||
(mapconcat (lambda (c) (format "%c" c)) chars "")
|
||||
(if hat "^" ""))))
|
||||
|
||||
(cl-defmethod peg--translate ((_ (eql range)) from to)
|
||||
`(when (and (char-after)
|
||||
(<= ',from (char-after))
|
||||
(<= (char-after) ',to))
|
||||
(forward-char)
|
||||
t))
|
||||
|
||||
(cl-defmethod peg--translate ((_ (eql str)) str)
|
||||
`(when (looking-at ',(regexp-quote str))
|
||||
(goto-char (match-end 0))
|
||||
t))
|
||||
|
||||
(cl-defmethod peg--translate ((_ (eql call)) name &rest args)
|
||||
`(,(peg--rule-id name) ,@args))
|
||||
|
||||
(cl-defmethod peg--translate ((_ (eql funcall)) exp &rest args)
|
||||
`(funcall ,exp ,@args))
|
||||
|
||||
(cl-defmethod peg--translate ((_ (eql action)) form)
|
||||
`(progn
|
||||
(push (cons (point) (lambda () ,form)) peg--actions)
|
||||
t))
|
||||
|
||||
(defvar peg--stack nil)
|
||||
(defun peg-postprocess (actions)
|
||||
"Execute \"actions\"."
|
||||
(let ((peg--stack '())
|
||||
(forw-actions ()))
|
||||
(pcase-dolist (`(,pos . ,thunk) actions)
|
||||
(push (cons (copy-marker pos) thunk) forw-actions))
|
||||
(pcase-dolist (`(,pos . ,thunk) forw-actions)
|
||||
(goto-char pos)
|
||||
(funcall thunk))
|
||||
(or peg--stack t)))
|
||||
|
||||
;; Left recursion is presumably a common mistake when using PEGs.
|
||||
;; Here we try to detect such mistakes. Essentially we traverse the
|
||||
;; graph as long as we can without consuming input. When we find a
|
||||
;; recursive call we signal an error.
|
||||
|
||||
(defun peg-detect-cycles (exp path)
|
||||
"Signal an error on a cycle.
|
||||
Otherwise traverse EXP recursively and return T if EXP can match
|
||||
without consuming input. Return nil if EXP definitely consumes
|
||||
input. PATH is the list of rules that we have visited so far."
|
||||
(apply #'peg--detect-cycles path exp))
|
||||
|
||||
(cl-defgeneric peg--detect-cycles (head _path &rest args)
|
||||
(error "No detect-cycle method for: %S" (cons head args)))
|
||||
|
||||
(cl-defmethod peg--detect-cycles (path (_ (eql call)) name)
|
||||
(if (member name path)
|
||||
(error "Possible left recursion: %s"
|
||||
(mapconcat (lambda (x) (format "%s" x))
|
||||
(reverse (cons name path)) " -> "))
|
||||
(let ((exp (peg--lookup-rule name)))
|
||||
(if (null exp)
|
||||
;; If there's no rule by that name, either we'll fail at
|
||||
;; run-time or it will be defined later. In any case, at this
|
||||
;; point there's no evidence of a cycle, and if a cycle appears
|
||||
;; later we'll hopefully catch it when the rule gets defined.
|
||||
;; FIXME: In practice, if `name' is part of the cycle, we will
|
||||
;; indeed detect it when it gets defined, but OTOH if `name'
|
||||
;; is not part of a cycle but it *enables* a cycle because
|
||||
;; it matches the empty string (i.e. we should have returned t
|
||||
;; here), then we may not catch the problem at all :-(
|
||||
nil
|
||||
(peg-detect-cycles exp (cons name path))))))
|
||||
|
||||
(cl-defmethod peg--detect-cycles (path (_ (eql and)) e1 e2)
|
||||
(and (peg-detect-cycles e1 path)
|
||||
(peg-detect-cycles e2 path)))
|
||||
|
||||
(cl-defmethod peg--detect-cycles (path (_ (eql or)) e1 e2)
|
||||
(or (peg-detect-cycles e1 path)
|
||||
(peg-detect-cycles e2 path)))
|
||||
|
||||
(cl-defmethod peg--detect-cycles (path (_ (eql *)) e)
|
||||
(peg-detect-cycles e path)
|
||||
t)
|
||||
|
||||
(cl-defmethod peg--detect-cycles (path (_ (eql if)) e)
|
||||
(peg-unary-nullable e path))
|
||||
(cl-defmethod peg--detect-cycles (path (_ (eql not)) e)
|
||||
(peg-unary-nullable e path))
|
||||
|
||||
(defun peg-unary-nullable (exp path)
|
||||
(peg-detect-cycles exp path)
|
||||
t)
|
||||
|
||||
(cl-defmethod peg--detect-cycles (_path (_ (eql any))) nil)
|
||||
(cl-defmethod peg--detect-cycles (_path (_ (eql char)) _c) nil)
|
||||
(cl-defmethod peg--detect-cycles (_path (_ (eql set)) _r _c _k) nil)
|
||||
(cl-defmethod peg--detect-cycles (_path (_ (eql range)) _c1 _c2) nil)
|
||||
(cl-defmethod peg--detect-cycles (_path (_ (eql str)) s) (equal s ""))
|
||||
(cl-defmethod peg--detect-cycles (_path (_ (eql guard)) _e) t)
|
||||
(cl-defmethod peg--detect-cycles (_path (_ (eql =)) _s) nil)
|
||||
(cl-defmethod peg--detect-cycles (_path (_ (eql syntax-class)) _n) nil)
|
||||
(cl-defmethod peg--detect-cycles (_path (_ (eql action)) _form) t)
|
||||
|
||||
(defun peg-merge-errors (exps)
|
||||
"Build a more readable error message out of failed expression."
|
||||
(let ((merged '()))
|
||||
(dolist (exp exps)
|
||||
(setq merged (peg-merge-error exp merged)))
|
||||
merged))
|
||||
|
||||
(defun peg-merge-error (exp merged)
|
||||
(apply #'peg--merge-error merged exp))
|
||||
|
||||
(cl-defgeneric peg--merge-error (_merged head &rest args)
|
||||
(error "No merge-error method for: %S" (cons head args)))
|
||||
|
||||
(cl-defmethod peg--merge-error (merged (_ (eql or)) e1 e2)
|
||||
(peg-merge-error e2 (peg-merge-error e1 merged)))
|
||||
|
||||
(cl-defmethod peg--merge-error (merged (_ (eql and)) e1 _e2)
|
||||
;; FIXME: Why is `e2' not used?
|
||||
(peg-merge-error e1 merged))
|
||||
|
||||
(cl-defmethod peg--merge-error (merged (_ (eql str)) str)
|
||||
;;(add-to-list 'merged str)
|
||||
(cl-adjoin str merged :test #'equal))
|
||||
|
||||
(cl-defmethod peg--merge-error (merged (_ (eql call)) rule)
|
||||
;; (add-to-list 'merged rule)
|
||||
(cl-adjoin rule merged :test #'equal))
|
||||
|
||||
(cl-defmethod peg--merge-error (merged (_ (eql char)) char)
|
||||
;; (add-to-list 'merged (string char))
|
||||
(cl-adjoin (string char) merged :test #'equal))
|
||||
|
||||
(cl-defmethod peg--merge-error (merged (_ (eql set)) r c k)
|
||||
;; (add-to-list 'merged (peg-make-charset-regexp r c k))
|
||||
(cl-adjoin (peg-make-charset-regexp r c k) merged :test #'equal))
|
||||
|
||||
(cl-defmethod peg--merge-error (merged (_ (eql range)) from to)
|
||||
;; (add-to-list 'merged (format "[%c-%c]" from to))
|
||||
(cl-adjoin (format "[%c-%c]" from to) merged :test #'equal))
|
||||
|
||||
(cl-defmethod peg--merge-error (merged (_ (eql *)) exp)
|
||||
(peg-merge-error exp merged))
|
||||
|
||||
(cl-defmethod peg--merge-error (merged (_ (eql any)))
|
||||
;; (add-to-list 'merged '(any))
|
||||
(cl-adjoin '(any) merged :test #'equal))
|
||||
|
||||
(cl-defmethod peg--merge-error (merged (_ (eql not)) x)
|
||||
;; (add-to-list 'merged `(not ,x))
|
||||
(cl-adjoin `(not ,x) merged :test #'equal))
|
||||
|
||||
(cl-defmethod peg--merge-error (merged (_ (eql action)) _action) merged)
|
||||
(cl-defmethod peg--merge-error (merged (_ (eql null))) merged)
|
||||
|
||||
(provide 'peg)
|
||||
(require 'peg)
|
||||
|
||||
(define-peg-rule null () :inline t (guard t))
|
||||
(define-peg-rule fail () :inline t (guard nil))
|
||||
(define-peg-rule bob () :inline t (guard (bobp)))
|
||||
(define-peg-rule eob () :inline t (guard (eobp)))
|
||||
(define-peg-rule bol () :inline t (guard (bolp)))
|
||||
(define-peg-rule eol () :inline t (guard (eolp)))
|
||||
(define-peg-rule bow () :inline t (guard (looking-at "\\<")))
|
||||
(define-peg-rule eow () :inline t (guard (looking-at "\\>")))
|
||||
(define-peg-rule bos () :inline t (guard (looking-at "\\_<")))
|
||||
(define-peg-rule eos () :inline t (guard (looking-at "\\_>")))
|
||||
|
||||
;;; peg.el ends here
|
367
test/lisp/peg-tests.el
Normal file
367
test/lisp/peg-tests.el
Normal file
|
@ -0,0 +1,367 @@
|
|||
;;; peg-tests.el --- Tests of PEG parsers -*- lexical-binding: t; -*-
|
||||
|
||||
;; Copyright (C) 2008-2023 Free Software Foundation, Inc.
|
||||
|
||||
;; This program is free software; you can redistribute it and/or modify
|
||||
;; it under the terms of the GNU General Public License as published by
|
||||
;; the Free Software Foundation, either version 3 of the License, or
|
||||
;; (at your option) any later version.
|
||||
|
||||
;; This program is distributed in the hope that it will be useful,
|
||||
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
;; GNU General Public License for more details.
|
||||
|
||||
;; You should have received a copy of the GNU General Public License
|
||||
;; along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
;;; Commentary:
|
||||
|
||||
;; Tests and examples, that used to live in peg.el wrapped inside an `eval'.
|
||||
|
||||
;;; Code:
|
||||
|
||||
(require 'peg)
|
||||
(require 'ert)
|
||||
|
||||
;;; Tests:
|
||||
|
||||
(defmacro peg-parse-string (pex string &optional noerror)
|
||||
"Parse STRING according to PEX.
|
||||
If NOERROR is non-nil, push nil resp. t if the parse failed
|
||||
resp. succeeded instead of signaling an error."
|
||||
(let ((oldstyle (consp (car-safe pex)))) ;PEX is really a list of rules.
|
||||
`(with-temp-buffer
|
||||
(insert ,string)
|
||||
(goto-char (point-min))
|
||||
,(if oldstyle
|
||||
`(with-peg-rules ,pex
|
||||
(peg-run (peg ,(caar pex))
|
||||
,(unless noerror '#'peg-signal-failure)))
|
||||
`(peg-run (peg ,pex)
|
||||
,(unless noerror '#'peg-signal-failure))))))
|
||||
|
||||
(define-peg-rule peg-test-natural ()
|
||||
[0-9] (* [0-9]))
|
||||
|
||||
(ert-deftest peg-test ()
|
||||
(should (peg-parse-string peg-test-natural "99 bottles" t))
|
||||
(should (peg-parse-string ((s "a")) "a" t))
|
||||
(should (not (peg-parse-string ((s "a")) "b" t)))
|
||||
(should (peg-parse-string ((s (not "a"))) "b" t))
|
||||
(should (not (peg-parse-string ((s (not "a"))) "a" t)))
|
||||
(should (peg-parse-string ((s (if "a"))) "a" t))
|
||||
(should (not (peg-parse-string ((s (if "a"))) "b" t)))
|
||||
(should (peg-parse-string ((s "ab")) "ab" t))
|
||||
(should (not (peg-parse-string ((s "ab")) "ba" t)))
|
||||
(should (not (peg-parse-string ((s "ab")) "a" t)))
|
||||
(should (peg-parse-string ((s (range ?0 ?9))) "0" t))
|
||||
(should (not (peg-parse-string ((s (range ?0 ?9))) "a" t)))
|
||||
(should (peg-parse-string ((s [0-9])) "0" t))
|
||||
(should (not (peg-parse-string ((s [0-9])) "a" t)))
|
||||
(should (not (peg-parse-string ((s [0-9])) "" t)))
|
||||
(should (peg-parse-string ((s (any))) "0" t))
|
||||
(should (not (peg-parse-string ((s (any))) "" t)))
|
||||
(should (peg-parse-string ((s (eob))) "" t))
|
||||
(should (peg-parse-string ((s (not (eob)))) "a" t))
|
||||
(should (peg-parse-string ((s (or "a" "b"))) "a" t))
|
||||
(should (peg-parse-string ((s (or "a" "b"))) "b" t))
|
||||
(should (not (peg-parse-string ((s (or "a" "b"))) "c" t)))
|
||||
(should (peg-parse-string (and "a" "b") "ab" t))
|
||||
(should (peg-parse-string ((s (and "a" "b"))) "abc" t))
|
||||
(should (not (peg-parse-string (and "a" "b") "ba" t)))
|
||||
(should (peg-parse-string ((s (and "a" "b" "c"))) "abc" t))
|
||||
(should (peg-parse-string ((s (* "a") "b" (eob))) "b" t))
|
||||
(should (peg-parse-string ((s (* "a") "b" (eob))) "ab" t))
|
||||
(should (peg-parse-string ((s (* "a") "b" (eob))) "aaab" t))
|
||||
(should (not (peg-parse-string ((s (* "a") "b" (eob))) "abc" t)))
|
||||
(should (peg-parse-string ((s "")) "abc" t))
|
||||
(should (peg-parse-string ((s "" (eob))) "" t))
|
||||
(should (peg-parse-string ((s (opt "a") "b")) "abc" t))
|
||||
(should (peg-parse-string ((s (opt "a") "b")) "bc" t))
|
||||
(should (not (peg-parse-string ((s (or))) "ab" t)))
|
||||
(should (peg-parse-string ((s (and))) "ab" t))
|
||||
(should (peg-parse-string ((s (and))) "" t))
|
||||
(should (peg-parse-string ((s ["^"])) "^" t))
|
||||
(should (peg-parse-string ((s ["^a"])) "a" t))
|
||||
(should (peg-parse-string ["-"] "-" t))
|
||||
(should (peg-parse-string ((s ["]-"])) "]" t))
|
||||
(should (peg-parse-string ((s ["^]"])) "^" t))
|
||||
(should (peg-parse-string ((s [alpha])) "z" t))
|
||||
(should (not (peg-parse-string ((s [alpha])) "0" t)))
|
||||
(should (not (peg-parse-string ((s [alpha])) "" t)))
|
||||
(should (not (peg-parse-string ((s ["][:alpha:]"])) "z" t)))
|
||||
(should (peg-parse-string ((s (bob))) "" t))
|
||||
(should (peg-parse-string ((s (bos))) "x" t))
|
||||
(should (not (peg-parse-string ((s (bos))) " x" t)))
|
||||
(should (peg-parse-string ((s "x" (eos))) "x" t))
|
||||
(should (peg-parse-string ((s (syntax-class whitespace))) " " t))
|
||||
(should (peg-parse-string ((s (= "foo"))) "foo" t))
|
||||
(should (let ((f "foo")) (peg-parse-string ((s (= f))) "foo" t)))
|
||||
(should (not (peg-parse-string ((s (= "foo"))) "xfoo" t)))
|
||||
(should (equal (peg-parse-string ((s `(-- 1 2))) "") '(2 1)))
|
||||
(should (equal (peg-parse-string ((s `(-- 1 2) `(a b -- a b))) "") '(2 1)))
|
||||
(should (equal (peg-parse-string ((s (or (and (any) s)
|
||||
(substring [0-9]))))
|
||||
"ab0cd1ef2gh")
|
||||
'("2")))
|
||||
;; The PEG rule `other' doesn't exist, which will cause a byte-compiler
|
||||
;; warning, but not an error at run time because the rule is not actually
|
||||
;; used in this particular case.
|
||||
(should (equal (peg-parse-string ((s (substring (or "a" other)))
|
||||
;; Unused left-recursive rule, should
|
||||
;; cause a byte-compiler warning.
|
||||
(r (* "a") r))
|
||||
"af")
|
||||
'("a")))
|
||||
(should (equal (peg-parse-string ((s (list x y))
|
||||
(x `(-- 1))
|
||||
(y `(-- 2)))
|
||||
"")
|
||||
'((1 2))))
|
||||
(should (equal (peg-parse-string ((s (list (* x)))
|
||||
(x "" `(-- 'x)))
|
||||
"xxx")
|
||||
;; The empty loop body should be matched once!
|
||||
'((x))))
|
||||
(should (equal (peg-parse-string ((s (list (* x)))
|
||||
(x "x" `(-- 'x)))
|
||||
"xxx")
|
||||
'((x x x))))
|
||||
(should (equal (peg-parse-string ((s (region (* x)))
|
||||
(x "x" `(-- 'x)))
|
||||
"xxx")
|
||||
;; FIXME: Since string positions start at 0, this should
|
||||
;; really be '(3 x x x 0) !!
|
||||
'(4 x x x 1)))
|
||||
(should (equal (peg-parse-string ((s (region (list (* x))))
|
||||
(x "x" `(-- 'x 'y)))
|
||||
"xxx")
|
||||
'(4 (x y x y x y) 1)))
|
||||
(should (equal (with-temp-buffer
|
||||
(save-excursion (insert "abcdef"))
|
||||
(list
|
||||
(peg-run (peg "a"
|
||||
(replace "bc" "x")
|
||||
(replace "de" "y")
|
||||
"f"))
|
||||
(buffer-string)))
|
||||
'(t "axyf")))
|
||||
(with-temp-buffer
|
||||
(insert "toro")
|
||||
(goto-char (point-min))
|
||||
(should (peg-run (peg "to")))
|
||||
(should-not (peg-run (peg "to")))
|
||||
(should (peg-run (peg "ro")))
|
||||
(should (eobp)))
|
||||
(with-temp-buffer
|
||||
(insert " ")
|
||||
(goto-char (point-min))
|
||||
(peg-run (peg (+ (syntax-class whitespace))))
|
||||
(should (eobp)))
|
||||
)
|
||||
|
||||
;;; Examples:
|
||||
|
||||
;; peg-ex-recognize-int recognizes integers. An integer begins with a
|
||||
;; optional sign, then follows one or more digits. Digits are all
|
||||
;; characters from 0 to 9.
|
||||
;;
|
||||
;; Notes:
|
||||
;; 1) "" matches the empty sequence, i.e. matches without consuming
|
||||
;; input.
|
||||
;; 2) [0-9] is the character range from 0 to 9. This can also be
|
||||
;; written as (range ?0 ?9). Note that 0-9 is a symbol.
|
||||
(defun peg-ex-recognize-int ()
|
||||
(with-peg-rules ((number sign digit (* digit))
|
||||
(sign (or "+" "-" ""))
|
||||
(digit [0-9]))
|
||||
(peg-run (peg number))))
|
||||
|
||||
;; peg-ex-parse-int recognizes integers and computes the corresponding
|
||||
;; value. The grammar is the same as for `peg-ex-recognize-int'
|
||||
;; augmented with parsing actions. Unfortunaletly, the actions add
|
||||
;; quite a bit of clutter.
|
||||
;;
|
||||
;; The actions for the sign rule push -1 on the stack for a minus sign
|
||||
;; and 1 for plus or no sign.
|
||||
;;
|
||||
;; The action for the digit rule pushes the value for a single digit.
|
||||
;;
|
||||
;; The action `(a b -- (+ (* a 10) b)), takes two items from the stack
|
||||
;; and pushes the first digit times 10 added to the second digit.
|
||||
;;
|
||||
;; The action `(sign val -- (* sign val)), multiplies val with the
|
||||
;; sign (1 or -1).
|
||||
(defun peg-ex-parse-int ()
|
||||
(with-peg-rules ((number sign digit (* digit
|
||||
`(a b -- (+ (* a 10) b)))
|
||||
`(sign val -- (* sign val)))
|
||||
(sign (or (and "+" `(-- 1))
|
||||
(and "-" `(-- -1))
|
||||
(and "" `(-- 1))))
|
||||
(digit [0-9] `(-- (- (char-before) ?0))))
|
||||
(peg-run (peg number))))
|
||||
|
||||
;; Put point after the ) and press C-x C-e
|
||||
;; (peg-ex-parse-int)-234234
|
||||
|
||||
;; Parse arithmetic expressions and compute the result as side effect.
|
||||
(defun peg-ex-arith ()
|
||||
(peg-parse
|
||||
(expr _ sum eol)
|
||||
(sum product (* (or (and "+" _ product `(a b -- (+ a b)))
|
||||
(and "-" _ product `(a b -- (- a b))))))
|
||||
(product value (* (or (and "*" _ value `(a b -- (* a b)))
|
||||
(and "/" _ value `(a b -- (/ a b))))))
|
||||
(value (or (and (substring number) `(string -- (string-to-number string)))
|
||||
(and "(" _ sum ")" _)))
|
||||
(number (+ [0-9]) _)
|
||||
(_ (* [" \t"]))
|
||||
(eol (or "\n" "\r\n" "\r"))))
|
||||
|
||||
;; (peg-ex-arith) 1 + 2 * 3 * (4 + 5)
|
||||
;; (peg-ex-arith) 1 + 2 ^ 3 * (4 + 5) ; fails to parse
|
||||
|
||||
;; Parse URI according to RFC 2396.
|
||||
(defun peg-ex-uri ()
|
||||
(peg-parse
|
||||
(URI-reference (or absoluteURI relativeURI)
|
||||
(or (and "#" (substring fragment))
|
||||
`(-- nil))
|
||||
`(scheme user host port path query fragment --
|
||||
(list :scheme scheme :user user
|
||||
:host host :port port
|
||||
:path path :query query
|
||||
:fragment fragment)))
|
||||
(absoluteURI (substring scheme) ":" (or hier-part opaque-part))
|
||||
(hier-part ;(-- user host port path query)
|
||||
(or net-path
|
||||
(and `(-- nil nil nil)
|
||||
abs-path))
|
||||
(or (and "?" (substring query))
|
||||
`(-- nil)))
|
||||
(net-path "//" authority (or abs-path `(-- nil)))
|
||||
(abs-path "/" path-segments)
|
||||
(path-segments segment (list (* "/" segment)) `(s l -- (cons s l)))
|
||||
(segment (substring (* pchar) (* ";" param)))
|
||||
(param (* pchar))
|
||||
(pchar (or unreserved escaped [":@&=+$,"]))
|
||||
(query (* uric))
|
||||
(fragment (* uric))
|
||||
(relativeURI (or net-path abs-path rel-path) (opt "?" query))
|
||||
(rel-path rel-segment (opt abs-path))
|
||||
(rel-segment (+ unreserved escaped [";@&=+$,"]))
|
||||
(authority (or server reg-name))
|
||||
(server (or (and (or (and (substring userinfo) "@")
|
||||
`(-- nil))
|
||||
hostport)
|
||||
`(-- nil nil nil)))
|
||||
(userinfo (* (or unreserved escaped [";:&=+$,"])))
|
||||
(hostport (substring host) (or (and ":" (substring port))
|
||||
`(-- nil)))
|
||||
(host (or hostname ipv4address))
|
||||
(hostname (* domainlabel ".") toplabel (opt "."))
|
||||
(domainlabel alphanum
|
||||
(opt (* (or alphanum "-") (if alphanum))
|
||||
alphanum))
|
||||
(toplabel alpha
|
||||
(* (or alphanum "-") (if alphanum))
|
||||
alphanum)
|
||||
(ipv4address (+ digit) "." (+ digit) "." (+ digit) "." (+ digit))
|
||||
(port (* digit))
|
||||
(scheme alpha (* (or alpha digit ["+-."])))
|
||||
(reg-name (or unreserved escaped ["$,;:@&=+"]))
|
||||
(opaque-part uric-no-slash (* uric))
|
||||
(uric (or reserved unreserved escaped))
|
||||
(uric-no-slash (or unreserved escaped [";?:@&=+$,"]))
|
||||
(reserved (set ";/?:@&=+$,"))
|
||||
(unreserved (or alphanum mark))
|
||||
(escaped "%" hex hex)
|
||||
(hex (or digit [A-F] [a-f]))
|
||||
(mark (set "-_.!~*'()"))
|
||||
(alphanum (or alpha digit))
|
||||
(alpha (or lowalpha upalpha))
|
||||
(lowalpha [a-z])
|
||||
(upalpha [A-Z])
|
||||
(digit [0-9])))
|
||||
|
||||
;; (peg-ex-uri)http://luser@www.foo.com:8080/bar/baz.html?x=1#foo
|
||||
;; (peg-ex-uri)file:/bar/baz.html?foo=df#x
|
||||
|
||||
;; Split STRING where SEPARATOR occurs.
|
||||
(defun peg-ex-split (string separator)
|
||||
(peg-parse-string ((s (list (* (* sep) elt)))
|
||||
(elt (substring (+ (not sep) (any))))
|
||||
(sep (= separator)))
|
||||
string))
|
||||
|
||||
;; (peg-ex-split "-abc-cd-" "-")
|
||||
|
||||
;; Parse a lisp style Sexp.
|
||||
;; [To keep the example short, ' and . are handled as ordinary symbol.]
|
||||
(defun peg-ex-lisp ()
|
||||
(peg-parse
|
||||
(sexp _ (or string list number symbol))
|
||||
(_ (* (or [" \n\t"] comment)))
|
||||
(comment ";" (* (not (or "\n" (eob))) (any)))
|
||||
(string "\"" (substring (* (not "\"") (any))) "\"")
|
||||
(number (substring (opt (set "+-")) (+ digit))
|
||||
(if terminating)
|
||||
`(string -- (string-to-number string)))
|
||||
(symbol (substring (and symchar (* (not terminating) symchar)))
|
||||
`(s -- (intern s)))
|
||||
(symchar [a-z A-Z 0-9 "-;!#%&'*+,./:;<=>?@[]^_`{|}~"])
|
||||
(list "(" `(-- (cons nil nil)) `(hd -- hd hd)
|
||||
(* sexp `(tl e -- (setcdr tl (list e))))
|
||||
_ ")" `(hd _tl -- (cdr hd)))
|
||||
(digit [0-9])
|
||||
(terminating (or (set " \n\t();\"'") (eob)))))
|
||||
|
||||
;; (peg-ex-lisp)
|
||||
|
||||
;; We try to detect left recursion and report it as error.
|
||||
(defun peg-ex-left-recursion ()
|
||||
(eval '(peg-parse (exp (or term
|
||||
(and exp "+" exp)))
|
||||
(term (or digit
|
||||
(and term "*" term)))
|
||||
(digit [0-9]))
|
||||
t))
|
||||
|
||||
(defun peg-ex-infinite-loop ()
|
||||
(eval '(peg-parse (exp (* (or "x"
|
||||
"y"
|
||||
(action (foo))))))
|
||||
t))
|
||||
|
||||
;; Some efficiency problems:
|
||||
|
||||
;; Find the last digit in a string.
|
||||
;; Recursive definition with excessive stack usage.
|
||||
(defun peg-ex-last-digit (string)
|
||||
(peg-parse-string ((s (or (and (any) s)
|
||||
(substring [0-9]))))
|
||||
string))
|
||||
|
||||
;; (peg-ex-last-digit "ab0cd1ef2gh")
|
||||
;; (peg-ex-last-digit (make-string 50 ?-))
|
||||
;; (peg-ex-last-digit (make-string 1000 ?-))
|
||||
|
||||
;; Find the last digit without recursion. Doesn't run out of stack,
|
||||
;; but probably still too inefficient for large inputs.
|
||||
(defun peg-ex-last-digit2 (string)
|
||||
(peg-parse-string ((s `(-- nil)
|
||||
(+ (* (not digit) (any))
|
||||
(substring digit)
|
||||
`(_d1 d2 -- d2)))
|
||||
(digit [0-9]))
|
||||
string))
|
||||
|
||||
;; (peg-ex-last-digit2 "ab0cd1ef2gh")
|
||||
;; (peg-ex-last-digit2 (concat (make-string 500000 ?-) "8a9b"))
|
||||
;; (peg-ex-last-digit2 (make-string 500000 ?-))
|
||||
;; (peg-ex-last-digit2 (make-string 500000 ?5))
|
||||
|
||||
(provide 'peg-tests)
|
||||
;;; peg-tests.el ends here
|
Loading…
Add table
Reference in a new issue