2009-09-07 16:38:54 +00:00
|
|
|
|
;;; semantic/wisent.el --- Wisent - Semantic gateway
|
|
|
|
|
|
2020-01-01 00:19:43 +00:00
|
|
|
|
;; Copyright (C) 2001-2007, 2009-2020 Free Software Foundation, Inc.
|
2009-09-07 16:38:54 +00:00
|
|
|
|
|
|
|
|
|
;; Author: David Ponce <david@dponce.com>
|
|
|
|
|
;; Created: 30 Aug 2001
|
|
|
|
|
;; Keywords: syntax
|
|
|
|
|
|
|
|
|
|
;; This file is part of GNU Emacs.
|
|
|
|
|
|
|
|
|
|
;; GNU Emacs is free software: you can redistribute it and/or modify
|
|
|
|
|
;; it under the terms of the GNU General Public License as published by
|
|
|
|
|
;; the Free Software Foundation, either version 3 of the License, or
|
|
|
|
|
;; (at your option) any later version.
|
|
|
|
|
|
|
|
|
|
;; GNU Emacs is distributed in the hope that it will be useful,
|
|
|
|
|
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
;; GNU General Public License for more details.
|
|
|
|
|
|
|
|
|
|
;; You should have received a copy of the GNU General Public License
|
2017-09-13 15:52:52 -07:00
|
|
|
|
;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>.
|
2009-09-07 16:38:54 +00:00
|
|
|
|
|
|
|
|
|
;;; Commentary:
|
|
|
|
|
;;
|
|
|
|
|
;; Here are functions necessary to use the Wisent LALR parser from
|
|
|
|
|
;; Semantic environment.
|
|
|
|
|
|
|
|
|
|
;;; History:
|
|
|
|
|
;;
|
|
|
|
|
|
|
|
|
|
;;; Code:
|
|
|
|
|
|
|
|
|
|
(require 'semantic)
|
|
|
|
|
(require 'semantic/wisent/wisent)
|
|
|
|
|
|
|
|
|
|
;;; Lexical analysis
|
|
|
|
|
;;
|
|
|
|
|
(defvar wisent-lex-istream nil
|
|
|
|
|
"Input stream of `semantic-lex' syntactic tokens.")
|
|
|
|
|
|
|
|
|
|
(defvar wisent-lex-lookahead nil
|
|
|
|
|
"Extra lookahead token.
|
|
|
|
|
When non-nil it is directly returned by `wisent-lex-function'.")
|
|
|
|
|
|
|
|
|
|
(defmacro wisent-lex-eoi ()
|
|
|
|
|
"Return an End-Of-Input lexical token.
|
2009-10-01 04:54:05 +00:00
|
|
|
|
The EOI token is like this: ($EOI \"\" POINT-MAX . POINT-MAX)."
|
2009-09-07 16:38:54 +00:00
|
|
|
|
`(cons ',wisent-eoi-term
|
|
|
|
|
(cons ""
|
|
|
|
|
(cons (point-max) (point-max)))))
|
|
|
|
|
|
|
|
|
|
(defmacro define-wisent-lexer (name doc &rest body)
|
|
|
|
|
"Create a new lexical analyzer with NAME.
|
|
|
|
|
DOC is a documentation string describing this analyzer.
|
|
|
|
|
When a token is available in `wisent-lex-istream', eval BODY forms
|
|
|
|
|
sequentially. BODY must return a lexical token for the LALR parser.
|
|
|
|
|
|
|
|
|
|
Each token in input was produced by `semantic-lex', it is a list:
|
|
|
|
|
|
|
|
|
|
(TOKSYM START . END)
|
|
|
|
|
|
|
|
|
|
TOKSYM is a terminal symbol used in the grammar.
|
|
|
|
|
START and END mark boundary in the current buffer of that token's
|
|
|
|
|
value.
|
|
|
|
|
|
|
|
|
|
Returned tokens must have the form:
|
|
|
|
|
|
|
|
|
|
(TOKSYM VALUE START . END)
|
|
|
|
|
|
|
|
|
|
where VALUE is the buffer substring between START and END positions."
|
|
|
|
|
`(defun
|
|
|
|
|
,name () ,doc
|
|
|
|
|
(cond
|
|
|
|
|
(wisent-lex-lookahead
|
|
|
|
|
(prog1 wisent-lex-lookahead
|
|
|
|
|
(setq wisent-lex-lookahead nil)))
|
|
|
|
|
(wisent-lex-istream
|
|
|
|
|
,@body)
|
|
|
|
|
((wisent-lex-eoi)))))
|
|
|
|
|
|
|
|
|
|
(define-wisent-lexer wisent-lex
|
|
|
|
|
"Return the next available lexical token in Wisent's form.
|
|
|
|
|
The variable `wisent-lex-istream' contains the list of lexical tokens
|
|
|
|
|
produced by `semantic-lex'. Pop the next token available and convert
|
|
|
|
|
it to a form suitable for the Wisent's parser."
|
|
|
|
|
(let* ((tk (car wisent-lex-istream)))
|
|
|
|
|
;; Eat input stream
|
|
|
|
|
(setq wisent-lex-istream (cdr wisent-lex-istream))
|
|
|
|
|
(cons (semantic-lex-token-class tk)
|
|
|
|
|
(cons (semantic-lex-token-text tk)
|
|
|
|
|
(semantic-lex-token-bounds tk)))))
|
|
|
|
|
|
|
|
|
|
;;; Syntax analysis
|
|
|
|
|
;;
|
|
|
|
|
(defvar wisent-error-function nil
|
|
|
|
|
"Function used to report parse error.
|
|
|
|
|
By default use the function `wisent-message'.")
|
|
|
|
|
(make-variable-buffer-local 'wisent-error-function)
|
|
|
|
|
|
|
|
|
|
(defvar wisent-lexer-function 'wisent-lex
|
|
|
|
|
"Function used to obtain the next lexical token in input.
|
|
|
|
|
Should be a lexical analyzer created with `define-wisent-lexer'.")
|
|
|
|
|
(make-variable-buffer-local 'wisent-lexer-function)
|
|
|
|
|
|
|
|
|
|
;; Tag production
|
|
|
|
|
;;
|
|
|
|
|
(defsubst wisent-raw-tag (semantic-tag)
|
|
|
|
|
"Return raw form of given Semantic tag SEMANTIC-TAG.
|
|
|
|
|
Should be used in semantic actions, in grammars, to build a Semantic
|
|
|
|
|
parse tree."
|
|
|
|
|
(nconc semantic-tag
|
|
|
|
|
(if (or $region
|
|
|
|
|
(setq $region (nthcdr 2 wisent-input)))
|
|
|
|
|
(list (car $region) (cdr $region))
|
|
|
|
|
(list (point-max) (point-max)))))
|
|
|
|
|
|
|
|
|
|
(defsubst wisent-cook-tag (raw-tag)
|
|
|
|
|
"From raw form of Semantic tag RAW-TAG, return a list of cooked tags.
|
|
|
|
|
Should be used in semantic actions, in grammars, to build a Semantic
|
|
|
|
|
parse tree."
|
|
|
|
|
(let* ((cooked (semantic--tag-expand raw-tag))
|
|
|
|
|
(l cooked))
|
|
|
|
|
(while l
|
|
|
|
|
(semantic--tag-put-property (car l) 'reparse-symbol $nterm)
|
|
|
|
|
(setq l (cdr l)))
|
|
|
|
|
cooked))
|
|
|
|
|
|
|
|
|
|
;; Unmatched syntax collector
|
|
|
|
|
;;
|
|
|
|
|
(defun wisent-collect-unmatched-syntax (nomatch)
|
|
|
|
|
"Add lexical token NOMATCH to the cache of unmatched tokens.
|
|
|
|
|
See also the variable `semantic-unmatched-syntax-cache'.
|
|
|
|
|
|
|
|
|
|
NOMATCH is in Wisent's form: (SYMBOL VALUE START . END)
|
|
|
|
|
and will be collected in `semantic-lex' form: (SYMBOL START . END)."
|
|
|
|
|
(let ((region (cddr nomatch)))
|
|
|
|
|
(and (number-or-marker-p (car region))
|
|
|
|
|
(number-or-marker-p (cdr region))
|
|
|
|
|
(setq semantic-unmatched-syntax-cache
|
|
|
|
|
(cons (cons (car nomatch) region)
|
|
|
|
|
semantic-unmatched-syntax-cache)))))
|
|
|
|
|
|
|
|
|
|
;; Parser plug-ins
|
|
|
|
|
;;
|
2016-01-24 20:30:39 +00:00
|
|
|
|
;; The following functions permit plugging the Wisent LALR parser in
|
2009-09-07 16:38:54 +00:00
|
|
|
|
;; Semantic toolkit. They use the standard API provided by Semantic
|
|
|
|
|
;; to plug parsers in.
|
|
|
|
|
;;
|
|
|
|
|
;; Two plug-ins are available, BUT ONLY ONE MUST BE USED AT A TIME:
|
|
|
|
|
;;
|
|
|
|
|
;; - `wisent-parse-stream' designed to override the standard function
|
|
|
|
|
;; `semantic-parse-stream'.
|
|
|
|
|
;;
|
|
|
|
|
;; - `wisent-parse-region' designed to override the standard function
|
|
|
|
|
;; `semantic-parse-region'.
|
|
|
|
|
;;
|
|
|
|
|
;; Maybe the latter is faster because it eliminates a lot of function
|
|
|
|
|
;; call.
|
|
|
|
|
;;
|
|
|
|
|
(defun wisent-parse-stream (stream goal)
|
|
|
|
|
"Parse STREAM using the Wisent LALR parser.
|
|
|
|
|
GOAL is a nonterminal symbol to start parsing at.
|
|
|
|
|
Return the list (STREAM SEMANTIC-STREAM) where STREAM are those
|
|
|
|
|
elements of STREAM that have not been used. SEMANTIC-STREAM is the
|
|
|
|
|
list of semantic tags found.
|
|
|
|
|
The LALR parser automaton must be available in buffer local variable
|
|
|
|
|
`semantic--parse-table'.
|
|
|
|
|
|
|
|
|
|
Must be installed by `semantic-install-function-overrides' to override
|
|
|
|
|
the standard function `semantic-parse-stream'."
|
|
|
|
|
(let (wisent-lex-istream wisent-lex-lookahead la-elt cache)
|
|
|
|
|
|
|
|
|
|
;; IMPLEMENTATION NOTES:
|
|
|
|
|
;; `wisent-parse' returns a lookahead token when it stopped
|
|
|
|
|
;; parsing before encountering the end of input. To re-enter the
|
|
|
|
|
;; parser it is necessary to push back in the lexical input stream
|
|
|
|
|
;; the last lookahead token issued. Because the format of
|
|
|
|
|
;; lookahead tokens and tokens in STREAM can be different the
|
|
|
|
|
;; lookahead token is put in the variable `wisent-lex-lookahead'
|
|
|
|
|
;; before calling `wisent-parse'. Wisent's lexers always pop the
|
lisp/*.el, src/*.c: Fix typos in docstrings
* lisp/apropos.el (apropos-do-all):
* lisp/auth-source-pass.el (auth-source-pass--select-from-entries):
* lisp/auth-source.el (auth-source-user-or-password):
* lisp/calc/calc-forms.el (math-tzone-names):
* lisp/calendar/diary-lib.el (diary-face-attrs)
(diary-mark-entries-1):
* lisp/cedet/cedet-files.el (cedet-files-list-recursively):
* lisp/cedet/ede.el (ede-constructing, ede-deep-rescan):
* lisp/cedet/ede/cpp-root.el (ede-cpp-root-header-file-p):
* lisp/cedet/ede/proj.el (ede-proj-target-makefile):
* lisp/cedet/inversion.el (inversion-check-version)
(inversion-test):
* lisp/cedet/mode-local.el (mode-local-map-file-buffers):
* lisp/cedet/semantic/complete.el (semantic-displayer-ghost):
* lisp/cedet/semantic/db-find.el (semanticdb-find-translate-path-default):
* lisp/cedet/semantic/db.el (semanticdb-table)
(semanticdb-search-system-databases):
* lisp/cedet/semantic/imenu.el (semantic-imenu-index-directory):
* lisp/cedet/semantic/java.el (semantic-java-doc-keywords-map):
* lisp/cedet/semantic/lex-spp.el (semantic-lex-spp-use-headers-flag):
* lisp/cedet/semantic/lex.el (semantic-lex-make-keyword-table)
(semantic-lex-make-type-table, semantic-lex-debug-analyzers):
* lisp/cedet/semantic/tag-ls.el (semantic-tag-abstract-p)
(semantic-tag-leaf-p, semantic-tag-static-p)
(semantic-tag-prototype-p):
* lisp/dnd.el (dnd-open-remote-file-function, dnd-open-local-file):
* lisp/emacs-lisp/eieio-opt.el (eieio-build-class-alist)
(eieio-read-class, eieio-read-subclass):
* lisp/emacs-lisp/generator.el (cps--replace-variable-references)
(cps--handle-loop-for):
* lisp/erc/erc-dcc.el (erc-dcc-list, erc-dcc-member, erc-dcc-server)
(erc-dcc-auto-mask-p, erc-dcc-get-file, erc-dcc-chat-accept):
* lisp/eshell/em-pred.el (eshell-pred-file-type):
* lisp/faces.el (defined-colors-with-face-attributes):
* lisp/font-core.el (font-lock-mode):
* lisp/frame.el (frame-restack):
* lisp/net/shr.el (shr-image-animate):
* lisp/org/org-agenda.el (org-agenda-change-all-lines)
(org-agenda-today-p):
* lisp/org/org-id.el (org-id-get):
* lisp/org/org.el (org-highlight-latex-and-related)
(org--valid-property-p):
* lisp/org/ox-beamer.el (org-beamer--get-label):
* lisp/org/ox-latex.el (org-latex--caption-above-p):
* lisp/org/ox-odt.el (org-odt--copy-image-file)
(org-odt--copy-formula-file):
* lisp/org/ox.el (org-export-with-timestamps):
* lisp/progmodes/verilog-mode.el (verilog-indent-declaration-macros):
* lisp/ses.el (ses-file-format-extend-parameter-list):
* lisp/term.el (ansi-term):
* lisp/textmodes/bibtex.el (bibtex-no-opt-remove-re)
(bibtex-beginning-of-first-entry, bibtex-autokey-get-title)
(bibtex-read-key, bibtex-initialize):
* lisp/textmodes/flyspell.el (flyspell-word):
* lisp/view.el (view-mode-exit):
* src/composite.c:
* src/floatfns.c (Fisnan): Fix typos in docstrings.
2019-09-19 04:32:25 +02:00
|
|
|
|
;; next lexical token from that variable when non-nil, then from
|
2009-09-07 16:38:54 +00:00
|
|
|
|
;; the lexical input stream.
|
|
|
|
|
;;
|
|
|
|
|
;; The first element of STREAM is used to keep lookahead tokens
|
|
|
|
|
;; across successive calls to `wisent-parse-stream'. In fact
|
|
|
|
|
;; what is kept is a stack of lookaheads encountered so far. It
|
|
|
|
|
;; is cleared when `wisent-parse' returns a valid semantic tag,
|
|
|
|
|
;; or twice the same lookahead token! The latter indicates that
|
|
|
|
|
;; there is a syntax error on that token. If so, tokens currently
|
|
|
|
|
;; in the lookahead stack have not been used, and are moved into
|
|
|
|
|
;; `semantic-unmatched-syntax-cache'. When the parser will be
|
|
|
|
|
;; re-entered, a new lexical token will be read from STREAM.
|
|
|
|
|
;;
|
|
|
|
|
;; The first element of STREAM that contains the lookahead stack
|
|
|
|
|
;; has this format (compatible with the format of `semantic-lex'
|
|
|
|
|
;; tokens):
|
|
|
|
|
;;
|
|
|
|
|
;; (LOOKAHEAD-STACK START . END)
|
|
|
|
|
;;
|
|
|
|
|
;; where LOOKAHEAD-STACK is a list of lookahead tokens. And
|
|
|
|
|
;; START/END are the bounds of the lookahead at top of stack.
|
|
|
|
|
|
|
|
|
|
;; Retrieve lookahead token from stack
|
|
|
|
|
(setq la-elt (car stream))
|
|
|
|
|
(if (consp (car la-elt))
|
|
|
|
|
;; The first elt of STREAM contains a lookahead stack
|
|
|
|
|
(setq wisent-lex-lookahead (caar la-elt)
|
|
|
|
|
stream (cdr stream))
|
|
|
|
|
(setq la-elt nil))
|
|
|
|
|
;; Parse
|
|
|
|
|
(setq wisent-lex-istream stream
|
|
|
|
|
cache (semantic-safe "wisent-parse-stream: %s"
|
|
|
|
|
(condition-case error-to-filter
|
|
|
|
|
(wisent-parse semantic--parse-table
|
|
|
|
|
wisent-lexer-function
|
|
|
|
|
wisent-error-function
|
|
|
|
|
goal)
|
|
|
|
|
(args-out-of-range
|
|
|
|
|
(if (and (not debug-on-error)
|
|
|
|
|
(= wisent-parse-max-stack-size
|
|
|
|
|
(nth 2 error-to-filter)))
|
|
|
|
|
(progn
|
|
|
|
|
(message "wisent-parse-stream: %s"
|
|
|
|
|
(error-message-string error-to-filter))
|
|
|
|
|
(message "wisent-parse-max-stack-size \
|
|
|
|
|
might need to be increased"))
|
|
|
|
|
(apply 'signal error-to-filter))))))
|
|
|
|
|
;; Manage returned lookahead token
|
|
|
|
|
(if wisent-lookahead
|
|
|
|
|
(if (eq (caar la-elt) wisent-lookahead)
|
|
|
|
|
;; It is already at top of lookahead stack
|
|
|
|
|
(progn
|
|
|
|
|
(setq cache nil
|
|
|
|
|
la-elt (car la-elt))
|
|
|
|
|
(while la-elt
|
|
|
|
|
;; Collect unmatched tokens from the stack
|
|
|
|
|
(run-hook-with-args
|
|
|
|
|
'wisent-discarding-token-functions (car la-elt))
|
|
|
|
|
(setq la-elt (cdr la-elt))))
|
|
|
|
|
;; New lookahead token
|
|
|
|
|
(if (or (consp cache) ;; Clear the stack if parse succeeded
|
|
|
|
|
(null la-elt))
|
|
|
|
|
(setq la-elt (cons nil nil)))
|
|
|
|
|
;; Push it into the stack
|
|
|
|
|
(setcar la-elt (cons wisent-lookahead (car la-elt)))
|
|
|
|
|
;; Update START/END
|
|
|
|
|
(setcdr la-elt (cddr wisent-lookahead))
|
|
|
|
|
;; Push (LOOKAHEAD-STACK START . END) in STREAM
|
|
|
|
|
(setq wisent-lex-istream (cons la-elt wisent-lex-istream))))
|
|
|
|
|
;; Return (STREAM SEMANTIC-STREAM)
|
|
|
|
|
(list wisent-lex-istream
|
|
|
|
|
(if (consp cache) cache '(nil))
|
|
|
|
|
)))
|
|
|
|
|
|
|
|
|
|
(defun wisent-parse-region (start end &optional goal depth returnonerror)
|
|
|
|
|
"Parse the area between START and END using the Wisent LALR parser.
|
|
|
|
|
Return the list of semantic tags found.
|
|
|
|
|
Optional arguments GOAL is a nonterminal symbol to start parsing at,
|
|
|
|
|
DEPTH is the lexical depth to scan, and RETURNONERROR is a flag to
|
|
|
|
|
stop parsing on syntax error, when non-nil.
|
|
|
|
|
The LALR parser automaton must be available in buffer local variable
|
|
|
|
|
`semantic--parse-table'.
|
|
|
|
|
|
|
|
|
|
Must be installed by `semantic-install-function-overrides' to override
|
|
|
|
|
the standard function `semantic-parse-region'."
|
|
|
|
|
(if (or (< start (point-min)) (> end (point-max)) (< end start))
|
|
|
|
|
(error "Invalid bounds [%s %s] passed to `wisent-parse-region'"
|
|
|
|
|
start end))
|
|
|
|
|
(let* ((case-fold-search semantic-case-fold)
|
|
|
|
|
(wisent-lex-istream (semantic-lex start end depth))
|
|
|
|
|
ptree tag cooked lstack wisent-lex-lookahead)
|
|
|
|
|
;; Loop while there are lexical tokens available
|
|
|
|
|
(while wisent-lex-istream
|
|
|
|
|
;; Parse
|
|
|
|
|
(setq wisent-lex-lookahead (car lstack)
|
|
|
|
|
tag (semantic-safe "wisent-parse-region: %s"
|
|
|
|
|
(wisent-parse semantic--parse-table
|
|
|
|
|
wisent-lexer-function
|
|
|
|
|
wisent-error-function
|
|
|
|
|
goal)))
|
|
|
|
|
;; Manage returned lookahead token
|
|
|
|
|
(if wisent-lookahead
|
|
|
|
|
(if (eq (car lstack) wisent-lookahead)
|
|
|
|
|
;; It is already at top of lookahead stack
|
|
|
|
|
(progn
|
|
|
|
|
(setq tag nil)
|
|
|
|
|
(while lstack
|
|
|
|
|
;; Collect unmatched tokens from lookahead stack
|
|
|
|
|
(run-hook-with-args
|
|
|
|
|
'wisent-discarding-token-functions (car lstack))
|
|
|
|
|
(setq lstack (cdr lstack))))
|
|
|
|
|
;; Push new lookahead token into the stack
|
|
|
|
|
(setq lstack (cons wisent-lookahead lstack))))
|
|
|
|
|
;; Manage the parser result
|
|
|
|
|
(cond
|
|
|
|
|
;; Parse succeeded, cook result
|
|
|
|
|
((consp tag)
|
|
|
|
|
(setq lstack nil ;; Clear the lookahead stack
|
|
|
|
|
cooked (semantic--tag-expand tag)
|
|
|
|
|
ptree (append cooked ptree))
|
|
|
|
|
(while cooked
|
|
|
|
|
(setq tag (car cooked)
|
|
|
|
|
cooked (cdr cooked))
|
|
|
|
|
(or (semantic--tag-get-property tag 'reparse-symbol)
|
|
|
|
|
(semantic--tag-put-property tag 'reparse-symbol goal)))
|
|
|
|
|
)
|
|
|
|
|
;; Return on error if requested
|
|
|
|
|
(returnonerror
|
|
|
|
|
(setq wisent-lex-istream nil)
|
|
|
|
|
))
|
|
|
|
|
;; Work in progress...
|
|
|
|
|
(if wisent-lex-istream
|
|
|
|
|
(and (eq semantic-working-type 'percent)
|
|
|
|
|
(boundp 'semantic--progress-reporter)
|
|
|
|
|
semantic--progress-reporter
|
|
|
|
|
(progress-reporter-update
|
|
|
|
|
semantic--progress-reporter
|
2015-07-31 10:12:37 -07:00
|
|
|
|
(floor (* 100.0 (semantic-lex-token-start
|
|
|
|
|
(car wisent-lex-istream)))
|
|
|
|
|
(point-max))))))
|
2009-09-07 16:38:54 +00:00
|
|
|
|
;; Return parse tree
|
|
|
|
|
(nreverse ptree)))
|
|
|
|
|
|
|
|
|
|
;;; Interfacing with edebug
|
|
|
|
|
;;
|
|
|
|
|
(add-hook
|
|
|
|
|
'edebug-setup-hook
|
|
|
|
|
#'(lambda ()
|
|
|
|
|
|
|
|
|
|
(def-edebug-spec define-wisent-lexer
|
|
|
|
|
(&define name stringp def-body)
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
))
|
|
|
|
|
|
|
|
|
|
(provide 'semantic/wisent)
|
|
|
|
|
|
|
|
|
|
;;; semantic/wisent.el ends here
|