emacs/lisp/cedet/semantic/wisent.el

340 lines
12 KiB
EmacsLisp
Raw Normal View History

;;; semantic/wisent.el --- Wisent - Semantic gateway
;; Copyright (C) 2001-2007, 2009-2020 Free Software Foundation, Inc.
;; Author: David Ponce <david@dponce.com>
;; Created: 30 Aug 2001
;; Keywords: syntax
;; This file is part of GNU Emacs.
;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>.
;;; Commentary:
;;
;; Here are functions necessary to use the Wisent LALR parser from
;; Semantic environment.
;;; History:
;;
;;; Code:
(require 'semantic)
(require 'semantic/wisent/wisent)
;;; Lexical analysis
;;
(defvar wisent-lex-istream nil
"Input stream of `semantic-lex' syntactic tokens.")
(defvar wisent-lex-lookahead nil
"Extra lookahead token.
When non-nil it is directly returned by `wisent-lex-function'.")
(defmacro wisent-lex-eoi ()
"Return an End-Of-Input lexical token.
2009-10-01 04:54:05 +00:00
The EOI token is like this: ($EOI \"\" POINT-MAX . POINT-MAX)."
`(cons ',wisent-eoi-term
(cons ""
(cons (point-max) (point-max)))))
(defmacro define-wisent-lexer (name doc &rest body)
"Create a new lexical analyzer with NAME.
DOC is a documentation string describing this analyzer.
When a token is available in `wisent-lex-istream', eval BODY forms
sequentially. BODY must return a lexical token for the LALR parser.
Each token in input was produced by `semantic-lex', it is a list:
(TOKSYM START . END)
TOKSYM is a terminal symbol used in the grammar.
START and END mark boundary in the current buffer of that token's
value.
Returned tokens must have the form:
(TOKSYM VALUE START . END)
where VALUE is the buffer substring between START and END positions."
`(defun
,name () ,doc
(cond
(wisent-lex-lookahead
(prog1 wisent-lex-lookahead
(setq wisent-lex-lookahead nil)))
(wisent-lex-istream
,@body)
((wisent-lex-eoi)))))
(define-wisent-lexer wisent-lex
"Return the next available lexical token in Wisent's form.
The variable `wisent-lex-istream' contains the list of lexical tokens
produced by `semantic-lex'. Pop the next token available and convert
it to a form suitable for the Wisent's parser."
(let* ((tk (car wisent-lex-istream)))
;; Eat input stream
(setq wisent-lex-istream (cdr wisent-lex-istream))
(cons (semantic-lex-token-class tk)
(cons (semantic-lex-token-text tk)
(semantic-lex-token-bounds tk)))))
;;; Syntax analysis
;;
(defvar wisent-error-function nil
"Function used to report parse error.
By default use the function `wisent-message'.")
(make-variable-buffer-local 'wisent-error-function)
(defvar wisent-lexer-function 'wisent-lex
"Function used to obtain the next lexical token in input.
Should be a lexical analyzer created with `define-wisent-lexer'.")
(make-variable-buffer-local 'wisent-lexer-function)
;; Tag production
;;
(defsubst wisent-raw-tag (semantic-tag)
"Return raw form of given Semantic tag SEMANTIC-TAG.
Should be used in semantic actions, in grammars, to build a Semantic
parse tree."
(nconc semantic-tag
(if (or $region
(setq $region (nthcdr 2 wisent-input)))
(list (car $region) (cdr $region))
(list (point-max) (point-max)))))
(defsubst wisent-cook-tag (raw-tag)
"From raw form of Semantic tag RAW-TAG, return a list of cooked tags.
Should be used in semantic actions, in grammars, to build a Semantic
parse tree."
(let* ((cooked (semantic--tag-expand raw-tag))
(l cooked))
(while l
(semantic--tag-put-property (car l) 'reparse-symbol $nterm)
(setq l (cdr l)))
cooked))
;; Unmatched syntax collector
;;
(defun wisent-collect-unmatched-syntax (nomatch)
"Add lexical token NOMATCH to the cache of unmatched tokens.
See also the variable `semantic-unmatched-syntax-cache'.
NOMATCH is in Wisent's form: (SYMBOL VALUE START . END)
and will be collected in `semantic-lex' form: (SYMBOL START . END)."
(let ((region (cddr nomatch)))
(and (number-or-marker-p (car region))
(number-or-marker-p (cdr region))
(setq semantic-unmatched-syntax-cache
(cons (cons (car nomatch) region)
semantic-unmatched-syntax-cache)))))
;; Parser plug-ins
;;
Expunge "allow" + infinitive without direct object from source and doc. Do the same for "permit", "enable", and "prevent". * doc/emacs/mule.texi: * doc/lispref/control.texi: * doc/lispref/display.texi: * doc/lispref/frames.texi: * doc/lispref/functions.texi: * doc/lispref/nonascii.texi: * doc/lispref/streams.texi: * doc/lispref/windows.texi: * doc/misc/dbus.texi: * doc/misc/eww.texi: * doc/misc/flymake.texi: * doc/misc/octave-mode.texi: * doc/misc/org.texi: * doc/misc/reftex.texi: * doc/misc/tramp.texi: * doc/misc/wisent.texi: * etc/NEWS: * lisp/autorevert.el: * lisp/cedet/mode-local.el: * lisp/cedet/semantic/senator.el: * lisp/cedet/semantic/wisent.el: * lisp/dos-fns.el: * lisp/frameset.el: * lisp/gnus/gnus-agent.el: * lisp/gnus/mm-util.el: * lisp/international/characters.el: * lisp/ldefs-boot.el: * lisp/mail/mailclient.el: * lisp/man.el: * lisp/mh-e/mh-search.el: * lisp/net/tramp-cmds.el: * lisp/net/tramp-gvfs.el: * lisp/org/org-crypt.el: * lisp/org/org-element.el: * lisp/org/org-feed.el: * lisp/org/org.el: * lisp/org/ox-ascii.el: * lisp/org/ox-icalendar.el: * lisp/org/ox-publish.el: * lisp/org/ox.el: * lisp/play/gamegrid.el: * lisp/play/gomoku.el: * lisp/progmodes/antlr-mode.el: * lisp/progmodes/python.el: * lisp/progmodes/vhdl-mode.el: * lisp/strokes.el: * lisp/textmodes/ispell.el: * lisp/tree-widget.el: * lisp/vc/pcvs.el: * lisp/window.el: * src/lisp.h: * src/w32.c: * src/w32heap.c: * src/w32term.c: * src/window.c: * src/xfaces.c: Replace solecisms like "This allow to do something" with a correct alternative, such as "This allow you to do something", "This allows something to be done" or "This allows the doing of something".
2016-01-24 20:30:39 +00:00
;; The following functions permit plugging the Wisent LALR parser in
;; Semantic toolkit. They use the standard API provided by Semantic
;; to plug parsers in.
;;
;; Two plug-ins are available, BUT ONLY ONE MUST BE USED AT A TIME:
;;
;; - `wisent-parse-stream' designed to override the standard function
;; `semantic-parse-stream'.
;;
;; - `wisent-parse-region' designed to override the standard function
;; `semantic-parse-region'.
;;
;; Maybe the latter is faster because it eliminates a lot of function
;; call.
;;
(defun wisent-parse-stream (stream goal)
"Parse STREAM using the Wisent LALR parser.
GOAL is a nonterminal symbol to start parsing at.
Return the list (STREAM SEMANTIC-STREAM) where STREAM are those
elements of STREAM that have not been used. SEMANTIC-STREAM is the
list of semantic tags found.
The LALR parser automaton must be available in buffer local variable
`semantic--parse-table'.
Must be installed by `semantic-install-function-overrides' to override
the standard function `semantic-parse-stream'."
(let (wisent-lex-istream wisent-lex-lookahead la-elt cache)
;; IMPLEMENTATION NOTES:
;; `wisent-parse' returns a lookahead token when it stopped
;; parsing before encountering the end of input. To re-enter the
;; parser it is necessary to push back in the lexical input stream
;; the last lookahead token issued. Because the format of
;; lookahead tokens and tokens in STREAM can be different the
;; lookahead token is put in the variable `wisent-lex-lookahead'
;; before calling `wisent-parse'. Wisent's lexers always pop the
lisp/*.el, src/*.c: Fix typos in docstrings * lisp/apropos.el (apropos-do-all): * lisp/auth-source-pass.el (auth-source-pass--select-from-entries): * lisp/auth-source.el (auth-source-user-or-password): * lisp/calc/calc-forms.el (math-tzone-names): * lisp/calendar/diary-lib.el (diary-face-attrs) (diary-mark-entries-1): * lisp/cedet/cedet-files.el (cedet-files-list-recursively): * lisp/cedet/ede.el (ede-constructing, ede-deep-rescan): * lisp/cedet/ede/cpp-root.el (ede-cpp-root-header-file-p): * lisp/cedet/ede/proj.el (ede-proj-target-makefile): * lisp/cedet/inversion.el (inversion-check-version) (inversion-test): * lisp/cedet/mode-local.el (mode-local-map-file-buffers): * lisp/cedet/semantic/complete.el (semantic-displayer-ghost): * lisp/cedet/semantic/db-find.el (semanticdb-find-translate-path-default): * lisp/cedet/semantic/db.el (semanticdb-table) (semanticdb-search-system-databases): * lisp/cedet/semantic/imenu.el (semantic-imenu-index-directory): * lisp/cedet/semantic/java.el (semantic-java-doc-keywords-map): * lisp/cedet/semantic/lex-spp.el (semantic-lex-spp-use-headers-flag): * lisp/cedet/semantic/lex.el (semantic-lex-make-keyword-table) (semantic-lex-make-type-table, semantic-lex-debug-analyzers): * lisp/cedet/semantic/tag-ls.el (semantic-tag-abstract-p) (semantic-tag-leaf-p, semantic-tag-static-p) (semantic-tag-prototype-p): * lisp/dnd.el (dnd-open-remote-file-function, dnd-open-local-file): * lisp/emacs-lisp/eieio-opt.el (eieio-build-class-alist) (eieio-read-class, eieio-read-subclass): * lisp/emacs-lisp/generator.el (cps--replace-variable-references) (cps--handle-loop-for): * lisp/erc/erc-dcc.el (erc-dcc-list, erc-dcc-member, erc-dcc-server) (erc-dcc-auto-mask-p, erc-dcc-get-file, erc-dcc-chat-accept): * lisp/eshell/em-pred.el (eshell-pred-file-type): * lisp/faces.el (defined-colors-with-face-attributes): * lisp/font-core.el (font-lock-mode): * lisp/frame.el (frame-restack): * lisp/net/shr.el (shr-image-animate): * lisp/org/org-agenda.el (org-agenda-change-all-lines) (org-agenda-today-p): * lisp/org/org-id.el (org-id-get): * lisp/org/org.el (org-highlight-latex-and-related) (org--valid-property-p): * lisp/org/ox-beamer.el (org-beamer--get-label): * lisp/org/ox-latex.el (org-latex--caption-above-p): * lisp/org/ox-odt.el (org-odt--copy-image-file) (org-odt--copy-formula-file): * lisp/org/ox.el (org-export-with-timestamps): * lisp/progmodes/verilog-mode.el (verilog-indent-declaration-macros): * lisp/ses.el (ses-file-format-extend-parameter-list): * lisp/term.el (ansi-term): * lisp/textmodes/bibtex.el (bibtex-no-opt-remove-re) (bibtex-beginning-of-first-entry, bibtex-autokey-get-title) (bibtex-read-key, bibtex-initialize): * lisp/textmodes/flyspell.el (flyspell-word): * lisp/view.el (view-mode-exit): * src/composite.c: * src/floatfns.c (Fisnan): Fix typos in docstrings.
2019-09-19 04:32:25 +02:00
;; next lexical token from that variable when non-nil, then from
;; the lexical input stream.
;;
;; The first element of STREAM is used to keep lookahead tokens
;; across successive calls to `wisent-parse-stream'. In fact
;; what is kept is a stack of lookaheads encountered so far. It
;; is cleared when `wisent-parse' returns a valid semantic tag,
;; or twice the same lookahead token! The latter indicates that
;; there is a syntax error on that token. If so, tokens currently
;; in the lookahead stack have not been used, and are moved into
;; `semantic-unmatched-syntax-cache'. When the parser will be
;; re-entered, a new lexical token will be read from STREAM.
;;
;; The first element of STREAM that contains the lookahead stack
;; has this format (compatible with the format of `semantic-lex'
;; tokens):
;;
;; (LOOKAHEAD-STACK START . END)
;;
;; where LOOKAHEAD-STACK is a list of lookahead tokens. And
;; START/END are the bounds of the lookahead at top of stack.
;; Retrieve lookahead token from stack
(setq la-elt (car stream))
(if (consp (car la-elt))
;; The first elt of STREAM contains a lookahead stack
(setq wisent-lex-lookahead (caar la-elt)
stream (cdr stream))
(setq la-elt nil))
;; Parse
(setq wisent-lex-istream stream
cache (semantic-safe "wisent-parse-stream: %s"
(condition-case error-to-filter
(wisent-parse semantic--parse-table
wisent-lexer-function
wisent-error-function
goal)
(args-out-of-range
(if (and (not debug-on-error)
(= wisent-parse-max-stack-size
(nth 2 error-to-filter)))
(progn
(message "wisent-parse-stream: %s"
(error-message-string error-to-filter))
(message "wisent-parse-max-stack-size \
might need to be increased"))
(apply 'signal error-to-filter))))))
;; Manage returned lookahead token
(if wisent-lookahead
(if (eq (caar la-elt) wisent-lookahead)
;; It is already at top of lookahead stack
(progn
(setq cache nil
la-elt (car la-elt))
(while la-elt
;; Collect unmatched tokens from the stack
(run-hook-with-args
'wisent-discarding-token-functions (car la-elt))
(setq la-elt (cdr la-elt))))
;; New lookahead token
(if (or (consp cache) ;; Clear the stack if parse succeeded
(null la-elt))
(setq la-elt (cons nil nil)))
;; Push it into the stack
(setcar la-elt (cons wisent-lookahead (car la-elt)))
;; Update START/END
(setcdr la-elt (cddr wisent-lookahead))
;; Push (LOOKAHEAD-STACK START . END) in STREAM
(setq wisent-lex-istream (cons la-elt wisent-lex-istream))))
;; Return (STREAM SEMANTIC-STREAM)
(list wisent-lex-istream
(if (consp cache) cache '(nil))
)))
(defun wisent-parse-region (start end &optional goal depth returnonerror)
"Parse the area between START and END using the Wisent LALR parser.
Return the list of semantic tags found.
Optional arguments GOAL is a nonterminal symbol to start parsing at,
DEPTH is the lexical depth to scan, and RETURNONERROR is a flag to
stop parsing on syntax error, when non-nil.
The LALR parser automaton must be available in buffer local variable
`semantic--parse-table'.
Must be installed by `semantic-install-function-overrides' to override
the standard function `semantic-parse-region'."
(if (or (< start (point-min)) (> end (point-max)) (< end start))
(error "Invalid bounds [%s %s] passed to `wisent-parse-region'"
start end))
(let* ((case-fold-search semantic-case-fold)
(wisent-lex-istream (semantic-lex start end depth))
ptree tag cooked lstack wisent-lex-lookahead)
;; Loop while there are lexical tokens available
(while wisent-lex-istream
;; Parse
(setq wisent-lex-lookahead (car lstack)
tag (semantic-safe "wisent-parse-region: %s"
(wisent-parse semantic--parse-table
wisent-lexer-function
wisent-error-function
goal)))
;; Manage returned lookahead token
(if wisent-lookahead
(if (eq (car lstack) wisent-lookahead)
;; It is already at top of lookahead stack
(progn
(setq tag nil)
(while lstack
;; Collect unmatched tokens from lookahead stack
(run-hook-with-args
'wisent-discarding-token-functions (car lstack))
(setq lstack (cdr lstack))))
;; Push new lookahead token into the stack
(setq lstack (cons wisent-lookahead lstack))))
;; Manage the parser result
(cond
;; Parse succeeded, cook result
((consp tag)
(setq lstack nil ;; Clear the lookahead stack
cooked (semantic--tag-expand tag)
ptree (append cooked ptree))
(while cooked
(setq tag (car cooked)
cooked (cdr cooked))
(or (semantic--tag-get-property tag 'reparse-symbol)
(semantic--tag-put-property tag 'reparse-symbol goal)))
)
;; Return on error if requested
(returnonerror
(setq wisent-lex-istream nil)
))
;; Work in progress...
(if wisent-lex-istream
(and (eq semantic-working-type 'percent)
(boundp 'semantic--progress-reporter)
semantic--progress-reporter
(progress-reporter-update
semantic--progress-reporter
Don't overflow if computing approximate percentage * lisp/align.el (align-region): * lisp/cedet/semantic.el (semantic-repeat-parse-whole-stream): * lisp/cedet/semantic/wisent.el (wisent-parse-region): * lisp/cus-edit.el (custom-buffer-create-internal): * lisp/emacs-lisp/checkdoc.el (checkdoc-interactive-ispell-loop) (checkdoc-message-interactive-ispell-loop, checkdoc-next-error) (checkdoc-next-message-error): * lisp/emacs-lisp/eieio-opt.el (eieio-display-method-list): * lisp/epa.el (epa-progress-callback-function): * lisp/erc/erc-dcc.el (erc-dcc-do-LIST-command): * lisp/ffap.el (ffap-menu-rescan): * lisp/gnus/nnbabyl.el (nnbabyl-retrieve-headers): * lisp/gnus/nndiary.el (nndiary-retrieve-headers): * lisp/gnus/nneething.el (nneething-retrieve-headers): * lisp/gnus/nnmbox.el (nnmbox-retrieve-headers): * lisp/gnus/nnmh.el (nnmh-retrieve-headers): * lisp/gnus/nnml.el (nnml-retrieve-headers): * lisp/gnus/nnspool.el (nnspool-retrieve-headers): * lisp/gnus/nntp.el (nntp-retrieve-headers) (nntp-retrieve-articles): * lisp/imenu.el (imenu--relative-position): * lisp/international/ja-dic-cnv.el (skkdic-collect-okuri-nasi) (skkdic-convert-okuri-nasi): * lisp/net/ange-ftp.el (ange-ftp-process-handle-hash): * lisp/nxml/rng-valid.el (rng-compute-mode-line-string): * lisp/org/org-list.el (org-update-checkbox-count): * lisp/org/org.el (org-table-map-tables) (org-update-parent-todo-statistics): * lisp/play/decipher.el (decipher-insert-frequency-counts) (decipher-analyze-buffer): * lisp/profiler.el (profiler-format-percent): * lisp/progmodes/cc-cmds.el (c-progress-update): * lisp/progmodes/cpp.el (cpp-highlight-buffer): * lisp/progmodes/idlwave.el (idlwave-convert-xml-system-routine-info) (idlwave-list-load-path-shadows): * lisp/progmodes/opascal.el (opascal-step-progress): * lisp/progmodes/vhdl-mode.el (vhdl-update-progress-info) (vhdl-scan-directory-contents): * lisp/textmodes/bibtex.el (bibtex-progress-message): * lisp/textmodes/flyspell.el (flyspell-small-region) (flyspell-external-point-words): * lisp/textmodes/table.el (table-recognize): Prefer (floor (* 100.0 NUMERATOR) DENOMINATOR) when calculating progress-report percentages and the like. This avoids problems if (* 100 NUMERATOR) would overflow. * lisp/gnus/gnus-registry.el (gnus-registry-import-eld): * lisp/gnus/registry.el (registry-reindex): Use (* 100.0 ...) rather than (* 100 ...) to avoid int overflow issues. * lisp/descr-text.el (describe-char): * lisp/org/org-colview.el (org-nofm-to-completion): * lisp/ps-print.el (ps-plot): * lisp/simple.el (what-cursor-position): Prefer (round (* 100.0 NUMERATOR) DENOMINATOR) to a more-complicated and less-accurate approximation.
2015-07-31 10:12:37 -07:00
(floor (* 100.0 (semantic-lex-token-start
(car wisent-lex-istream)))
(point-max))))))
;; Return parse tree
(nreverse ptree)))
;;; Interfacing with edebug
;;
(add-hook
'edebug-setup-hook
#'(lambda ()
(def-edebug-spec define-wisent-lexer
(&define name stringp def-body)
)
))
(provide 'semantic/wisent)
;;; semantic/wisent.el ends here