diff --git a/lisp/progmodes/cperl-mode.el b/lisp/progmodes/cperl-mode.el index 0dffe279c39..44a75269524 100644 --- a/lisp/progmodes/cperl-mode.el +++ b/lisp/progmodes/cperl-mode.el @@ -440,12 +440,6 @@ after reload." :type 'boolean :group 'cperl-speed) -(defcustom cperl-imenu-addback nil - "Not-nil means add backreferences to generated `imenu's. -May require patched `imenu' and `imenu-go'. Obsolete." - :type 'boolean - :group 'cperl-help-system) - (defcustom cperl-max-help-size 66 "Non-nil means shrink-wrapping of info-buffer allowed up to these percents." :type '(choice integer (const nil)) @@ -1216,6 +1210,153 @@ versions of Emacs." The expansion is entirely correct because it uses the C preprocessor." t) + +;;; Perl Grammar Components +;; +;; The following regular expressions are building blocks for a +;; minimalistic Perl grammar, to be used instead of individual (and +;; not always consistent) literal regular expressions. + +(defconst cperl--basic-identifier-regexp + (rx (sequence (or alpha "_") (* (or word "_")))) + "A regular expression for the name of a \"basic\" Perl variable. +Neither namespace separators nor sigils are included. As is, +this regular expression applies to labels,subroutine calls where +the ampersand sigil is not required, and names of subroutine +attributes.") + +(defconst cperl--label-regexp + (rx-to-string + `(sequence + symbol-start + (regexp ,cperl--basic-identifier-regexp) + (0+ space) + ":")) + "A regular expression for a Perl label. +By convention, labels are uppercase alphabetics, but this isn't +enforced.") + +(defconst cperl--normal-identifier-regexp + (rx-to-string + `(or + (sequence + (1+ (sequence + (opt (regexp ,cperl--basic-identifier-regexp)) + "::")) + (opt (regexp ,cperl--basic-identifier-regexp))) + (regexp ,cperl--basic-identifier-regexp))) + "A regular expression for a Perl variable name with optional namespace. +Examples are `foo`, `Some::Module::VERSION`, and `::` (yes, that +is a legal variable name).") + +(defconst cperl--special-identifier-regexp + (rx-to-string + `(or + (1+ digit) ; $0, $1, $2, ... + (sequence "^" (any "A-Z" "]^_?\\")) ; $^V + (sequence "{" (0+ space) ; ${^MATCH} + "^" (any "A-Z" "]^_?\\") + (0+ (any "A-Z" "_" digit)) + (0+ space) "}") + (in "!\"$%&'()+,-./:;<=>?@\\]^_`|~"))) ; $., $|, $", ... but not $^ or ${ + "The list of Perl \"punctuation\" variables, as listed in perlvar.") + +(defconst cperl--ws-regexp + (rx-to-string + '(or space "\n")) + "Regular expression for a single whitespace in Perl.") + +(defconst cperl--eol-comment-regexp + (rx-to-string + '(sequence "#" (0+ (not (in "\n"))) "\n")) + "Regular expression for a single end-of-line comment in Perl") + +(defconst cperl--ws-or-comment-regexp + (rx-to-string + `(1+ + (or + (regexp ,cperl--ws-regexp) + (regexp ,cperl--eol-comment-regexp)))) + "Regular expression for a sequence of whitespace and comments in Perl.") + +(defconst cperl--ows-regexp + (rx-to-string + `(opt (regexp ,cperl--ws-or-comment-regexp))) + "Regular expression for optional whitespaces or comments in Perl") + +(defconst cperl--version-regexp + (rx-to-string + `(or + (sequence (opt "v") + (>= 2 (sequence (1+ digit) ".")) + (1+ digit) + (opt (sequence "_" (1+ word)))) + (sequence (1+ digit) + (opt (sequence "." (1+ digit))) + (opt (sequence "_" (1+ word)))))) + "A sequence for recommended version number schemes in Perl.") + +(defconst cperl--package-regexp + (rx-to-string + `(sequence + "package" ; FIXME: the "class" and "role" keywords need to be + ; recognized soon...ish. + (regexp ,cperl--ws-or-comment-regexp) + (group (regexp ,cperl--normal-identifier-regexp)) + (opt + (sequence + (1+ (regexp ,cperl--ws-or-comment-regexp)) + (group (regexp ,cperl--version-regexp)))))) + "A regular expression for package NAME VERSION in Perl. +Contains two groups for the package name and version.") + +(defconst cperl--package-for-imenu-regexp + (rx-to-string + `(sequence + (regexp ,cperl--package-regexp) + (regexp ,cperl--ows-regexp) + (group (or ";" "{")))) + "A regular expression to collect package names for `imenu`. +Catches \"package NAME;\", \"package NAME VERSION;\", \"package +NAME BLOCK\" and \"package NAME VERSION BLOCK.\" Contains three +groups: Two from `cperl--package-regexp` for the package name and +version, and a third to detect \"package BLOCK\" syntax.") + +(defconst cperl--sub-name-regexp + (rx-to-string + `(sequence + (optional (sequence (group (or "my" "state" "our")) + (regexp ,cperl--ws-or-comment-regexp))) + "sub" ; FIXME: the "method" and maybe "fun" keywords need to be + ; recognized soon...ish. + (regexp ,cperl--ws-or-comment-regexp) + (group (regexp ,cperl--normal-identifier-regexp)))) + "A regular expression to detect a subroutine start. +Contains two groups: One for to distinguish lexical from +\"normal\" subroutines and one for the subroutine name.") + +(defconst cperl--pod-heading-regexp + (rx-to-string + `(sequence + line-start "=head" + (group (in "1-4")) + (1+ (in " \t")) + (group (1+ (not (in "\n")))) + line-end)) ; that line-end seems to be redundant? + "A regular expression to detect a POD heading. +Contains two groups: One for the heading level, and one for the +heading text.") + +(defconst cperl--imenu-entries-regexp + (rx-to-string + `(or + (regexp ,cperl--package-for-imenu-regexp) ; 1..3 + (regexp ,cperl--sub-name-regexp) ; 4..5 + (regexp ,cperl--pod-heading-regexp))) ; 6..7 + "A regular expression to collect stuff that goes into the `imenu` index. +Covers packages, subroutines, and POD headings.") + + ;; These two must be unwound, otherwise take exponential time (defconst cperl-maybe-white-and-comment-rex "[ \t\n]*\\(#[^\n]*\n[ \t\n]*\\)*" "Regular expression to match optional whitespace with interspersed comments. @@ -1227,8 +1368,7 @@ Should contain exactly one group.") Should contain exactly one group.") -;; Is incorporated in `cperl-imenu--function-name-regexp-perl' -;; `cperl-outline-regexp', `defun-prompt-regexp'. +;; Is incorporated in `cperl-outline-regexp', `defun-prompt-regexp'. ;; Details of groups in this may be used in several functions; see comments ;; near mentioned above variable(s)... ;; sub($$):lvalue{} sub:lvalue{} Both allowed... @@ -5147,117 +5287,80 @@ indentation and initial hashes. Behaves usually outside of comment." ;; Previous space could have gone: (or (memq (preceding-char) '(?\s ?\t)) (insert " ")))))) -(defun cperl-imenu-addback (lst &optional isback name) - ;; We suppose that the lst is a DAG, unless the first element only - ;; loops back, and ISBACK is set. Thus this function cannot be - ;; applied twice without ISBACK set. - (cond ((not cperl-imenu-addback) lst) - (t - (or name - (setq name "+++BACK+++")) - (mapc (lambda (elt) - (if (and (listp elt) (listp (cdr elt))) - (progn - ;; In the other order it goes up - ;; one level only ;-( - (setcdr elt (cons (cons name lst) - (cdr elt))) - (cperl-imenu-addback (cdr elt) t name)))) - (if isback (cdr lst) lst)) - lst))) - -(defun cperl-imenu--create-perl-index (&optional regexp) - (require 'imenu) ; May be called from TAGS creator - (let ((index-alist '()) (index-pack-alist '()) (index-pod-alist '()) +(defun cperl-imenu--create-perl-index () + "Implement `imenu-create-index-function` for CPerl mode. +This function relies on syntaxification to exclude lines which +look like declarations but actually are part of a string, a +comment, or POD." + (interactive) ; We'll remove that at some point + (goto-char (point-min)) + (cperl-update-syntaxification (point-max)) + (let ((case-fold-search nil) + (index-alist '()) + (index-package-alist '()) + (index-pod-alist '()) + (index-sub-alist '()) (index-unsorted-alist '()) - (index-meth-alist '()) meth - packages ends-ranges p marker is-proto - is-pack index index1 name (end-range 0) package) - (goto-char (point-min)) - (cperl-update-syntaxification (point-max)) - ;; Search for the function - (progn ;;save-match-data - (while (re-search-forward - (or regexp cperl-imenu--function-name-regexp-perl) - nil t) - ;; 2=package-group, 5=package-name 8=sub-name + (package-stack '()) ; for package NAME BLOCK + (current-package "(main)") + (current-package-end (point-max))) ; end of package scope + ;; collect index entries + (while (re-search-forward cperl--imenu-entries-regexp nil t) + ;; First, check whether we have left the scope of previously + ;; recorded packages, and if so, eliminate them from the stack. + (while (< current-package-end (point)) + (setq current-package (pop package-stack)) + (setq current-package-end (pop package-stack))) + (let ((state (syntax-ppss)) + name marker) ; for the "current" entry (cond - ((and ; Skip some noise if building tags - (match-beginning 5) ; package name - ;;(eq (char-after (match-beginning 2)) ?p) ; package - (not (save-match-data - (looking-at "[ \t\n]*;")))) ; Plain text word 'package' - nil) - ((and - (or (match-beginning 2) - (match-beginning 8)) ; package or sub - ;; Skip if quoted (will not skip multi-line ''-strings :-(): - (null (get-text-property (match-beginning 1) 'syntax-table)) - (null (get-text-property (match-beginning 1) 'syntax-type)) - (null (get-text-property (match-beginning 1) 'in-pod))) - (setq is-pack (match-beginning 2)) - ;; (if (looking-at "([^()]*)[ \t\n\f]*") - ;; (goto-char (match-end 0))) ; Messes what follows - (setq meth nil - p (point)) - (while (and ends-ranges (>= p (car ends-ranges))) - ;; delete obsolete entries - (setq ends-ranges (cdr ends-ranges) packages (cdr packages))) - (setq package (or (car packages) "") - end-range (or (car ends-ranges) 0)) - (if is-pack ; doing "package" - (progn - (if (match-beginning 5) ; named package - (setq name (buffer-substring (match-beginning 5) - (match-end 5)) - name (progn - (set-text-properties 0 (length name) nil name) - name) - package (concat name "::") - name (concat "package " name)) - ;; Support nameless packages - (setq name "package;" package "")) - (setq end-range - (save-excursion - (parse-partial-sexp (point) (point-max) -1) (point)) - ends-ranges (cons end-range ends-ranges) - packages (cons package packages))) - (setq is-proto - (or (eq (following-char) ?\;) - (eq 0 (get-text-property (point) 'attrib-group))))) - ;; Skip this function name if it is a prototype declaration. - (if (and is-proto (not is-pack)) nil - (or is-pack - (setq name - (buffer-substring (match-beginning 8) (match-end 8))) - (set-text-properties 0 (length name) nil name)) - (setq marker (make-marker)) - (set-marker marker (match-end (if is-pack 2 8))) - (cond (is-pack nil) - ((string-match "[:']" name) - (setq meth t)) - ((> p end-range) nil) - (t - (setq name (concat package name) meth t))) - (setq index (cons name marker)) - (if is-pack - (push index index-pack-alist) - (push index index-alist)) - (if meth (push index index-meth-alist)) - (push index index-unsorted-alist))) - ((match-beginning 16) ; POD section - (setq name (buffer-substring (match-beginning 17) (match-end 17)) - marker (make-marker)) - (set-marker marker (match-beginning 17)) - (set-text-properties 0 (length name) nil name) - (setq name (concat (make-string - (* 3 (- (char-after (match-beginning 16)) ?1)) - ?\ ) - name) - index (cons name marker)) - (setq index1 (cons (concat "=" name) (cdr index))) - (push index index-pod-alist) - (push index1 index-unsorted-alist))))) + ((nth 3 state) nil) ; matched in a string, so skip + ((match-string 1) ; found a package name! + (unless (nth 4 state) ; skip if in a comment + (setq name (match-string-no-properties 1) + marker (copy-marker (match-end 1))) + (if (string= (match-string 3) ";") + (setq current-package name) ; package NAME; + ;; No semicolon, therefore we have: package NAME BLOCK. + ;; Stash the current package, because we need to restore + ;; it after the end of BLOCK. + (push current-package-end package-stack) + (push current-package package-stack) + ;; record the current name and its scope + (setq current-package name) + (setq current-package-end (save-excursion + (goto-char (match-beginning 3)) + (forward-sexp) + (point))) + (push (cons name marker) index-package-alist) + (push (cons (concat "package " name) marker) index-unsorted-alist)))) + ((match-string 5) ; found a sub name! + (unless (nth 4 state) ; skip if in a comment + (setq name (match-string-no-properties 5) + marker (copy-marker (match-end 5))) + ;; Qualify the sub name with the package if it doesn't + ;; already have one, and if it isn't lexically scoped. + ;; "my" and "state" subs are lexically scoped, but "our" + ;; are just lexical aliases to package subs. + (if (and (null (string-match "::" name)) + (or (null (match-string 4)) + (string-equal (match-string 4) "our"))) + (setq name (concat current-package "::" name))) + (let ((index (cons name marker))) + (push index index-alist) + (push index index-sub-alist) + (push index index-unsorted-alist)))) + ((match-string 6) ; found a POD heading! + (when (get-text-property (match-beginning 6) 'in-pod) + (setq name (concat (make-string + (* 3 (- (char-after (match-beginning 6)) ?1)) + ?\ ) + (match-string-no-properties 7)) + marker (copy-marker (match-beginning 7))) + (push (cons name marker) index-pod-alist) + (push (cons (concat "=" name) marker) index-unsorted-alist))) + (t (error "Unidentified match: %s" (match-string 0)))))) + ;; Now format the collected stuff (setq index-alist (if (default-value 'imenu-sort-function) (sort index-alist (default-value 'imenu-sort-function)) @@ -5266,14 +5369,14 @@ indentation and initial hashes. Behaves usually outside of comment." (push (cons "+POD headers+..." (nreverse index-pod-alist)) index-alist)) - (and (or index-pack-alist index-meth-alist) - (let ((lst index-pack-alist) hier-list pack elt group name) - ;; Remove "package ", reverse and uniquify. + (and (or index-package-alist index-sub-alist) + (let ((lst index-package-alist) hier-list pack elt group name) + ;; reverse and uniquify. (while lst - (setq elt (car lst) lst (cdr lst) name (substring (car elt) 8)) + (setq elt (car lst) lst (cdr lst) name (car elt)) (if (assoc name hier-list) nil (setq hier-list (cons (cons name (cdr elt)) hier-list)))) - (setq lst index-meth-alist) + (setq lst index-sub-alist) (while lst (setq elt (car lst) lst (cdr lst)) (cond ((string-match "\\(::\\|'\\)[_a-zA-Z0-9]+$" (car elt)) @@ -5301,17 +5404,18 @@ indentation and initial hashes. Behaves usually outside of comment." (push (cons "+Hierarchy+..." hier-list) index-alist))) - (and index-pack-alist + (and index-package-alist (push (cons "+Packages+..." - (nreverse index-pack-alist)) + (nreverse index-package-alist)) index-alist)) - (and (or index-pack-alist index-pod-alist + (and (or index-package-alist index-pod-alist (default-value 'imenu-sort-function)) index-unsorted-alist (push (cons "+Unsorted List+..." (nreverse index-unsorted-alist)) index-alist)) - (cperl-imenu-addback index-alist))) + ;; Finally, return the whole collection + index-alist)) ;; Suggested by Mark A. Hershberger @@ -6631,9 +6735,7 @@ One may build such TAGS files from CPerl mode menu." (cperl-tags-treeify to 1) (setcar (nthcdr 2 cperl-hierarchy) (cperl-menu-to-keymap (cons '("+++UPDATE+++" . -999) (cdr to)))) - (message "Updating list of classes: done, requesting display...") - ;;(cperl-imenu-addback (nth 2 cperl-hierarchy)) - )) + (message "Updating list of classes: done, requesting display..."))) (or (nth 2 cperl-hierarchy) (error "No items found")) (setq update diff --git a/test/lisp/progmodes/cperl-mode-resources/grammar.pl b/test/lisp/progmodes/cperl-mode-resources/grammar.pl new file mode 100644 index 00000000000..c05fd7efc2a --- /dev/null +++ b/test/lisp/progmodes/cperl-mode-resources/grammar.pl @@ -0,0 +1,158 @@ +use 5.024; +use strict; +use warnings; + +sub outside { + say "Line @{[__LINE__]}: package '@{[__PACKAGE__]}'"; +} + +package Package; + +=head1 NAME + +grammar - A Test resource for regular expressions + +=head1 SYNOPSIS + +A Perl file showing a variety of declarations + +=head1 DESCRIPTION + +This file offers several syntactical constructs for packages, +subroutines, and POD to test the imenu capabilities of CPerl mode. + +Perl offers syntactical variations for package and subroutine +declarations. Packages may, or may not, have a version and may, or +may not, have a block of code attached to them. Subroutines can have +old-style prototypes, attributes, and signatures which are still +experimental but widely accepted. + +Various Extensions and future Perl versions will probably add new +keywords for "class" and "method", both with syntactical extras of +their own. + +This test file tries to keep up with them. + +=head2 Details + +The code is supposed to identify and exclude false positives, +e.g. declarations in a string or in POD, as well as POD in a string. +These should not go into the imenu index. + +=cut + +our $VERSION = 3.1415; +say "Line @{[__LINE__]}: package '@{[__PACKAGE__]}', version $VERSION"; + +sub in_package { + # Special test for POD: A line which looks like POD, but actually + # is part of a multiline string. In the case shown here, the + # semicolon is not part of the string, but POD headings go to the + # end of the line. The code needs to distinguish between a POD + # heading "This Is Not A Pod/;" and a multiline string. + my $not_a_pod = q/Another false positive: + +=head1 This Is Not A Pod/; + +} + +sub Shoved::elsewhere { + say "Line @{[__LINE__]}: package '@{[__PACKAGE__]}', sub Shoved::elsewhere"; +} + +sub prototyped ($$) { + ...; +} + +package Versioned::Package 0.07; +say "Line @{[__LINE__]}: package '@{[__PACKAGE__]}', version $VERSION"; + +sub versioned { + # This sub is in package Versioned::Package + say "sub 'versioned' in package '", __PACKAGE__, "'"; +} + +versioned(); + +my $false_positives = <<'EOH'; +The following declarations are not supposed to be recorded for imenu. +They are in a HERE-doc, which is a generic comment in CPerl mode. + +package Don::T::Report::This; +sub this_is_no_sub { + my $self = shuffle; +} + +And this is not a POD heading: + +=head1 Not a POD heading, just a string. + +EOH + +package Block { + our $VERSION = 2.7182; + say "Line @{[__LINE__]}: package '@{[__PACKAGE__]}', version $VERSION"; + + sub attr:lvalue { + say "sub 'attr' in package '", __PACKAGE__, "'"; + } + + attr(); + + package Block::Inner { + # This hopefully doesn't happen too often. + say "Line @{[__LINE__]}: package '@{[__PACKAGE__]}', version $VERSION"; + } + + # Now check that we're back to package "Block" + say "Line @{[__LINE__]}: package '@{[__PACKAGE__]}', version $VERSION"; +} + +sub outer { + # This is in package Versioned::Package + say "Line @{[__LINE__]}: package '@{[__PACKAGE__]}', version $VERSION"; +} + +outer(); + +package Versioned::Block 42 { + say "Line @{[__LINE__]}: package '@{[__PACKAGE__]}', version $VERSION"; + + my sub lexical { + say "sub 'lexical' in package '", __PACKAGE__, "'"; + } + + lexical(); + + use experimental 'signatures'; + sub signatured :prototype($@) ($self,@rest) + { + ...; + } +} + +# After all is said and done, we're back in package Versioned::Package. +say "We're in package '", __PACKAGE__, "' now."; +say "Now try to call a subroutine which went out of scope:"; +eval { lexical() }; +say $@ if $@; + +# Now back to Package. This must not appear separately in the +# hierarchy list. +package Package; + +our sub in_package_again { + say "Line @{[__LINE__]}: package '@{[__PACKAGE__]}', version $VERSION"; +} + + +package :: { + # This is just a weird, but legal, package name. + say "Line @{[__LINE__]}: package '@{[__PACKAGE__]}', version $VERSION"; + + in_package_again(); # weird, but calls the sub from above +} + +Shoved::elsewhere(); + +1; diff --git a/test/lisp/progmodes/cperl-mode-tests.el b/test/lisp/progmodes/cperl-mode-tests.el index 943c454445c..61e4ece49b7 100644 --- a/test/lisp/progmodes/cperl-mode-tests.el +++ b/test/lisp/progmodes/cperl-mode-tests.el @@ -166,6 +166,101 @@ point in the distant past, and is still broken in perl-mode. " (if (match-beginning 3) 0 perl-indent-level))))))) +;;; Grammar based tests: unit tests + +(defun cperl-test--validate-regexp (regexp valid &optional invalid) + "Runs tests for elements of VALID and INVALID lists against REGEXP. +Tests with elements from VALID must match, tests with elements +from INVALID must not match. The match string must be equal to +the whole string." + (funcall cperl-test-mode) + (dolist (string valid) + (should (string-match regexp string)) + (should (string= (match-string 0 string) string))) + (when invalid + (dolist (string invalid) + (should-not + (and (string-match regexp string) + (string= (match-string 0 string) string)))))) + +(ert-deftest cperl-test-ws-regexp () + "Tests capture of very simple regular expressions (yawn)." + (let ((valid + '(" " "\t" "\n")) + (invalid + '("a" " " ""))) + (cperl-test--validate-regexp cperl--ws-regexp + valid invalid))) + +(ert-deftest cperl-test-ws-or-comment-regexp () + "Tests sequences of whitespace and comment lines." + (let ((valid + `(" " "\t#\n" "\n# \n" + ,(concat "# comment\n" "# comment\n" "\n" "#comment\n"))) + (invalid + '("=head1 NAME\n" ))) + (cperl-test--validate-regexp cperl--ws-or-comment-regexp + valid invalid))) + +(ert-deftest cperl-test-version-regexp () + "Tests the regexp for recommended syntax of versions in Perl." + (let ((valid + '("1" "1.1" "1.1_1" "5.032001" + "v120.100.103")) + (invalid + '("alpha" "0." ".123" "1E2" + "v1.1" ; a "v" version string needs at least 3 components + ;; bad examples from "Version numbers should be boring" + ;; by xdg AKA David A. Golden + "1.20alpha" "2.34beta2" "2.00R3"))) + (cperl-test--validate-regexp cperl--version-regexp + valid invalid))) + +(ert-deftest cperl-test-package-regexp () + "Tests the regular expression of Perl package names with versions. +Also includes valid cases with whitespace in strange places." + (let ((valid + '("package Foo" + "package Foo::Bar" + "package Foo::Bar v1.2.3" + "package Foo::Bar::Baz 1.1" + "package \nFoo::Bar\n 1.00")) + (invalid + '("package Foo;" ; semicolon must not be included + "package Foo 1.1 {" ; nor the opening brace + "packageFoo" ; not a package declaration + "package Foo1.1" ; invalid package name + "class O3D::Sphere"))) ; class not yet supported + (cperl-test--validate-regexp cperl--package-regexp + valid invalid))) + +;;; Function test: Building an index for imenu + +(ert-deftest cperl-test-imenu-index () + "Test index creation for imenu. +This test relies on the specific layout of the index alist as +created by CPerl mode, so skip it for Perl mode." + (skip-unless (eq cperl-test-mode #'cperl-mode)) + (with-temp-buffer + (insert-file (ert-resource-file "grammar.pl")) + (cperl-mode) + (let ((index (cperl-imenu--create-perl-index)) + current-list) + (setq current-list (assoc-string "+Unsorted List+..." index)) + (should current-list) + (let ((expected '("(main)::outside" + "Package::in_package" + "Shoved::elsewhere" + "Package::prototyped" + "Versioned::Package::versioned" + "Block::attr" + "Versioned::Package::outer" + "lexical" + "Versioned::Block::signatured" + "Package::in_package_again"))) + (dolist (sub expected) + (should (assoc-string sub index))))))) + ;;; Tests for issues reported in the Bug Tracker (defun cperl-test--run-bug-10483 ()