cperl-mode.el: Allow non-ASCII Perl identifiers

Replace all "A-Z" regexp literals with unicode-aware rx constructs
wherever Perl allows non-ASCII identifiers.
* lisp/progmodes/cperl-mode.el (cperl-after-sub-regexp)
(cperl-after-label. cperl-sniff-for-indent)
(cperl-find-pods-heres, cperl-indent-exp)
(cperl-fix-line-spacing, cperl-imenu--create-perl-index)
(cperl-init-faces, cperl-find-tags):
Replace ASCII regex literals by unicode-aware rx constructs.
(cperl-init-faces): Eliminate unused lexical `font-lock-anchored'.
(cperl-have-help-regexp, cperl-word-at-point-hard): Allow non-ASCII
word characters.

* test/lisp/progmodes/cperl-mode-tests.el
(cperl-test-fontify-special-variables): New test for $^T
and $^{VARNAME}.
(cperl-test-ws-rx cperl-test-ws+-rx),
(cperl-test-version-regexp, cperl-test-package-regexp): Skip
for perl-mode.
(cperl-test-identifier-rx, cperl--test-unicode-setup)
(cperl-test-unicode-labels, cperl-test-unicode-sub)
(cperl-test-unicode-varname)
(cperl-test-unicode-varname-list, cperl-test-unicode-arrays)
(cperl-test-unicode-hashes, cperl-test-unicode-hashref)
(cperl-test-unicode-proto, cperl-test-unicode-fhs)
(cperl-test-unicode-hashkeys, cperl-test-word-at-point):
New tests for unicode identifiers.
 (cperl-test-imenu-index): Add a unicode identifier to the test.

* test/lisp/progmodes/cperl-mode-resources/grammar.pl: Add a
function with non-ASCII name for imenu tests.
This commit is contained in:
Harald Jörg 2021-09-14 17:53:52 +02:00
parent 89068554d7
commit 3d49ad73e5
3 changed files with 545 additions and 100 deletions

View file

@ -1407,7 +1407,7 @@ the last)."
(concat ; Assume n groups before this...
"\\(" ; n+1=name-group
cperl-white-and-comment-rex ; n+2=pre-name
"\\(::[a-zA-Z_0-9:']+\\|[a-zA-Z_'][a-zA-Z_0-9:']*\\)" ; n+3=name
(rx-to-string `(group ,cperl--normal-identifier-rx))
"\\)" ; END n+1=name-group
(if named "" "?")
"\\(" ; n+4=proto-group
@ -2573,7 +2573,8 @@ Return the amount the indentation changed by."
'(?w ?_))
(progn
(backward-sexp)
(looking-at "[a-zA-Z_][a-zA-Z0-9_]*:[^:]"))))
(looking-at (rx (sequence (eval cperl--label-rx)
(not (in ":"))))))))
(defun cperl-get-state (&optional parse-start start-state)
"Return list (START STATE DEPTH PRESTART),
@ -2740,7 +2741,9 @@ Will not look before LIM."
(progn
(forward-sexp -1)
(skip-chars-backward " \t")
(looking-at "[ \t]*[a-zA-Z_][a-zA-Z_0-9]*[ \t]*:")))
(looking-at
(rx (sequence (0+ blank)
(eval cperl--label-rx))))))
(get-text-property (point) 'first-format-line)))
;; Look at previous line that's at column 0
@ -3836,7 +3839,8 @@ recursive calls in starting lines of here-documents."
"\\<" cperl-sub-regexp "\\>" ; sub with proto/attr
"\\("
cperl-white-and-comment-rex
"\\(::[a-zA-Z_:'0-9]*\\|[a-zA-Z_'][a-zA-Z_:'0-9]*\\)\\)?" ; name
(rx (group (eval cperl--normal-identifier-rx)))
"\\)"
"\\("
cperl-maybe-white-and-comment-rex
"\\(([^()]*)\\|:[^:]\\)\\)" ; prototype or attribute start
@ -4111,10 +4115,12 @@ recursive calls in starting lines of here-documents."
(t t))))
;; <file> or <$file>
(and (eq c ?\<)
;; Do not stringify <FH>, <$fh> :
;; Stringify what looks like a glob, but
;; do not stringify file handles <FH>, <$fh> :
(save-match-data
(looking-at
"\\$?\\([_a-zA-Z:][_a-zA-Z0-9:]*\\)?>"))))
(rx (sequence (opt "$")
(eval cperl--normal-identifier-rx)))))))
tb (match-beginning 0))
(goto-char (match-beginning b1))
(cperl-backward-to-noncomment (point-min))
@ -4184,7 +4190,16 @@ recursive calls in starting lines of here-documents."
(error nil)))
(if (or bb
(looking-at ; $foo -> {s}
"[$@]\\$*\\([a-zA-Z0-9_:]+\\|[^{]\\)\\([ \t\n]*->\\)?[ \t\n]*{")
(rx
(sequence
(in "$@") (0+ "$")
(or
(eval cperl--normal-identifier-rx)
(not (in "{")))
(opt (sequence (eval cperl--ws*-rx))
"->")
(eval cperl--ws*-rx)
"{")))
(and ; $foo[12] -> {s}
(memq (following-char) '(?\{ ?\[))
(progn
@ -4199,7 +4214,12 @@ recursive calls in starting lines of here-documents."
(setq bb t))
((and (eq (following-char) ?:)
(eq b1 ?\{) ; Check for $ { s::bar }
(looking-at "::[a-zA-Z0-9_:]*[ \t\n\f]*}")
;; (looking-at "::[a-zA-Z0-9_:]*[ \t\n\f]*}")
(looking-at
(rx (sequence "::"
(eval cperl--normal-identifier-rx)
(eval cperl--ws*-rx)
"}")))
(progn
(goto-char (1- go))
(skip-chars-backward " \t\n\f")
@ -4364,7 +4384,7 @@ recursive calls in starting lines of here-documents."
"\\(" ;; XXXX 1-char variables, exc. |()\s
"[$@]"
"\\("
"[_a-zA-Z:][_a-zA-Z0-9:]*"
(rx (eval cperl--normal-identifier-rx))
"\\|"
"{[^{}]*}" ; only one-level allowed
"\\|"
@ -4820,6 +4840,7 @@ recursive calls in starting lines of here-documents."
(progn
(backward-sexp)
;; sub {BLK}, print {BLK} $data, but NOT `bless', `return', `tr', `constant'
;; a-zA-Z is fine here, these are Perl keywords
(or (and (looking-at "[a-zA-Z0-9_:]+[ \t\n\f]*[{#]") ; Method call syntax
(not (looking-at "\\(bless\\|return\\|q[wqrx]?\\|tr\\|[smy]\\|constant\\)\\>")))
;; sub bless::foo {}
@ -5028,7 +5049,11 @@ conditional/loop constructs."
cperl-maybe-white-and-comment-rex
"\\(state\\|my\\|local\\|our\\)\\)?"
cperl-maybe-white-and-comment-rex
"\\$[_a-zA-Z0-9]+\\)?\\)\\>"))
(rx
(sequence
"$"
(eval cperl--basic-identifier-rx)))
"\\)?\\)\\>"))
(progn
(goto-char top)
(forward-sexp 1)
@ -5122,7 +5147,14 @@ Returns some position at the last line."
;; Looking at:
;; foreach my $var (
(if (looking-at
"[ \t]*\\<for\\(each\\)?[ \t]+\\(state\\|my\\|local\\|our\\)[ \t]*\\$[_a-zA-Z0-9]+\\(\t*\\|[ \t][ \t]+\\)[^ \t\n#]")
(rx (sequence (0+ blank) symbol-start
"for" (opt "each")
(1+ blank)
(or "state" "my" "local" "our")
(0+ blank)
"$" (eval cperl--basic-identifier-rx)
(1+ blank)
(not (in " \t\n#")))))
(progn
(forward-sexp 3)
(delete-horizontal-space)
@ -5132,9 +5164,25 @@ Returns some position at the last line."
;; Looking at (with or without "}" at start, ending after "({"):
;; } foreach my $var () OR {
(if (looking-at
"[ \t]*\\(}[ \t]*\\)?\\<\\(els\\(e\\|if\\)\\|continue\\|if\\|unless\\|while\\|for\\(each\\)?\\(\\([ \t]+\\(state\\|my\\|local\\|our\\)\\)?[ \t]*\\$[_a-zA-Z0-9]+\\)?\\|until\\)\\>\\([ \t]*(\\|[ \t\n]*{\\)\\|[ \t]*{")
(rx (sequence
(0+ blank)
(opt (sequence "}" (0+ blank) ))
symbol-start
(or "else" "elsif" "continue" "if" "unless" "while" "until"
(sequence (or "for" "foreach")
(opt
(opt (sequence (1+ blank)
(or "state" "my" "local" "our")))
(0+ blank)
"$" (eval cperl--basic-identifier-rx))))
symbol-end
(group-n 1
(or
(or (sequence (0+ blank) "(")
(sequence (eval cperl--ws*-rx) "{"))
(sequence (0+ blank) "{"))))))
(progn
(setq ml (match-beginning 8)) ; "(" or "{" after control word
(setq ml (match-beginning 1)) ; "(" or "{" after control word
(re-search-forward "[({]")
(forward-char -1)
(setq p (point))
@ -5544,7 +5592,11 @@ comment, or POD."
(setq lst index-sub-alist)
(while lst
(setq elt (car lst) lst (cdr lst))
(cond ((string-match "\\(::\\|'\\)[_a-zA-Z0-9]+$" (car elt))
(cond ((string-match
(rx (sequence (or "::" "'")
(eval cperl--basic-identifier-rx)
string-end))
(car elt))
(setq pack (substring (car elt) 0 (match-beginning 0)))
(if (setq group (assoc pack hier-list))
(if (listp (cdr group))
@ -5646,8 +5698,7 @@ default function."
(defun cperl-init-faces ()
(condition-case errs
(progn
(let (t-font-lock-keywords t-font-lock-keywords-1 font-lock-anchored)
(setq font-lock-anchored t)
(let (t-font-lock-keywords t-font-lock-keywords-1)
(setq
t-font-lock-keywords
(list
@ -5760,20 +5811,41 @@ default function."
(if (eq (char-after (cperl-1- (match-end 0))) ?\{ )
'font-lock-function-name-face
'font-lock-variable-name-face))))
'("\\<\\(package\\|require\\|use\\|import\\|no\\|bootstrap\\)[ \t]+\\([a-zA-Z_][a-zA-Z_0-9:]*\\)[ \t;]" ; require A if B;
2 font-lock-function-name-face)
`(,(rx (sequence symbol-start
(or "package" "require" "use" "import"
"no" "bootstrap")
(eval cperl--ws+-rx)
(group-n 1 (eval cperl--normal-identifier-rx))
(any " \t;"))) ; require A if B;
1 font-lock-function-name-face)
'("^[ \t]*format[ \t]+\\([a-zA-Z_][a-zA-Z_0-9:]*\\)[ \t]*=[ \t]*$"
1 font-lock-function-name-face)
(cond (font-lock-anchored
'("\\([]}\\%@>*&]\\|\\$[a-zA-Z0-9_:]*\\)[ \t]*{[ \t]*\\(-?[a-zA-Z0-9_:]+\\)[ \t]*}"
(2 font-lock-string-face t)
("\\=[ \t]*{[ \t]*\\(-?[a-zA-Z0-9_:]+\\)[ \t]*}"
nil nil
(1 font-lock-string-face t))))
(t '("\\([]}\\%@>*&]\\|\\$[a-zA-Z0-9_:]*\\)[ \t]*{[ \t]*\\(-?[a-zA-Z0-9_:]+\\)[ \t]*}"
2 font-lock-string-face t)))
'("[[ \t{,(]\\(-?[a-zA-Z0-9_:]+\\)[ \t]*=>" 1
font-lock-string-face t)
;; bareword hash key: $foo{bar}
`(,(rx (or (in "]}\\%@>*&") ; What Perl is this?
(sequence "$" (eval cperl--normal-identifier-rx)))
(0+ blank) "{" (0+ blank)
(group-n 1 (sequence (opt "-")
(eval cperl--basic-identifier-rx)))
(0+ blank) "}")
;; '("\\([]}\\%@>*&]\\|\\$[a-zA-Z0-9_:]*\\)[ \t]*{[ \t]*\\(-?[a-zA-Z0-9_:]+\\)[ \t]*}"
(1 font-lock-string-face t)
;; anchored bareword hash key: $foo{bar}{baz}
(,(rx point
(0+ blank) "{" (0+ blank)
(group-n 1 (sequence (opt "-")
(eval cperl--basic-identifier-rx)))
(0+ blank) "}")
;; ("\\=[ \t]*{[ \t]*\\(-?[a-zA-Z0-9_:]+\\)[ \t]*}"
nil nil
(1 font-lock-string-face t)))
;; hash element assignments with bareword key => value
`(,(rx (in "[ \t{,()")
(group-n 1 (sequence (opt "-")
(eval cperl--basic-identifier-rx)))
(0+ blank) "=>")
1 font-lock-string-face t)
;; '("[[ \t{,(]\\(-?[a-zA-Z0-9_:]+\\)[ \t]*=>" 1
;; font-lock-string-face t)
;; labels
`(,(rx
(sequence
@ -5797,83 +5869,130 @@ default function."
;;; '("[$*]{?\\(\\sw+\\)" 1 font-lock-variable-name-face)
;;; '("\\([@%]\\|\\$#\\)\\(\\sw+\\)"
;;; (2 (cons font-lock-variable-name-face '(underline))))
(cond (font-lock-anchored
;; 1=my_etc, 2=white? 3=(+white? 4=white? 5=var
`(,(concat "\\<\\(state\\|my\\|local\\|our\\)"
cperl-maybe-white-and-comment-rex
"\\(("
cperl-maybe-white-and-comment-rex
"\\)?\\([$@%*]\\([a-zA-Z0-9_:]+\\|[^a-zA-Z0-9_]\\)\\)")
(5 ,(if cperl-font-lock-multiline
'font-lock-variable-name-face
'(progn (setq cperl-font-lock-multiline-start
(match-beginning 0))
'font-lock-variable-name-face)))
(,(concat "\\="
cperl-maybe-white-and-comment-rex
","
cperl-maybe-white-and-comment-rex
"\\([$@%*]\\([a-zA-Z0-9_:]+\\|[^a-zA-Z0-9_]\\)\\)")
;; Bug in font-lock: limit is used not only to limit
;; searches, but to set the "extend window for
;; facification" property. Thus we need to minimize.
,(if cperl-font-lock-multiline
'(if (match-beginning 3)
(save-excursion
(goto-char (match-beginning 3))
(condition-case nil
(forward-sexp 1)
(error
(condition-case nil
(forward-char 200)
(error nil)))) ; typeahead
(1- (point))) ; report limit
(forward-char -2)) ; disable continued expr
'(if (match-beginning 3)
(point-max) ; No limit for continuation
(forward-char -2))) ; disable continued expr
,(if cperl-font-lock-multiline
nil
'(progn ; Do at end
;; "my" may be already fontified (POD),
;; so cperl-font-lock-multiline-start is nil
(if (or (not cperl-font-lock-multiline-start)
(> 2 (count-lines
cperl-font-lock-multiline-start
(point))))
nil
(put-text-property
(1+ cperl-font-lock-multiline-start) (point)
'syntax-type 'multiline))
(setq cperl-font-lock-multiline-start nil)))
(3 font-lock-variable-name-face))))
(t '("^[ \t{}]*\\(state\\|my\\|local\\|our\\)[ \t]*\\(([ \t]*\\)?\\([$@%*][a-zA-Z0-9_:]+\\)"
3 font-lock-variable-name-face)))
'("\\<for\\(each\\)?\\([ \t]+\\(state\\|my\\|local\\|our\\)\\)?[ \t]*\\(\\$[a-zA-Z_][a-zA-Z_0-9]*\\)[ \t]*("
`(,(rx (sequence (or "state" "my" "local" "our"))
(eval cperl--ws*-rx)
(opt (sequence "(" (eval cperl--ws*-rx)))
(group
(in "$@%*")
(or
(eval cperl--normal-identifier-rx)
(eval cperl--special-identifier-rx))
)
)
;; (concat "\\<\\(state\\|my\\|local\\|our\\)"
;; cperl-maybe-white-and-comment-rex
;; "\\(("
;; cperl-maybe-white-and-comment-rex
;; "\\)?\\([$@%*]\\([a-zA-Z0-9_:]+\\|[^a-zA-Z0-9_]\\)\\)")
;; (5 ,(if cperl-font-lock-multiline
(1 ,(if cperl-font-lock-multiline
'font-lock-variable-name-face
'(progn (setq cperl-font-lock-multiline-start
(match-beginning 0))
'font-lock-variable-name-face)))
(,(rx (sequence point
(eval cperl--ws*-rx)
","
(eval cperl--ws*-rx)
(group
(in "$@%*")
(or
(eval cperl--normal-identifier-rx)
(eval cperl--special-identifier-rx))
)
)
)
;; ,(concat "\\="
;; cperl-maybe-white-and-comment-rex
;; ","
;; cperl-maybe-white-and-comment-rex
;; "\\([$@%*]\\([a-zA-Z0-9_:]+\\|[^a-zA-Z0-9_]\\)\\)")
;; Bug in font-lock: limit is used not only to limit
;; searches, but to set the "extend window for
;; facification" property. Thus we need to minimize.
,(if cperl-font-lock-multiline
'(if (match-beginning 1)
(save-excursion
(goto-char (match-beginning 1))
(condition-case nil
(forward-sexp 1)
(error
(condition-case nil
(forward-char 200)
(error nil)))) ; typeahead
(1- (point))) ; report limit
(forward-char -2)) ; disable continued expr
'(if (match-beginning 1)
(point-max) ; No limit for continuation
(forward-char -2))) ; disable continued expr
,(if cperl-font-lock-multiline
nil
'(progn ; Do at end
;; "my" may be already fontified (POD),
;; so cperl-font-lock-multiline-start is nil
(if (or (not cperl-font-lock-multiline-start)
(> 2 (count-lines
cperl-font-lock-multiline-start
(point))))
nil
(put-text-property
(1+ cperl-font-lock-multiline-start) (point)
'syntax-type 'multiline))
(setq cperl-font-lock-multiline-start nil)))
(1 font-lock-variable-name-face)))
;; foreach my $foo (
`(,(rx symbol-start "for" (opt "each")
(opt (sequence (1+ blank)
(or "state" "my" "local" "our")))
(0+ blank)
(group-n 1 (sequence "$"
(eval cperl--basic-identifier-rx)))
(0+ blank) "(")
;; '("\\<for\\(each\\)?\\([ \t]+\\(state\\|my\\|local\\|our\\)\\)?[ \t]*\\(\\$[a-zA-Z_][a-zA-Z_0-9]*\\)[ \t]*("
4 font-lock-variable-name-face)
;; Avoid $!, and s!!, qq!! etc. when not fontifying syntactically
'("\\(?:^\\|[^smywqrx$]\\)\\(!\\)" 1 font-lock-negation-char-face)
'("\\[\\(\\^\\)" 1 font-lock-negation-char-face prepend)))
(setq
t-font-lock-keywords-1
'(
("\\(\\([@%]\\|\\$#\\)[a-zA-Z_:][a-zA-Z0-9_:]*\\)" 1
`(
;; arrays and hashes. Access to elements is fixed below
(,(rx (group-n 1 (group-n 2 (or (in "@%") "$#"))
(eval cperl--normal-identifier-rx)))
1
;; ("\\(\\([@%]\\|\\$#\\)[a-zA-Z_:][a-zA-Z0-9_:]*\\)" 1
(if (eq (char-after (match-beginning 2)) ?%)
'cperl-hash-face
'cperl-array-face)
nil) ; arrays and hashes
("\\(\\([$@%]+\\)[a-zA-Z_:][a-zA-Z0-9_:]*\\)[ \t]*\\([[{]\\)"
;; access to array/hash elements
(,(rx (group-n 1 (group-n 2 (in "$@%"))
(eval cperl--normal-identifier-rx))
(0+ blank)
(group-n 3 (in "[{")))
;; ("\\(\\([$@%]+\\)[a-zA-Z_:][a-zA-Z0-9_:]*\\)[ \t]*\\([[{]\\)"
1
(if (= (- (match-end 2) (match-beginning 2)) 1)
(if (eq (char-after (match-beginning 3)) ?{)
'cperl-hash-face
'cperl-array-face) ; arrays and hashes
font-lock-variable-name-face) ; Just to put something
t)
("\\(@\\|\\$#\\)\\(\\$+\\([a-zA-Z_:][a-zA-Z0-9_:]*\\|[^ \t\n]\\)\\)"
t) ; override previous
;; @$ array dereferences, $#$ last array index
(,(rx (group-n 1 (or "@" "$#"))
(group-n 2 (sequence "$"
(or (eval cperl--normal-identifier-rx)
(not (in " \t\n"))))))
;; ("\\(@\\|\\$#\\)\\(\\$+\\([a-zA-Z_:][a-zA-Z0-9_:]*\\|[^ \t\n]\\)\\)"
(1 'cperl-array-face)
(2 font-lock-variable-name-face))
("\\(%\\)\\(\\$+\\([a-zA-Z_:][a-zA-Z0-9_:]*\\|[^ \t\n]\\)\\)"
;; %$ hash dereferences
(,(rx (group-n 1 "%")
(group-n 2 (sequence "$"
(or (eval cperl--normal-identifier-rx)
(not (in " \t\n"))))))
;; ("\\(%\\)\\(\\$+\\([a-zA-Z_:][a-zA-Z0-9_:]*\\|[^ \t\n]\\)\\)"
(1 'cperl-hash-face)
(2 font-lock-variable-name-face))
;;("\\([smy]\\|tr\\)\\([^a-z_A-Z0-9]\\)\\(\\([^\n\\]*||\\)\\)\\2")
@ -6435,6 +6554,8 @@ Will not move the position at the start to the left."
(indent-region beg end nil)
(goto-char beg)
(setq col (current-column))
;; Assuming that lineup is done on Perl syntax, this regexp
;; doesn't need to be unicode aware -- haj, 2021-09-10
(if (looking-at "[a-zA-Z0-9_]")
(if (looking-at "\\<[a-zA-Z0-9_]+\\>")
(setq search
@ -6472,6 +6593,9 @@ Will not move the position at the start to the left."
"Run etags with appropriate options for Perl files.
If optional argument ALL is `recursive', will process Perl files
in subdirectories too."
;; Apparently etags doesn't support UTF-8 encoded sources, and usage
;; of etags has been commented out in the menu since ... well,
;; forever. So, let's just stick to ASCII here. -- haj, 2021-09-14
(interactive)
(let ((cmd "etags")
(args `("-l" "none" "-r"
@ -6611,6 +6735,9 @@ Does not move point."
;; Search for the function
(progn ;;save-match-data
(while (re-search-forward
;; FIXME: Should XS code be unicode aware? Recent C
;; compilers (Gcc 10+) are, but I guess this isn't used
;; much. -- haj, 2021-09-14
"^\\([ \t]*MODULE\\>[^\n]*\\<PACKAGE[ \t]*=[ \t]*\\([a-zA-Z_][a-zA-Z_0-9:]*\\)\\>\\|\\([a-zA-Z_][a-zA-Z_0-9]*\\)(\\|[ \t]*BOOT:\\)"
nil t)
(cond
@ -6673,7 +6800,7 @@ Does not move point."
(setq lst
(mapcar
(lambda (elt)
(cond ((string-match "^[_a-zA-Z]" (car elt))
(cond ((string-match (rx line-start (or alpha "_")) (car elt))
(goto-char (cdr elt))
(beginning-of-line) ; pos should be of the start of the line
(list (car elt)
@ -6703,9 +6830,14 @@ Does not move point."
","
(number-to-string (1- (elt elt 1))) ; Char pos 0-based
"\n")
(if (and (string-match "^[_a-zA-Z]+::" (car elt))
(string-match (concat "^" cperl-sub-regexp "[ \t]+\\([_a-zA-Z]+\\)[^:_a-zA-Z]")
(elt elt 3)))
(if (and (string-match (rx line-start
(eval cperl--basic-identifier-rx) "++")
(car elt))
(string-match (rx-to-string `(sequence line-start
(regexp ,cperl-sub-regexp)
(1+ (in " \t"))
,cperl--normal-identifier-rx))
(elt elt 3)))
;; Need to insert the name without package as well
(setq lst (cons (cons (substring (elt elt 3)
(match-beginning 1)
@ -7155,14 +7287,14 @@ Currently it is tuned to C and Perl syntax."
;;(concat "\\("
(mapconcat
#'identity
'("[$@%*&][0-9a-zA-Z_:]+\\([ \t]*[[{]\\)?" ; Usual variable
'("[$@%*&][[:alnum:]_:]+\\([ \t]*[[{]\\)?" ; Usual variable
"[$@]\\^[a-zA-Z]" ; Special variable
"[$@][^ \n\t]" ; Special variable
"-[a-zA-Z]" ; File test
"\\\\[a-zA-Z0]" ; Special chars
"^=[a-z][a-zA-Z0-9_]*" ; POD sections
"[-!&*+,./<=>?\\^|~]+" ; Operator
"[a-zA-Z_0-9:]+" ; symbol or number
"[[:alnum:]_:]+" ; symbol or number
"x="
"#!")
;;"\\)\\|\\("
@ -7178,7 +7310,7 @@ Currently it is tuned to C and Perl syntax."
;; Does not save-excursion
;; Get to the something meaningful
(or (eobp) (eolp) (forward-char 1))
(re-search-backward "[-a-zA-Z0-9_:!&*+,./<=>?\\^|~$%@]"
(re-search-backward "[-[:alnum:]_:!&*+,./<=>?\\^|~$%@]"
(point-at-bol)
'to-beg)
;; (cond
@ -7187,8 +7319,8 @@ Currently it is tuned to C and Perl syntax."
;; (or (bobp) (backward-char 1))))
;; Try to backtrace
(cond
((looking-at "[a-zA-Z0-9_:]") ; symbol
(skip-chars-backward "a-zA-Z0-9_:")
((looking-at "[[:alnum:]_:]") ; symbol
(skip-chars-backward "[:alnum:]_:")
(cond
((and (eq (preceding-char) ?^) ; $^I
(eq (char-after (- (point) 2)) ?\$))
@ -7199,7 +7331,7 @@ Currently it is tuned to C and Perl syntax."
(eq (current-column) 1))
(forward-char -1))) ; =head1
(if (and (eq (preceding-char) ?\<)
(looking-at "\\$?[a-zA-Z0-9_:]+>")) ; <FH>
(looking-at "\\$?[[:alnum:]_:]+>")) ; <FH>
(forward-char -1)))
((and (looking-at "=") (eq (preceding-char) ?x)) ; x=
(forward-char -1))
@ -7212,15 +7344,15 @@ Currently it is tuned to C and Perl syntax."
(not (eq (char-after (- (point) 2)) ?\$))) ; $-
(forward-char -1))
((and (eq (following-char) ?\>)
(string-match "[a-zA-Z0-9_]" (char-to-string (preceding-char)))
(string-match "[[:alnum:]_]" (char-to-string (preceding-char)))
(save-excursion
(forward-sexp -1)
(and (eq (preceding-char) ?\<)
(looking-at "\\$?[a-zA-Z0-9_:]+>")))) ; <FH>
(looking-at "\\$?[[:alnum:]_:]+>")))) ; <FH>
(search-backward "<"))))
((and (eq (following-char) ?\$)
(eq (preceding-char) ?\<)
(looking-at "\\$?[a-zA-Z0-9_:]+>")) ; <$fh>
(looking-at "\\$?[[:alnum:]_:]+>")) ; <$fh>
(forward-char -1)))
(if (looking-at cperl-have-help-regexp)
(buffer-substring (match-beginning 0) (match-end 0))))

View file

@ -1,6 +1,7 @@
use 5.024;
use strict;
use warnings;
use utf8;
sub outside {
say "Line @{[__LINE__]}: package '@{[__PACKAGE__]}'";
@ -155,4 +156,17 @@ package :: {
Shoved::elsewhere();
# Finally, try unicode identifiers.
package Erdős::Number;
sub erdős_number {
my $name = shift;
if ($name eq "Erdős Pál") {
return 0;
}
else {
die "No access to the database. Sorry.";
}
}
1;

View file

@ -154,6 +154,22 @@ point in the distant past, and is still broken in perl-mode. "
(should (equal (get-text-property (match-beginning 0) 'face)
'font-lock-keyword-face))))
(ert-deftest cperl-test-fontify-special-variables ()
"Test fontification of variables like $^T or ${^ENCODING}.
These can occur as \"local\" aliases."
(skip-unless (eq cperl-test-mode #'cperl-mode))
(with-temp-buffer
(insert "local ($^I, ${^UNICODE});\n")
(goto-char (point-min))
(funcall cperl-test-mode)
(font-lock-ensure)
(search-forward "$")
(should (equal (get-text-property (point) 'face)
'font-lock-variable-name-face))
(search-forward "$")
(should (equal (get-text-property (point) 'face)
'font-lock-variable-name-face))))
(ert-deftest cperl-test-identify-heredoc ()
"Test whether a construct containing \"<<\" followed by a
bareword is properly identified for a here-document if
@ -297,6 +313,7 @@ the whole string."
(ert-deftest cperl-test-ws-rx ()
"Tests capture of very simple regular expressions (yawn)."
(skip-unless (eq cperl-test-mode #'cperl-mode))
(let ((valid
'(" " "\t" "\n"))
(invalid
@ -306,6 +323,7 @@ the whole string."
(ert-deftest cperl-test-ws+-rx ()
"Tests sequences of whitespace and comment lines."
(skip-unless (eq cperl-test-mode #'cperl-mode))
(let ((valid
`(" " "\t#\n" "\n# \n"
,(concat "# comment\n" "# comment\n" "\n" "#comment\n")))
@ -316,6 +334,7 @@ the whole string."
(ert-deftest cperl-test-version-regexp ()
"Tests the regexp for recommended syntax of versions in Perl."
(skip-unless (eq cperl-test-mode #'cperl-mode))
(let ((valid
'("1" "1.1" "1.1_1" "5.032001"
"v120.100.103"))
@ -331,6 +350,7 @@ the whole string."
(ert-deftest cperl-test-package-regexp ()
"Tests the regular expression of Perl package names with versions.
Also includes valid cases with whitespace in strange places."
(skip-unless (eq cperl-test-mode #'cperl-mode))
(let ((valid
'("package Foo"
"package Foo::Bar"
@ -346,6 +366,284 @@ Also includes valid cases with whitespace in strange places."
(cperl-test--validate-regexp (rx (eval cperl--package-rx))
valid invalid)))
(ert-deftest cperl-test-identifier-rx ()
"Test valid and invalid identifiers (no sigils)."
(skip-unless (eq cperl-test-mode #'cperl-mode))
(let ((valid
'("foo" "FOO" "f_oo" "a123"
"manĝis")) ; Unicode is allowed!
(invalid
'("$foo" ; no sigils allowed (yet)
"Foo::bar" ; no package qualifiers allowed
"lots_of_€"))) ; € is not alphabetic
(cperl-test--validate-regexp (rx (eval cperl--basic-identifier-rx))
valid invalid)))
;;; Test unicode identifier in various places
(defun cperl--test-unicode-setup (code string)
"Insert CODE, prepare it for tests, and find STRING.
Invoke the appropriate major mode, ensure fontification, and set
point after the first occurrence of STRING (no regexp!)."
(insert code)
(funcall cperl-test-mode)
(font-lock-ensure)
(goto-char (point-min))
(search-forward string))
(ert-deftest cperl-test-unicode-labels ()
"Verify that non-ASCII labels are processed correctly."
(with-temp-buffer
(cperl--test-unicode-setup "LABEł: for ($manĝi) { say; }" "LAB")
(should (equal (get-text-property (point) 'face)
'font-lock-constant-face))))
(ert-deftest cperl-test-unicode-sub ()
(with-temp-buffer
(cperl--test-unicode-setup
(concat "use strict;\n" ; distinguish bob from b-o-f
"sub ℏ {\n"
" 6.62607015e-34\n"
"};")
"sub ") ; point is before "ℏ"
;; Testing fontification
;; FIXME 2021-09-10: This tests succeeds because cperl-mode
;; accepts almost anything as a sub name for fontification. For
;; example, it fontifies "sub @ {...;}" which is a syntax error in
;; Perl. I let this pass for the moment.
(should (equal (get-text-property (point) 'face)
'font-lock-function-name-face))
;; Testing `beginning-of-defun'. Not available in perl-mode,
;; where it jumps to the beginning of the buffer.
(when (eq cperl-test-mode #'cperl-mode)
(goto-char (point-min))
(search-forward "-34")
(beginning-of-defun)
(should (looking-at "sub")))))
(ert-deftest cperl-test-unicode-varname ()
(with-temp-buffer
(cperl--test-unicode-setup
(concat "use strict;\n"
"my $π = 3.1415926535897932384626433832795028841971;\n"
"\n"
"my $manĝi = $π;\n"
"__END__\n")
"my $") ; perl-mode doesn't fontify the sigil, so include it here
;; Testing fontification
;; FIXME 2021-09-10: This test succeeds in cperl-mode because the
;; π character is "not ASCII alphabetic", so it treats $π as a
;; punctuation variable. The following two `should' forms with a
;; longer variable name were added for stronger verification.
(should (equal (get-text-property (point) 'face)
'font-lock-variable-name-face))
;; Test both ends of a longer variable name
(search-forward "my $") ; again skip the sigil
(should (equal (get-text-property (point) 'face)
'font-lock-variable-name-face))
(search-forward "manĝi")
(should (equal (get-text-property (1- (match-end 0)) 'face)
'font-lock-variable-name-face))))
(ert-deftest cperl-test-unicode-varname-list ()
"Verify that all elements of a variable list are fontified."
(let ((hash-face (if (eq cperl-test-mode #'perl-mode)
'perl-non-scalar-variable
'cperl-hash-face))
(array-face (if (eq cperl-test-mode #'perl-mode)
'perl-non-scalar-variable
'cperl-array-face)))
(with-temp-buffer
(cperl--test-unicode-setup
"my (%äsh,@ärräy,$scâlâr);" "%")
(should (equal (get-text-property (point) 'face)
hash-face))
(search-forward "@")
(should (equal (get-text-property (point) 'face)
array-face))
(search-forward "scâlâr")
(should (equal (get-text-property (match-beginning 0) 'face)
'font-lock-variable-name-face))
(should (equal (get-text-property (1- (match-end 0)) 'face)
'font-lock-variable-name-face)))
;; Now with package-qualified variables
(with-temp-buffer
(cperl--test-unicode-setup
"local (%Søme::äsh,@Søme::ärräy,$Søme::scâlâr);" "%")
(should (equal (get-text-property (point) 'face)
hash-face))
(search-forward "Søme::") ; test basic identifier
(should (equal (get-text-property (point) 'face)
hash-face))
(search-forward "@") ; test package name
(should (equal (get-text-property (point) 'face)
array-face))
(search-forward "Søme::") ; test basic identifier
(should (equal (get-text-property (point) 'face)
array-face))
(search-forward "Søme") ; test package name
(should (equal (get-text-property (match-beginning 0) 'face)
'font-lock-variable-name-face))
(should (equal (get-text-property (1- (match-end 0)) 'face)
'font-lock-variable-name-face))
(search-forward "scâlâr") ; test basic identifier
(should (equal (get-text-property (match-beginning 0) 'face)
'font-lock-variable-name-face))
(should (equal (get-text-property (1- (match-end 0)) 'face)
'font-lock-variable-name-face)))))
(ert-deftest cperl-test-unicode-arrays ()
"Test fontification of array access."
;; Perl mode just looks at the sigil, for element access
(skip-unless (eq cperl-test-mode #'cperl-mode))
;; simple array element
(with-temp-buffer
(cperl--test-unicode-setup
"$ärräy[1] = 7;" "$")
(should (equal (get-text-property (point) 'face)
'cperl-array-face)))
;; array slice
(with-temp-buffer
(cperl--test-unicode-setup
"@ärräy[(1..3)] = (4..6);" "@")
(should (equal (get-text-property (point) 'face)
'cperl-array-face)))
;; array max index
(with-temp-buffer
(cperl--test-unicode-setup
"$#ärräy = 1;" "$")
(should (equal (get-text-property (point) 'face)
'cperl-array-face)))
;; array dereference
(with-temp-buffer
(cperl--test-unicode-setup
"@$ärräy = (1,2,3)" "@")
(should (equal (get-text-property (1- (point)) 'face)
'cperl-array-face))
(should (equal (get-text-property (1+ (point)) 'face)
'font-lock-variable-name-face))))
(ert-deftest cperl-test-unicode-hashes ()
"Test fontification of hash access."
;; Perl mode just looks at the sigil, for element access
(skip-unless (eq cperl-test-mode #'cperl-mode))
;; simple hash element
(with-temp-buffer
(cperl--test-unicode-setup
"$häsh{'a'} = 7;" "$")
(should (equal (get-text-property (point) 'face)
'cperl-hash-face)))
;; hash array slice
(with-temp-buffer
(cperl--test-unicode-setup
"@häsh{(1..3)} = (4..6);" "@")
(should (equal (get-text-property (point) 'face)
'cperl-hash-face)))
;; hash subset
(with-temp-buffer
(cperl--test-unicode-setup
"my %hash = %häsh{'a',2,3};" "= %")
(should (equal (get-text-property (point) 'face)
'cperl-hash-face)))
;; hash dereference
(with-temp-buffer
(cperl--test-unicode-setup
"%$äsh = (key => 'value');" "%")
(should (equal (get-text-property (1- (point)) 'face)
'cperl-hash-face))
(should (equal (get-text-property (1+ (point)) 'face)
'font-lock-variable-name-face))))
(ert-deftest cperl-test-unicode-hashref ()
"Verify that a hashref access disambiguates {s}.
CPerl mode takes the token \"s\" as a substitution unless
detected otherwise. Not for perl-mode: it doesn't stringify
bareword hash keys and doesn't recognize a substitution
\"s}foo}bar}\""
(skip-unless (eq cperl-test-mode #'cperl-mode))
(with-temp-buffer
(cperl--test-unicode-setup "$häshref->{s} # }}" "{")
(should (equal (get-text-property (point) 'face)
'font-lock-string-face))
(should (equal (get-text-property (1+ (point)) 'face)
nil))))
(ert-deftest cperl-test-unicode-proto ()
;; perl-mode doesn't fontify prototypes at all
(skip-unless (eq cperl-test-mode #'cperl-mode))
(with-temp-buffer
(cperl--test-unicode-setup
(concat "sub prötötyped ($) {\n"
" ...;"
"}\n")
"prötötyped (")
(should (equal (get-text-property (point) 'face)
'font-lock-string-face))))
(ert-deftest cperl-test-unicode-fhs ()
(with-temp-buffer
(cperl--test-unicode-setup
(concat "while (<BAREWÖRD>) {\n"
" ...;)\n"
"}\n")
"while (<") ; point is before the first char of the handle
;; Testing fontification
;; FIXME 2021-09-10: perl-mode.el and cperl-mode.el handle these
;; completely differently. perl-mode interprets barewords as
;; constants, cperl-mode does not fontify them. Both treat
;; non-barewords as globs, which are not fontified by perl-mode,
;; but fontified as strings in cperl-mode. We keep (and test)
;; that behavior "as is" because both bareword filehandles and
;; <glob> syntax are no longer recommended.
(let ((bareword-face
(if (equal cperl-test-mode 'perl-mode) 'font-lock-constant-face
nil)))
(should (equal (get-text-property (point) 'face)
bareword-face)))))
(ert-deftest cperl-test-unicode-hashkeys ()
"Test stringification of bareword hash keys. Not in perl-mode.
perl-mode generally does not stringify bareword hash keys."
(skip-unless (eq cperl-test-mode #'cperl-mode))
;; Plain hash key
(with-temp-buffer
(cperl--test-unicode-setup
"$häsh { kéy }" "{ ")
(should (equal (get-text-property (point) 'face)
'font-lock-string-face)))
;; Nested hash key
(with-temp-buffer
(cperl--test-unicode-setup
"$häsh { kéy } { kèy }" "} { ")
(should (equal (get-text-property (point) 'face)
'font-lock-string-face)))
;; Key => value
(with-temp-buffer
(cperl--test-unicode-setup
"( kéy => 'value'," "( ")
(should (equal (get-text-property (point) 'face)
'font-lock-string-face))))
(ert-deftest cperl-test-word-at-point ()
"Test whether the function captures non-ASCII words."
(skip-unless (eq cperl-test-mode #'cperl-mode))
(let ((words '("rôle" "café" "ångström"
"Data::Dump::dump"
"_underscore")))
(dolist (word words)
(with-temp-buffer
(insert " + ") ; this will be the suffix
(beginning-of-line)
(insert ")") ; A non-word char
(insert word)
(should (string= word (cperl-word-at-point-hard)))))))
;;; Function test: Building an index for imenu
(ert-deftest cperl-test-imenu-index ()
@ -369,7 +667,8 @@ created by CPerl mode, so skip it for Perl mode."
"Versioned::Package::outer"
"lexical"
"Versioned::Block::signatured"
"Package::in_package_again")))
"Package::in_package_again"
"Erdős::Number::erdős_number")))
(dolist (sub expected)
(should (assoc-string sub index)))))))