Improve some big regexps

* lisp/language/lao-util.el (lao-transcription-pattern):
Remove duplicate definition.
* lisp/language/tibetan.el (tibetan-regexp, tibetan-precomposed-regexp)
(tibetan-precomposition-rule-regexp): Use regexp-opt.
This commit is contained in:
Mattias Engdegård 2023-07-05 15:25:30 +02:00
parent 1dc2d5441e
commit d7168e8575
2 changed files with 11 additions and 49 deletions

View file

@ -298,25 +298,6 @@
("\\\\" . "")
("\\$" . "")))
(defconst lao-transcription-pattern
  (let* (;; Join the keys of ALIST verbatim with the alternation operator.
         (raw-keys (lambda (alist) (mapconcat #'car alist "\\|")))
         ;; Same, but escape each key so it matches literally.
         (literal-keys (lambda (alist)
                         (mapconcat (lambda (entry)
                                      (regexp-quote (car entry)))
                                    alist "\\|")))
         (consonant (funcall raw-keys lao-transcription-consonant-alist))
         (semi-vowel (funcall raw-keys lao-transcription-semi-vowel-alist))
         (vowel (funcall raw-keys lao-transcription-vowel-alist))
         (maa-sakod (funcall raw-keys lao-transcription-maa-sakod-alist))
         (tone (funcall literal-keys lao-transcription-tone-alist))
         (punctuation (funcall literal-keys
                               lao-transcription-punctuation-alist)))
    ;; Syllable: consonant, optional semi-vowel, then an optional tail of
    ;; vowel + optional maa-sakod + optional tone.  A punctuation key on
    ;; its own is the alternative to a whole syllable.
    (concat "\\(" consonant
            "\\)\\(" semi-vowel
            "\\)?\\(\\(" vowel
            "\\)\\(" maa-sakod
            "\\)?\\(" tone
            "\\)?\\)?\\|" punctuation))
  "Regexp of Roman transcription pattern for one Lao syllable.
The pattern is assembled from the keys of the Lao transcription alists:
a consonant, an optional semi-vowel, and an optional group made of a
vowel followed by an optional maa-sakod and an optional tone mark.
Alternatively it matches a single punctuation transcription.")
(defconst lao-transcription-pattern
(concat
"\\("

View file

@ -558,48 +558,29 @@
("སྨ" . "<EFBFBD><EFBFBD><EFBFBD><EFBFBD>")))
(defconst tibetan-regexp
  ;; `regexp-opt' treats every element as a fixed string and escapes it
  ;; itself, so the transcription keys are passed through unquoted;
  ;; running them through `regexp-quote' first would escape them twice.
  (regexp-opt
   (mapcar #'car
           (append tibetan-precomposed-transcription-alist
                   tibetan-consonant-transcription-alist
                   tibetan-vowel-transcription-alist
                   tibetan-modifier-transcription-alist
                   tibetan-subjoined-transcription-alist)))
  "Regexp matching a Tibetan transcription of a composable Tibetan sequence.
The result of matching is to be used for indexing alists at conversion
from a roman transcription to the corresponding Tibetan character.")
(defvar tibetan-precomposed-regexp
  (purecopy
   ;; The keys of the precomposed transcription alist are matched as
   ;; fixed strings; the non-nil PAREN argument makes `regexp-opt' wrap
   ;; the alternatives in a group, which is then anchored with "^".
   (concat "^" (regexp-opt
                (mapcar #'car tibetan-precomposed-transcription-alist)
                t)))
  "Regexp string to match a romanized Tibetan complex consonant.
The result of matching is to be used for indexing alists when the input key
from an input method is converted to the corresponding precomposed glyph.")
(defvar tibetan-precomposition-rule-regexp
(purecopy
(let ((l tibetan-precomposition-rule-alist)
temp)
(setq temp "\\(")
(setq temp (concat temp (car (car l))))
(setq l (cdr l))
(while l
(setq temp (concat temp "\\|" (car (car l))))
(setq l (cdr l)))
(concat temp "\\)")))
(regexp-opt (mapcar #'car tibetan-precomposition-rule-alist) t))
"Regexp string to match a sequence of Tibetan consonantic components.
That is, one base consonant and one or more subjoined consonants.
The result of matching is to be used for indexing alist when the component