Fix syntax of symbol and punctuation characters

* lisp/international/characters.el: Adjust syntax of punctuation
and symbol characters to follow that of Unicode properties.
(Bug#44974)
This commit is contained in:
Eli Zaretskii 2021-01-02 12:42:16 +02:00
parent 0f561ee553
commit 70484f92a1

View file

@ -317,6 +317,7 @@ with L, LRE, or LRO Unicode bidi character type.")
(modify-syntax-entry #x5be ".") ; MAQAF
(modify-syntax-entry #x5c0 ".") ; PASEQ
(modify-syntax-entry #x5c3 ".") ; SOF PASUQ
(modify-syntax-entry #x5c6 ".") ; NUN HAFUKHA
(modify-syntax-entry #x5f3 ".") ; GERESH
(modify-syntax-entry #x5f4 ".") ; GERSHAYIM
@ -521,6 +522,9 @@ with L, LRE, or LRO Unicode bidi character type.")
;; syntax: ¢£¤¥¨ª¯²³´¶¸¹º.) There should be a well-defined way of
;; relating Unicode categories to Emacs syntax codes.
;; FIXME: We should probably just use the Unicode properties to set
;; up the syntax table.
;; NBSP isn't semantically interchangeable with other whitespace chars,
;; so it's more like punctuation.
(set-case-syntax ?  "." tbl)
@ -558,7 +562,7 @@ with L, LRE, or LRO Unicode bidi character type.")
(setq c (1+ c)))
;; Latin Extended Additional
(modify-category-entry '(#x1e00 . #x1ef9) ?l)
(modify-category-entry '(#x1E00 . #x1EF9) ?l)
;; Latin Extended-C
(setq c #x2C60)
@ -579,13 +583,13 @@ with L, LRE, or LRO Unicode bidi character type.")
(setq c (1+ c)))
;; Greek
(modify-category-entry '(#x0370 . #x03ff) ?g)
(modify-category-entry '(#x0370 . #x03FF) ?g)
;; Armenian
(setq c #x531)
;; Greek Extended
(modify-category-entry '(#x1f00 . #x1fff) ?g)
(modify-category-entry '(#x1F00 . #x1FFF) ?g)
;; cyrillic
(modify-category-entry '(#x0400 . #x04FF) ?y)
@ -605,40 +609,43 @@ with L, LRE, or LRO Unicode bidi character type.")
(while (<= c #x200F)
(set-case-syntax c "." tbl)
(setq c (1+ c)))
;; Fixme: These aren't all right:
(setq c #x2010)
(while (<= c #x2016)
(set-case-syntax c "_" tbl)
(setq c (1+ c)))
;; Punctuation syntax for quotation marks (like `)
(while (<= c #x201f)
(set-case-syntax c "." tbl)
(setq c (1+ c)))
;; Fixme: These aren't all right:
(while (<= c #x2027)
(set-case-syntax c "_" tbl)
(setq c (1+ c)))
(while (<= c #x206F)
;; Fixme: What to do with characters that have Pi and Pf
;; Unicode properties?
(while (<= c #x2017)
(set-case-syntax c "." tbl)
(setq c (1+ c)))
;; Punctuation syntax for quotation marks (like `)
(while (<= c #x201F)
(set-case-syntax c "." tbl)
(setq c (1+ c)))
(while (<= c #x2027)
(set-case-syntax c "." tbl)
(setq c (1+ c)))
(setq c #x2030)
(while (<= c #x205E)
(set-case-syntax c "." tbl)
(setq c (1+ c)))
(let ((chars '(? ? ? ?⁒)))
(while chars
(modify-syntax-entry (car chars) "_")
(setq chars (cdr chars))))
;; Fixme: The following blocks might be better as symbol rather than
;; punctuation.
;; Arrows
(setq c #x2190)
(while (<= c #x21FF)
(set-case-syntax c "." tbl)
(set-case-syntax c "_" tbl)
(setq c (1+ c)))
;; Mathematical Operators
(while (<= c #x22FF)
(set-case-syntax c "." tbl)
(set-case-syntax c "_" tbl)
(setq c (1+ c)))
;; Miscellaneous Technical
(while (<= c #x23FF)
(set-case-syntax c "." tbl)
(set-case-syntax c "_" tbl)
(setq c (1+ c)))
;; Control Pictures
(while (<= c #x243F)
(while (<= c #x244F)
(set-case-syntax c "_" tbl)
(setq c (1+ c)))
@ -652,13 +659,13 @@ with L, LRE, or LRO Unicode bidi character type.")
;; Supplemental Mathematical Operators
(setq c #x2A00)
(while (<= c #x2AFF)
(set-case-syntax c "." tbl)
(set-case-syntax c "_" tbl)
(setq c (1+ c)))
;; Miscellaneous Symbols and Arrows
(setq c #x2B00)
(while (<= c #x2BFF)
(set-case-syntax c "." tbl)
(set-case-syntax c "_" tbl)
(setq c (1+ c)))
;; Coptic
@ -676,17 +683,34 @@ with L, LRE, or LRO Unicode bidi character type.")
;; Symbols for Legacy Computing
(setq c #x1FB00)
(while (<= c #x1FBCA)
(set-case-syntax c "_" tbl)
(setq c (1+ c)))
;; FIXME: Should these be digits?
(while (<= c #x1FBFF)
(set-case-syntax c "." tbl)
(setq c (1+ c)))
;; Fullwidth Latin
(setq c #xff21)
(while (<= c #xff3a)
(setq c #xFF01)
(while (<= c #xFF0F)
(set-case-syntax c "." tbl)
(setq c (1+ c)))
(set-case-syntax #xFF04 "_" tbl)
(set-case-syntax #xFF0B "_" tbl)
(setq c #xFF21)
(while (<= c #xFF3A)
(modify-category-entry c ?l)
(modify-category-entry (+ c #x20) ?l)
(setq c (1+ c)))
;; Halfwidth Latin
(setq c #xFF64)
(while (<= c #xFF65)
(set-case-syntax c "." tbl)
(setq c (1+ c)))
(set-case-syntax #xFF61 "." tbl)
;; Combining diacritics
(modify-category-entry '(#x300 . #x362) ?^)
;; Combining marks