Register tibetan-composition-function in
composition-function-table. (tibetan-composable-pattern): New variable. (tibetan-subjoined-transcription-alist): Change key "R" to "+R". (tibetan-precomposition-rule-alist): Move punctuation entries to tibetan-punctuation-transcription-alist and tibetan-obsolete-glyphs. (tibetan-punctuation-transcription-alist): New variable. (tibetan-obsolete-glyphs): New variable. (tibetan-regexp): Improve the initialization code.
This commit is contained in:
parent
3bdf8898b0
commit
d169c89e68
1 changed files with 97 additions and 78 deletions
|
@ -107,9 +107,19 @@
|
|||
(features tibet-util)
|
||||
(documentation . t)
|
||||
(sample-text .
|
||||
"Tibetan (2$(7"70"](B1$(7"2$(8!;(B2$(7%P`"Q(B1$(7"2$(8!;(B) 2$(7#RP#SP#S(B1$(7!>"7(B2$(7$P`"Q(B1$(8!;(B2$(7"E0"S(B1$(7"G$(8!;$(7"7(B2$(7"20"[(B1$(8!;(B2$(7"D0"[(B1$(7"#"G!>(B2$(7"I0"]0"_(B1$(8!;(B2$(7"9`"Q(B1$(8!;(B2$(7"/0"S(B1$(8!;(B2$(7"5`"Q(B12$(7#2`#90"[(B1$(8!;(B2$(7"H`#A`"U0"c(B1$(7!>(B")))
|
||||
"Tibetan (4$(7"7r'"]0"7"]1"2$(8!;4$(7%Px!"Q0%P"Q1"2$(8!;(B) 4$(7#Rv##Sv##S0#R#S#S1!>"74$Px!"Q0$P"Q1$(8!;4$(7"Er'"S0"E"S1"G$(8!;$(7"74"2r'"[0"2"[1$(8!;4$(7"Dr'"[0"D"[1"#"G!>4"Ir'"]r'"_0"I"]"_1$(8!;4$(7"9x!"Q0"9"Q1$(8!;4$(7"/r'"S0"/"S1$(8!;4$(7"5x!"Q0"5"Q14#2x!#9r'"[0#2#9"[1$(8!;4$(7"Hx!#Ax!"Ur'"c0"H#A"U"c1!>(B")))
|
||||
|
||||
|
||||
;; `$(7"A(B' is included in the pattern for subjoined consonants because we
|
||||
;; treat it specially in tibetan-add-components.
|
||||
;; The regexp is four bracket expressions in a row: exactly one character
;; from the first, any number of characters from the second (the
;; subjoined-consonant class discussed in the comment above this
;; definition), and at most one character from each of the last two.
(defconst tibetan-composable-pattern
|
||||
"[$(7"!(B-$(7"J(B][$(7"A#!(B-$(7#J(B]*[$(7"Q(B-$(7"^"a"e(B]?[$(7"_"c"d"g(B-$(7"l!I!e!g(B]?"
|
||||
"Regexp matching a composable sequence of Tibetan characters.")
|
||||
|
||||
;; Register a function to compose Tibetan characters.
;; The value stored at index (make-char 'tibetan) of
;; composition-function-table is a one-element list whose single entry
;; pairs tibetan-composable-pattern with the symbol
;; tibetan-composition-function.
|
||||
(aset composition-function-table (make-char 'tibetan)
|
||||
(list (cons tibetan-composable-pattern 'tibetan-composition-function)))
|
||||
|
||||
;;;
|
||||
;;; Definitions of conversion data.
|
||||
;;;
|
||||
|
@ -175,21 +185,23 @@
|
|||
("E" . "$(7"\(B")
|
||||
("O" . "$(7"^(B")
|
||||
("I" . "$(7"a(B")
|
||||
("M" . "$(7"_(B")
|
||||
("~" . "$(7"c(B") ; not specified in Ext.wylie
|
||||
("`" . "$(7"d(B") ; idem.
|
||||
("," . "$(7"e(B") ; idem.
|
||||
("v" . "$(7"g(B") ; idem.
|
||||
("V" . "$(7"h(B") ; idem.
|
||||
("x" . "$(7"i(B") ; idem.
|
||||
("X" . "$(7"j(B") ; idem.
|
||||
("q" . "$(7"k(B") ; idem.
|
||||
("Q" . "$(7"l(B") ; idem.
|
||||
("_o" . "$(7!g(B") ; idem.
|
||||
("_O" . "$(7!e(B") ; idem.
|
||||
("_/" . "$(7!I(B") ; idem.
|
||||
))
|
||||
|
||||
;; Alist of (TRANSCRIPTION . STRING) pairs: each key is a roman
;; transcription string and each value is the Tibetan character string
;; it stands for.
;; NOTE(review): every pair listed here also appears in the alist that
;; ends just above this definition -- confirm the duplication is intended.
(defconst tibetan-modifier-transcription-alist
|
||||
'(("M" . "$(7"_(B")
|
||||
("~" . "$(7"c(B")
|
||||
("`" . "$(7"d(B")
|
||||
("x" . "$(7"i(B")
|
||||
("X" . "$(7"j(B")
|
||||
("v" . "$(7"g(B")
|
||||
("V" . "$(7"h(B")
|
||||
("q" . "$(7"k(B")
|
||||
("Q" . "$(7"l(B")
|
||||
("_/" . "$(7!I(B")
|
||||
("_o" . "$(7!g(B")
|
||||
("_O" . "$(7!e(B")))
|
||||
|
||||
(defconst tibetan-precomposed-transcription-alist
|
||||
'(("phyw" . "$(7$G(B")
|
||||
("tshw" . "$(7$)(B")
|
||||
|
@ -302,48 +314,49 @@
|
|||
("sm" . "$(7%Y(B")))
|
||||
|
||||
(defconst tibetan-subjoined-transcription-alist
|
||||
'(("+k" . "$(7#!(B")
|
||||
("+kh" . "$(7#"(B")
|
||||
("+g" . "$(7##(B")
|
||||
("+gh" . "$(7#$(B")
|
||||
("+ng" . "$(7#%(B")
|
||||
("+c" . "$(7#&(B")
|
||||
("+ch" . "$(7#'(B")
|
||||
("+j" . "$(7#((B")
|
||||
("+ny" . "$(7#*(B")
|
||||
("+T" . "$(7#+(B")
|
||||
("+TH" . "$(7#,(B")
|
||||
("+D" . "$(7#-(B")
|
||||
("+DH" . "$(7#.(B")
|
||||
("+N" . "$(7#/(B")
|
||||
("+t" . "$(7#0(B")
|
||||
("+th" . "$(7#1(B")
|
||||
("+d" . "$(7#2(B")
|
||||
("+dh" . "$(7#3(B")
|
||||
("+n" . "$(7#4(B")
|
||||
("+p" . "$(7#5(B")
|
||||
("+ph" . "$(7#6(B")
|
||||
("+b" . "$(7#7(B")
|
||||
("+bh" . "$(7#8(B")
|
||||
("+m" . "$(7#9(B")
|
||||
("+ts" . "$(7#:(B")
|
||||
("+tsh" . "$(7#;(B")
|
||||
("+dz" . "$(7#<(B")
|
||||
("+dzh" . "$(7#=(B")
|
||||
("+w" . "$(7#>(B")
|
||||
("+zh" . "$(7#?(B")
|
||||
("+z" . "$(7#@(B")
|
||||
("+'" . "$(7#A(B")
|
||||
("+y" . "$(7#B(B")
|
||||
("+r" . "$(7#C(B")
|
||||
("+l" . "$(7#D(B")
|
||||
("+sh" . "$(7#E(B")
|
||||
("+SH" . "$(7#F(B")
|
||||
("+s" . "$(7#G(B")
|
||||
("+h" . "$(7#H(B")
|
||||
("+A" . "$(7#I(B")
|
||||
("+kSH" . "$(7#J(B")
|
||||
("R" . "$(7#P(B")))
|
||||
(sort '(("+k" . "$(7#!(B")
|
||||
("+kh" . "$(7#"(B")
|
||||
("+g" . "$(7##(B")
|
||||
("+gh" . "$(7#$(B")
|
||||
("+ng" . "$(7#%(B")
|
||||
("+c" . "$(7#&(B")
|
||||
("+ch" . "$(7#'(B")
|
||||
("+j" . "$(7#((B")
|
||||
("+ny" . "$(7#*(B")
|
||||
("+T" . "$(7#+(B")
|
||||
("+TH" . "$(7#,(B")
|
||||
("+D" . "$(7#-(B")
|
||||
("+DH" . "$(7#.(B")
|
||||
("+N" . "$(7#/(B")
|
||||
("+t" . "$(7#0(B")
|
||||
("+th" . "$(7#1(B")
|
||||
("+d" . "$(7#2(B")
|
||||
("+dh" . "$(7#3(B")
|
||||
("+n" . "$(7#4(B")
|
||||
("+p" . "$(7#5(B")
|
||||
("+ph" . "$(7#6(B")
|
||||
("+b" . "$(7#7(B")
|
||||
("+bh" . "$(7#8(B")
|
||||
("+m" . "$(7#9(B")
|
||||
("+ts" . "$(7#:(B")
|
||||
("+tsh" . "$(7#;(B")
|
||||
("+dz" . "$(7#<(B")
|
||||
("+dzh" . "$(7#=(B")
|
||||
("+w" . "$(7#>(B")
|
||||
("+zh" . "$(7#?(B")
|
||||
("+z" . "$(7#@(B")
|
||||
("+'" . "$(7#A(B")
|
||||
("+y" . "$(7#B(B")
|
||||
("+r" . "$(7#C(B")
|
||||
("+l" . "$(7#D(B")
|
||||
("+sh" . "$(7#E(B")
|
||||
("+SH" . "$(7#F(B")
|
||||
("+s" . "$(7#G(B")
|
||||
("+h" . "$(7#H(B")
|
||||
("+A" . "$(7#I(B")
|
||||
("+kSH" . "$(7#J(B")
|
||||
("+R" . "$(7#P(B"))
|
||||
(lambda (x y) (> (length (car x)) (length (car y))))))
|
||||
|
||||
;;;
|
||||
;;; alist for Tibetan base consonant <-> subjoined consonant conversion.
|
||||
|
@ -396,7 +409,7 @@
|
|||
;;; (includes some punctuation conversion rules)
|
||||
;;;
|
||||
(defconst tibetan-precomposition-rule-alist
|
||||
'(("$(7"6#B#>(B" . "$(7$G(B")
|
||||
`(("$(7"6#B#>(B" . "$(7$G(B")
|
||||
("$(7"##C#>(B" . "$(7$_(B")
|
||||
("$(7";#>(B" . "$(7$)(B")
|
||||
("$(7"C#:#>(B" . "$(7%.(B")
|
||||
|
@ -490,36 +503,42 @@
|
|||
("$(7"G#4(B" . "$(7%V(B")
|
||||
("$(7"G#5(B" . "$(7%W(B")
|
||||
("$(7"G#7(B" . "$(7%X(B")
|
||||
("$(7"G#9(B" . "$(7%Y(B")
|
||||
("$(7!=(B" . "$(8!=(B") ; 2 col <-> 1 col
|
||||
("$(7"G#9(B" . "$(7%Y(B")))
|
||||
|
||||
(defconst tibetan-obsolete-glyphs
|
||||
`(("$(7!=(B" . "$(8!=(B") ; 2 col <-> 1 col
|
||||
("$(7!?(B" . "$(8!?(B")
|
||||
("$(7!@(B" . "$(8!@(B")
|
||||
("$(7!A(B" . "$(8!A(B")
|
||||
("$(7"`(B" . "$(8"`(B")
|
||||
("$(7!;(B" . "$(8!;(B")
|
||||
("$(7!D(B" . "$(8!D(B")
|
||||
("$(7!>(B $(7!>(B" . "2$(7!>P(B P$(7!>(B1") ; Yes this is dirty. But ...
|
||||
("$(7!4!5!5(B" . "2$(7#RP#SP#SP#S(B1")
|
||||
("$(7!4!5(B" . "2$(7#RP#SP#S(B1")
|
||||
("$(7!6(B" . "2$(7#RP#S_!I(B1")
|
||||
("$(7!4(B" . "2$(7#RP#S(B1")))
|
||||
;; Yes these are dirty. But ...
|
||||
("$(7!>(B $(7!>(B" . ,(compose-string "$(7!>(B $(7!>(B" 0 3 [?$(7!>(B (Br . Bl) ? (Br . Bl) ?$(7!>(B]))
|
||||
("$(7!4!5!5(B" . ,(compose-string
|
||||
"$(7#R#S#S#S(B" 0 4
|
||||
[?$(7#R(B (Br . Bl) ?$(7#S(B (Br . Bl) ?$(7#S(B (Br . Bl) ?$(7#S(B]))
|
||||
("$(7!4!5(B" . ,(compose-string "$(7#R#S#S(B" 0 3 [?$(7#R(B (Br . Bl) ?$(7#S(B (Br . Bl) ?$(7#S(B]))
|
||||
("$(7!6(B" . ,(compose-string "$(7#R#S!I(B" 0 3 [?$(7#R(B (Br . Bl) ?$(7#S(B (br . tr) ?$(7!I(B]))
|
||||
("$(7!4(B" . ,(compose-string "$(7#R#S(B" 0 2 [?$(7#R(B (Br . Bl) ?$(7#S(B]))))
|
||||
|
||||
(defvar tibetan-regexp
|
||||
(let ((l (append tibetan-consonant-transcription-alist
|
||||
tibetan-vowel-transcription-alist
|
||||
tibetan-subjoined-transcription-alist))
|
||||
temp)
|
||||
(setq temp "\\(")
|
||||
(setq temp (concat temp (car (car l))))
|
||||
(setq l (cdr l))
|
||||
(defconst tibetan-regexp
|
||||
(let ((l (list tibetan-precomposed-transcription-alist
|
||||
tibetan-consonant-transcription-alist
|
||||
tibetan-vowel-transcription-alist
|
||||
tibetan-modifier-transcription-alist
|
||||
tibetan-subjoined-transcription-alist))
|
||||
(separator "\\|")
|
||||
tail pattern)
|
||||
(while l
|
||||
(setq temp (concat temp "\\|" (car (car l))))
|
||||
(setq l (cdr l)))
|
||||
(concat temp "\\)$"))
|
||||
"Regexp string to match a romanized Tibetan character component, i.e.,
|
||||
base and subjoined consonant, vowel and vowel modifier. The result of matching
|
||||
is to be used for indexing alists at conversion from a roman transcription to
|
||||
the corresponding Tibetan character.")
|
||||
(setq tail (car l) l (cdr l))
|
||||
(while tail
|
||||
(setq pattern (cons separator (cons (car (car tail)) pattern))
|
||||
tail (cdr tail))))
|
||||
(apply 'concat (nreverse (cdr pattern))))
|
||||
"Regexp matching a Tibetan transcription of a composable Tibetan sequence.
|
||||
The result of matching is to be used for indexing alists at conversion
|
||||
from a roman transcription to the corresponding Tibetan character.")
|
||||
|
||||
(defvar tibetan-precomposed-regexp
|
||||
(let ((l tibetan-precomposed-transcription-alist)
|
||||
|
|
Loading…
Add table
Reference in a new issue