2021-01-04 20:57:42 -05:00
|
|
|
;;; thai.el --- support for Thai -*- coding: utf-8; lexical-binding: t; -*-
|
1997-02-20 07:02:49 +00:00
|
|
|
|
2022-01-01 02:45:51 -05:00
|
|
|
;; Copyright (C) 1997-1998, 2000-2022 Free Software Foundation, Inc.
|
2006-12-13 01:13:58 +00:00
|
|
|
;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
|
2011-01-02 15:50:46 -08:00
|
|
|
;; 2005, 2006, 2007, 2008, 2009, 2010, 2011
|
2003-09-08 12:53:41 +00:00
|
|
|
;; National Institute of Advanced Industrial Science and Technology (AIST)
|
|
|
|
;; Registration Number H13PRO009
|
2005-03-18 07:10:50 +00:00
|
|
|
;; Copyright (C) 2005
|
|
|
|
;; National Institute of Advanced Industrial Science and Technology (AIST)
|
|
|
|
;; Registration Number H14PRO021
|
1997-02-20 07:02:49 +00:00
|
|
|
|
2002-05-29 22:45:23 +00:00
|
|
|
;; Keywords: multilingual, Thai, i18n
|
1997-02-20 07:02:49 +00:00
|
|
|
|
|
|
|
;; This file is part of GNU Emacs.
|
|
|
|
|
2008-05-06 04:29:13 +00:00
|
|
|
;; GNU Emacs is free software: you can redistribute it and/or modify
|
1997-02-20 07:02:49 +00:00
|
|
|
;; it under the terms of the GNU General Public License as published by
|
2008-05-06 04:29:13 +00:00
|
|
|
;; the Free Software Foundation, either version 3 of the License, or
|
|
|
|
;; (at your option) any later version.
|
1997-02-20 07:02:49 +00:00
|
|
|
|
|
|
|
;; GNU Emacs is distributed in the hope that it will be useful,
|
|
|
|
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
;; GNU General Public License for more details.
|
|
|
|
|
|
|
|
;; You should have received a copy of the GNU General Public License
|
2017-09-13 15:52:52 -07:00
|
|
|
;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>.
|
1997-02-20 07:02:49 +00:00
|
|
|
|
|
|
|
;;; Commentary:
|
|
|
|
|
|
|
|
;; For Thai, the character set TIS620 is supported.
|
|
|
|
|
|
|
|
;;; Code:
|
|
|
|
|
2002-03-01 02:28:29 +00:00
|
|
|
;; TIS620 as a charset-based coding system over the `tis620-2533' charset.
(define-coding-system 'thai-tis620
  "8-bit encoding for ASCII (MSB=0) and Thai TIS620 (MSB=1)."
  :coding-type 'charset
  :mnemonic ?T
  :charset-list '(tis620-2533))

;; Alternative names for `thai-tis620', registered in this order.
(dolist (alias '(th-tis620 tis620 tis-620))
  (define-coding-system-alias alias 'thai-tis620))
|
1997-02-20 07:02:49 +00:00
|
|
|
|
|
|
|
;; Register the "Thai" language environment.  The alist is quoted, so
;; every value below is stored as data exactly as written.
(set-language-info-alist
 "Thai"
 '((tutorial . "TUTORIAL.th")
   (charset thai-tis620)
   (coding-system thai-tis620 iso-8859-11 cp874)
   (coding-priority thai-tis620)
   (nonascii-translation . tis620-2533)
   (input-method . "thai-kesmanee")
   (unibyte-display . thai-tis620)
   (features thai-util)
   (setup-function . setup-thai-language-environment-internal)
   (exit-function . exit-thai-language-environment-internal)
   ;; The sample text is a form rather than a plain string; it is not
   ;; evaluated here because it sits inside the quoted alist.
   (sample-text
    . (thai-compose-string
       (copy-sequence "Thai (ภาษาไทย) สวัสดีครับ, สวัสดีค่ะ")))
   (documentation . t)))
|
1997-02-20 07:02:49 +00:00
|
|
|
|
2002-05-23 18:23:57 +00:00
|
|
|
;; Codepage 874, a charset-based coding system over the `cp874' charset.
(define-coding-system 'cp874
  "DOS codepage 874 (Thai)"
  :coding-type 'charset
  :mnemonic ?D
  :charset-list '(cp874)
  :mime-charset 'cp874)

;; `ibm874' is another name for the same coding system.
(define-coding-system-alias 'ibm874 'cp874)
|
|
|
|
|
2002-05-29 22:45:23 +00:00
|
|
|
;; ISO 8859-11, a charset-based coding system over the `iso-8859-11' charset.
(define-coding-system 'iso-8859-11
  "ISO/IEC 8859/11 (Latin/Thai)
This is the same as `thai-tis620' with the addition of no-break-space."
  :coding-type 'charset
  :mnemonic ?*
  ;; Not actually registered as a MIME charset as of 2002-05-24.
  :mime-charset 'iso-8859-11
  :charset-list '(iso-8859-11))
|
|
|
|
|
2002-11-07 06:32:02 +00:00
|
|
|
;; For automatic composition.
;; Every character in MARKS gets the same two-rule list (one shared
;; object) as its entry in `composition-function-table'.
(let ((marks "ัิีึืฺุู็่้๊๋์ํ๎")
      (rules '(["[ก-ฯ].[่้๊๋์]?ำ?" 1 thai-composition-function]
               [nil 0 thai-composition-function])))
  (mapc (lambda (mark)
          (aset composition-function-table mark rules))
        marks))

;; SARA AM gets its own single-rule entry.
(aset composition-function-table ?ำ '(["[ก-ฯ]." 1 thai-composition-function]))
|
1999-12-15 00:48:14 +00:00
|
|
|
|
2022-02-16 17:07:58 +02:00
|
|
|
;; Tai-Tham
|
|
|
|
|
|
|
|
;; Register the "Northern Thai" language environment.  The alist is
;; quoted, so every value is stored as written.
(set-language-info-alist
 "Northern Thai"
 '((charset unicode)
   (coding-system utf-8)
   (coding-priority utf-8)
   (sample-text
    . "Northern Thai (ᨣᩣᩴᨾᩮᩬᩥᨦ / ᨽᩣᩈᩣᩃ᩶ᩣ᩠ᨶᨶᩣ) ᩈ᩠ᩅᩢᩔ᩠ᨯᩦᨣᩕᩢ᩠ᨸ")
   (documentation . t)))
|
|
|
|
|
|
|
|
;; From Richard Wordingham <richard.wordingham@ntlworld.com>:
|
|
|
|
(defvar tai-tham-composable-pattern
  (let ((table
         ;; Each entry maps a one-letter placeholder used in the two
         ;; templates below to the regexp fragment substituted for it.
         ;; C is letters, independent vowels, digits, punctuation and symbols.
         '(("C" . "[\u1A20-\u1A54\u1A80-\u1A89\u1A90-\u1A99\u1AA0-\u1AAD]")
           ("M" . ; Marks, CGJ, ZWNJ, ZWJ
            ;; ZWJ is U+200D and must be spelled with a `\u' escape.
            ;; Written without the `u', the reader takes the octal
            ;; escape `\200' followed by a literal `D', which drops ZWJ
            ;; from the class and adds the ASCII letter D to it.
            "[\u0324\u034F\u0E49\u0E4A\u0E4B\u1A55-\u1A57\u1A59-\u1A5E\u1A61-\u1A7C\u1A7F\u200C\u200D]")
           ("H" . "\u1A60") ; Sakot
           ("S" . ; Marks commuting with sakot
            "[\u0E49-\u0E4B\u0EC9\u0ECB\u1A75-\u1A7C]")
           ("N" . "\u1A58"))) ; mai kang lai
        (basic-syllable "C\\(N*\\(M\\|HS*C\\)\\)*")
        (regexp "X\\(N\\(X\\)?\\)*H?")) ; where X is basic syllable
    ;; The placeholders are upper-case letters, so the substitutions
    ;; are done case-sensitively.  X is expanded first because its
    ;; expansion itself contains the other placeholders.
    (let ((case-fold-search nil))
      (setq regexp (replace-regexp-in-string "X" basic-syllable regexp t t))
      (dolist (elt table)
        (setq regexp (replace-regexp-in-string (car elt) (cdr elt)
                                               regexp t t))))
    regexp)
  "Regexp matching a composable sequence of Tai Tham characters.
The value is computed when the variable is defined, by expanding
one-letter placeholders in a template: X (basic syllable), C (base
characters), M (marks, CGJ, ZWNJ and ZWJ), H (sakot), S (marks
commuting with sakot) and N (mai kang lai).")
|
|
|
|
|
|
|
|
;; Install one shared rule list, built from
;; `tai-tham-composable-pattern', for each of the four code-point
;; ranges below in `composition-function-table'.
(let ((rule (list (vector tai-tham-composable-pattern 0 'font-shape-gstring))))
  (dolist (range '((#x1A20 . #x1A54)
                   (#x1A80 . #x1A89)
                   (#x1A90 . #x1A99)
                   (#x1AA0 . #x1AAD)))
    (set-char-table-range composition-function-table range rule)))
|
|
|
|
|
2000-11-22 19:28:28 +00:00
|
|
|
;; Announce that the `thai' feature is now loaded.
(provide 'thai)
|
|
|
|
|
1997-02-20 07:02:49 +00:00
|
|
|
;;; thai.el ends here
|