emacs/lisp/language/tibet-util.el

;;; tibet-util.el --- utilities for Tibetan   -*- coding: utf-8-emacs; -*-

;; Copyright (C) 1997, 2001-2013 Free Software Foundation, Inc.
;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
;;   2005, 2006, 2007, 2008, 2009, 2010, 2011
;;   National Institute of Advanced Industrial Science and Technology (AIST)
;;   Registration Number H14PRO021

;; Author: Toru TOMABECHI <Toru.Tomabechi@orient.unil.ch>
;; Keywords: multilingual, Tibetan
;; Created: Feb. 17. 1997

;; This file is part of GNU Emacs.

;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.

;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.

;;; History:
;; 1997.03.13 Modification in treatment of text properties;
;;            Support for some special signs and punctuation.
;; 1999.10.25 Modification for a new composition way by K.Handa.

;;; Commentary:

;;; Code:

;; Alist of (FROM . TO) string pairs for obsolete Tibetan glyphs.
;; The list is built with backquote so that the `,(compose-string ...)'
;; forms are evaluated once, when the constant is defined; for those
;; entries TO is the string returned by `compose-string', called with
;; an explicit component vector (characters interleaved with
;; composition rules such as (Br . Bl)).
;; NOTE(review): this constant is not referenced anywhere else in this
;; file; presumably it is consumed by other Tibetan support code --
;; confirm before changing its shape.
(defconst tibetan-obsolete-glyphs
  `(("།" . "།")			; 2 col <-> 1 col
    ("༏" . "༏")
    ("༐" . "༐")
    ("༑" . "༑")
    ("ཿ" . "ཿ")
    ("་" . "་")
    ("༔" . "༔")
    ;; Yes these are dirty. But ...
    ;; The remaining entries map a character sequence to a string that
    ;; is composed at definition time.
    ("༎ ༎" . ,(compose-string "༎ ༎" 0 3 [?༎ (Br . Bl) ?  (Br . Bl) ?༎]))
    ("༄༅༅" . ,(compose-string
		  "࿁࿂࿂࿂" 0 4
		  [?࿁ (Br . Bl) ?࿂ (Br . Bl) ?࿂ (Br . Bl) ?࿂]))
    ("༄༅" . ,(compose-string "࿁࿂࿂" 0 3 [?࿁ (Br . Bl) ?࿂ (Br . Bl) ?࿂]))
    ("༆" . ,(compose-string "࿁࿂༙" 0 3 [?࿁ (Br . Bl) ?࿂ (br . tr) ?༙]))
    ("༄"   . ,(compose-string "࿁࿂" 0 2 [?࿁ (Br . Bl) ?࿂]))))

;;;###autoload
(defun tibetan-char-p (ch)
  "Return non-nil if character CH belongs to a Tibetan charset.
CH counts as Tibetan when `char-charset' reports either `tibetan' or
`tibetan-1-column' for it.  Otherwise the result is nil."
  (let ((charset (char-charset ch)))
    (memq charset '(tibetan tibetan-1-column))))

;;; Functions for Tibetan <-> Tibetan-transcription.

;;;###autoload
(defun tibetan-tibetan-to-transcription (str)
  "Return the Roman transcription of the Tibetan string STR.
STR is scanned left to right.  Wherever a precomposable sequence
\(as matched by `tibetan-precomposition-rule-regexp') starts at the
current position, the whole sequence is transcribed through
`tibetan-precomposition-rule-alist' and
`tibetan-precomposed-transcription-alist'.  Otherwise a single
character is looked up in the consonant, vowel and subjoined
transcription alists, in that order.  Text for which no transcription
is found contributes nothing to the result."
  (let ((pos 0)
        (end (length str))
        ;; Transcribed pieces, most recent first.
        (pieces nil)
        piece)
    (while (< pos end)
      (cond
       ;; A precomposable sequence begins exactly at POS.
       ((eq (string-match tibetan-precomposition-rule-regexp str pos) pos)
        (let ((precomposed (cdr (assoc (match-string 0 str)
                                       tibetan-precomposition-rule-alist))))
          (setq pos (match-end 0)
                piece (car (rassoc precomposed
                                   tibetan-precomposed-transcription-alist)))))
       ;; Otherwise transcribe just the character at POS.
       (t
        (let ((single (substring str pos (1+ pos))))
          (setq pos (1+ pos)
                piece (car (or (rassoc single
                                       tibetan-consonant-transcription-alist)
                               (rassoc single
                                       tibetan-vowel-transcription-alist)
                               (rassoc single
                                       tibetan-subjoined-transcription-alist)))))))
      (push piece pieces))
    (apply #'concat (nreverse pieces))))

;;;###autoload
(defun tibetan-transcription-to-tibetan (str)
  "Convert the Roman transcription STR into a Tibetan character string.
Every match of `tibetan-regexp' in STR is replaced by its Tibetan
equivalent; text between matches is copied through unchanged.  A match
that names a precomposed form is expanded to the character sequence
recorded in `tibetan-precomposition-rule-alist'.  Matching is
case-sensitive.  The returned string has no composition information."
  (let ((case-fold-search nil)          ; Case is significant.
        (pos 0)
        ;; Output pieces, most recent first.
        (pieces nil)
        match-start roman)
    (while (setq match-start (string-match tibetan-regexp str pos))
      ;; Keep any text before the match that is not a transcription.
      (when (< pos match-start)
        (push (substring str pos match-start) pieces))
      (setq roman (match-string 0 str)
            pos (match-end 0))
      (let ((precomposed
             (cdr (assoc roman tibetan-precomposed-transcription-alist))))
        (push (if precomposed
                  ;; ROMAN transcribes a precomposable sequence; emit
                  ;; the sequence it stands for.
                  (car (rassoc precomposed tibetan-precomposition-rule-alist))
                (cdr (or (assoc roman tibetan-consonant-transcription-alist)
                         (assoc roman tibetan-vowel-transcription-alist)
                         (assoc roman tibetan-modifier-transcription-alist)
                         (assoc roman tibetan-subjoined-transcription-alist))))
              pieces)))
    ;; Trailing text after the last match is kept as is.
    (when (< pos (length str))
      (push (substring str pos) pieces))
    (apply #'concat (nreverse pieces))))

;;;
;;; Functions for composing/decomposing Tibetan sequence.
;;;
;;; A Tibetan syllable is typically structured as follows:
;;;
;;;      [Prefix] C [C+] V [M] [Suffix [Post suffix]]
;;;
;;; where C's are all vertically stacked, V appears below or above
;;; consonant cluster and M is always put above the C[C+]V combination.
;;; (Sanskrit visarga, though it is a vowel modifier, is considered
;;;  to be a punctuation.)
;;;
;;; Here are examples of the words "bsgrubs" and "hfauM"
;;;
;;;            བསྒྲུབས            ཧཱུཾ
;;;
;;;                             M
;;;             b s b s         h
;;;               g             fa
;;;               r             u
;;;               u
;;;
;;; Consonants `'' (འ), `w' (ཝ), `y' (ཡ), `r' (ར) take special
;;; forms when they are used as subjoined consonant.  Consonant `r'
;;; takes another special form when used as superjoined in such a case
;;; as "rka", while it does not change its form when conjoined with
;;; subjoined `'', `w' or `y' as in "rwa", "rya".

;; Append a proper composition rule and glyph to COMPONENTS to compose
;; CHAR with a composition that has COMPONENTS.
;;
;; COMPONENTS is a non-empty list that is modified destructively: the
;; new elements are spliced in after its last cons, and the character
;; held in that last cons may itself be replaced by another form.
;; The callers in this file ignore the return value.
;;
;; The rule attached to CHAR is chosen as follows:
;;  - category ?2 (upper vowel sign): (tc . bc), stacked above;
;;  - category ?3 (lower vowel sign): (bc . tc), stacked below, except
;;    for two characters which are not added at all;
;;  - the last component is RA and CHAR is not one of the four listed
;;    subjoined forms: RA is replaced by another form, and CHAR is
;;    stacked below;
;;  - anything else: the last component is replaced via
;;    `tibetan-base-to-subjoined-alist' when it matches the character
;;    class below (and CHAR is not the excluded sign), and CHAR is
;;    stacked below.
;;
;; NOTE(review): the local variable `tmp' is bound but never used.

(defun tibetan-add-components (components char)
  (let ((last (last components))
	(stack-upper '(tc . bc))
	(stack-under '(bc . tc))
	rule comp-vowel tmp)
    ;; Special treatment for 'a chung.
    ;; If 'a follows a consonant, turn it into the subjoined form.
    ;; * Disabled by Tomabechi 2000/06/09 *
    ;; Because in Unicode, འ may follow directly a consonant without
    ;; any intervening vowel.
    ;; NOTE(review): the example that followed here was damaged by an
    ;; encoding error in this copy; restore it from the upstream file.
    ;;(if (and (= char ?འ)
    ;;	     (aref (char-category-set (car last)) ?0))
    ;;	(setq char ?ཱ)) ;; modified for new font by Tomabechi 1999/12/10

    ;; Composite vowel signs are decomposed before being added
    ;; Added by Tomabechi 2000/06/08
    ;; The entry found in `tibetan-composite-vowel-alist' supplies the
    ;; replacement for CHAR (its second element) and the remaining
    ;; elements, which are copied so that the `nconc' at the end of
    ;; this function does not modify the alist itself.
    (if (memq char '(?ཱི ?ཱུ ?ྲྀ ?ཷ ?ླྀ ?ཹ ?ཱྀ))
	(setq comp-vowel
	      (copy-sequence
	       (cddr (assoc (char-to-string char)
			    tibetan-composite-vowel-alist)))
	      char
	      (cadr (assoc (char-to-string char)
			   tibetan-composite-vowel-alist))))
    (cond
     ;; Compose upper vowel sign vertically over.
     ((aref (char-category-set char) ?2)
      (setq rule stack-upper))

     ;; Compose lower vowel sign vertically under.
     ((aref (char-category-set char) ?3)
      ;; These two characters should not be visible when composed, so
      ;; no rule is set and CHAR is not added to COMPONENTS.
      ;; NOTE(review): the first character literal below looks
      ;; mis-encoded in this copy -- verify against the upstream file.
      (if (or (eq char ?<3F><EFBFBD><EFBFBD><EFBFBD>)
	      (eq char #xF70))
	  (setq rule nil)
	(setq rule stack-under)))
     ;; Transform ra-mgo (superscribed r) if followed by a subjoined
     ;; consonant other than w, ', y, r.
     ((and (= (car last) ?ར)
	   (not (memq char '(?ྭ ?ཱ ?ྱ ?ྲ))))
      ;; NOTE(review): the character literal below looks mis-encoded
      ;; in this copy -- verify against the upstream file.
      (setcar last ?<3F><EFBFBD><EFBFBD><EFBFBD>) ;; modified for newfont by Tomabechi 1999/12/10
      (setq rule stack-under))
     ;; Transform initial base consonant if followed by a subjoined
     ;; consonant but 'a.
     (t
      (let ((laststr (char-to-string (car last))))
	(if (and (/= char ?ཱ) ;; modified for new font by Tomabechi
		 (string-match "[ཀ-ཛྷཞཟལ-ཀྵཪ]" laststr))
	    (setcar last (string-to-char
			  (cdr (assoc (char-to-string (car last))
				      tibetan-base-to-subjoined-alist)))))
	(setq rule stack-under))))

    ;; Splice RULE and CHAR in after the last cons of COMPONENTS.
    (if rule
	(setcdr last (list rule char)))
    ;; Added by Tomabechi 2000/06/08
    ;; Append what remained of a decomposed composite vowel.
    (if comp-vowel
	(nconc last comp-vowel))
    ))

;;;###autoload
(defun tibetan-compose-string (str)
  "Compose the Tibetan string STR and return it.
Each match of `tibetan-composable-pattern' in STR is turned into one
composition with `compose-string', which is applied to STR itself.
The component list starts with the precomposed character when the
match begins with a precomposable sequence, and with the first
character of the match otherwise.  The remaining characters of the
match are added one by one with `tibetan-add-components'."
  (let ((pos 0)
        start end components)
    (while (setq pos (string-match tibetan-composable-pattern str pos))
      (setq start pos
            end (match-end 0))
      (cond
       ;; The match opens with a precomposable sequence: represent
       ;; that whole sequence by its single precomposed character.
       ((eq (string-match tibetan-precomposition-rule-regexp str pos) pos)
        (setq components
              (list (string-to-char
                     (cdr (assoc (match-string 0 str)
                                 tibetan-precomposition-rule-alist))))
              pos (match-end 0)))
       ;; Otherwise the first character is the base of the composition.
       (t
        (setq components (list (aref str pos))
              pos (1+ pos))))
      ;; Stack every remaining character of the match onto COMPONENTS.
      (while (< pos end)
        (tibetan-add-components components (aref str pos))
        (setq pos (1+ pos)))
      (compose-string str start end components)))
  str)

;;;###autoload
(defun tibetan-compose-region (beg end)
  "Compose Tibetan text in the region between BEG and END.
Each match of `tibetan-composable-pattern' inside the region is turned
into one composition with `compose-region'.  The component list starts
with the precomposed character when the match begins with a
precomposable sequence, and with the first character of the match
otherwise.  The remaining characters of the match are added one by one
with `tibetan-add-components'.  Point and the current narrowing are
restored afterwards."
  (interactive "r")
  (save-excursion
    (save-restriction
      (narrow-to-region beg end)
      (goto-char (point-min))
      (while (re-search-forward tibetan-composable-pattern nil t)
        (let ((from (match-beginning 0))
              (to (match-end 0))
              components)
          (goto-char from)
          (if (looking-at tibetan-precomposition-rule-regexp)
              ;; The match opens with a precomposable sequence:
              ;; represent it by its single precomposed character and
              ;; continue after the whole sequence.
              (progn
                (setq components
                      (list (string-to-char
                             (cdr
                              (assoc (match-string 0)
                                     tibetan-precomposition-rule-alist)))))
                (goto-char (match-end 0)))
            ;; Otherwise the first character is the base.
            (setq components (list (char-after from)))
            (forward-char 1))
          ;; Stack every remaining character of the match.
          (while (< (point) to)
            (tibetan-add-components components (following-char))
            (forward-char 1))
          (compose-region from to components))))))

(defvar tibetan-decompose-precomposition-alist
  (mapcar (lambda (rule)
            (let ((sequence (car rule))
                  (precomposed (string-to-char (cdr rule))))
              (cons precomposed sequence)))
          tibetan-precomposition-rule-alist)
  "Alist used to undo Tibetan precomposition.
Built from `tibetan-precomposition-rule-alist' with each pair turned
around: the key is the first character of a rule's cdr, and the value
is the rule's car.")

;;;###autoload
(defun tibetan-decompose-region (from to)
  "Decompose Tibetan text in the region between FROM and TO.
This is different from `decompose-region' because precomposed Tibetan
characters are decomposed into normal Tibetan character sequences,
using `tibetan-decompose-precomposition-alist'.

FROM and TO may be given in either order.  Point is left at the end of
the processed region."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    (decompose-region (point-min) (point-max))
    ;; Start at the beginning of the narrowed region rather than at
    ;; FROM: when FROM > TO, FROM is the end of the region and the scan
    ;; below would stop immediately without decomposing anything.
    (goto-char (point-min))
    (while (not (eobp))
      (let ((slot (assq (following-char)
                        tibetan-decompose-precomposition-alist)))
        (if slot
            ;; Replace the precomposed character by its sequence.
            ;; Point ends up after the inserted text, so the text is
            ;; not examined again.
            (progn
              (delete-char 1)
              (insert (cdr slot)))
          (forward-char 1))))))


;;;###autoload
(defun tibetan-decompose-string (str)
  "Decompose Tibetan string STR and return the result as a new string.
This is different from `decompose-string' because precomposed Tibetan
characters are decomposed into normal Tibetan character sequences,
using `tibetan-decompose-precomposition-alist'.  Characters without an
entry in that alist are copied unchanged.  STR itself is not modified."
  ;; `mapconcat' builds the result in a single pass; growing a string
  ;; with `concat' once per character copies the accumulated string
  ;; every time and is quadratic in the length of STR.
  (mapconcat (lambda (char)
               (let ((slot (assq char
                                 tibetan-decompose-precomposition-alist)))
                 (if slot
                     (cdr slot)
                   (char-to-string char))))
             str
             ""))

;;;
;;; This variable is used to avoid repeated decomposition.
;;;
;;; It is made buffer-local by the functions that set it.
;;; `tibetan-decompose-buffer' sets it to t after decomposing and does
;;; nothing while it is non-nil; `tibetan-compose-buffer' and
;;; `tibetan-post-read-conversion' reset it to nil.
;;; `tibetan-pre-write-conversion' reads it to decide whether the text
;;; still has to be decomposed before writing.
;;;
(setq-default tibetan-decomposed nil)

;;;###autoload
(defun tibetan-decompose-buffer ()
  "Decompose Tibetan characters in the buffer into their components.
Nothing is done when the buffer-local flag `tibetan-decomposed' is
already non-nil; otherwise the flag is set to t afterwards.
See also the documentation of the function `tibetan-decompose-region'."
  (interactive)
  (make-local-variable 'tibetan-decomposed)
  (unless tibetan-decomposed
    (tibetan-decompose-region (point-min) (point-max))
    (setq tibetan-decomposed t)))

;;;###autoload
(defun tibetan-compose-buffer ()
  "Compose Tibetan character components in the whole buffer.
The buffer-local flag `tibetan-decomposed' is reset to nil afterwards.
See also the documentation of the function `tibetan-compose-region'."
  (interactive)
  (make-local-variable 'tibetan-decomposed)
  (let ((whole-start (point-min))
        (whole-end (point-max)))
    (tibetan-compose-region whole-start whole-end))
  (setq tibetan-decomposed nil))

;;;###autoload
(defun tibetan-post-read-conversion (len)
  "Compose the Tibetan text in the LEN characters following point.
The buffer's modified flag is put back to what it was before the
composition, and the buffer-local flag `tibetan-decomposed' is reset
to nil.  Return the length of the processed text.  Point and the
current narrowing are restored."
  (save-excursion
    (save-restriction
      (let ((was-modified (buffer-modified-p)))
        (narrow-to-region (point) (+ (point) len))
        (tibetan-compose-region (point-min) (point-max))
        ;; Composing must not mark the buffer as modified.
        (set-buffer-modified-p was-modified)
        (set (make-local-variable 'tibetan-decomposed) nil)
        (- (point-max) (point-min))))))


;;;###autoload
(defun tibetan-pre-write-conversion (from to)
  "Copy the text to be written into a new buffer and decompose it there.
FROM is either a string, which is inserted as is, or a buffer position
that together with TO delimits the text to copy from the current
buffer.  The copy is decomposed with `tibetan-decompose-region' unless
`tibetan-decomposed' was already non-nil in the original buffer.
The new buffer is left current.  Return nil."
  ;; Remember the flag of the original buffer before switching away
  ;; from it.
  ;; NOTE(review): `tibetan-decomposed-temp' is assigned as a global
  ;; here and is not declared in the visible part of this file.
  (setq tibetan-decomposed-temp tibetan-decomposed)
  (let ((source-buffer (current-buffer)))
    (set-buffer (generate-new-buffer " *temp*"))
    (cond ((stringp from)
           (insert from))
          (t
           (insert-buffer-substring source-buffer from to)))
    (unless tibetan-decomposed-temp
      (tibetan-decompose-region (point-min) (point-max)))
    ;; Should return nil as annotations.
    nil))


;;;
;;; Unicode-related definitions.
;;;

;; Each element is (FROM . TO): `tibetan-canonicalize-for-unicode-region'
;; looks up the text matched by `tibetan-canonicalize-for-unicode-regexp'
;; as FROM and replaces it with TO.
;; NOTE(review): the first key below, and the start of the regexp
;; further down, look mis-encoded in this copy -- verify both against
;; the upstream file before editing them.
(defvar tibetan-canonicalize-for-unicode-alist
  '(("<EFBFBD><EFBFBD><EFBFBD><EFBFBD>" . "")	;; remove vowel a
    ("ཱི" . "ཱི") ;; decompose vowels whose use is ``discouraged'' in Unicode 3.0
    ("ཱུ" . "ཱུ")
    ("ྲྀ" . "ྲྀ")
    ("ཷ" . "ྲཱྀ")
    ("ླྀ" . "ླྀ")
    ("ཹ" . "ླཱྀ")
    ("ཱྀ" . "ཱྀ"))
  "Rules for canonicalizing Tibetan vowels for Unicode.")

;; A character class; each match is a single character that is then
;; looked up in `tibetan-canonicalize-for-unicode-alist'.
(defvar tibetan-canonicalize-for-unicode-regexp
  "[<5B><><EFBFBD><EFBFBD>ཱཱིུྲྀཷླྀཹཱྀ]"
  "Regexp for Tibetan vowels to be canonicalized in Unicode.")

(defun tibetan-canonicalize-for-unicode-region (from to)
  "Canonicalize Tibetan vowels in the region between FROM and TO.
Every match of `tibetan-canonicalize-for-unicode-regexp' is replaced
by the string associated with it in
`tibetan-canonicalize-for-unicode-alist'.  A match without an entry in
the alist is left unchanged.

FROM and TO may be given in either order.  Point is moved; the current
narrowing is restored."
  (save-restriction
    (narrow-to-region from to)
    ;; Start at the beginning of the narrowed region rather than at
    ;; FROM, so that the search also works when FROM > TO.
    (goto-char (point-min))
    (while (re-search-forward tibetan-canonicalize-for-unicode-regexp nil t)
      (let ((canonical-form
             (cdr (assoc (match-string 0)
                         tibetan-canonicalize-for-unicode-alist))))
        ;; A nil replacement would make `replace-match' signal an
        ;; error, so skip matches the alist does not know about.
        (when canonical-form
          ;; FIXEDCASE and LITERAL are both t: insert the replacement
          ;; verbatim, with no case conversion and no special handling
          ;; of backslashes.
          (replace-match canonical-form t t))))))

;; Read by `tibetan-pre-write-canonicalize-for-unicode', which takes
;; the value in effect in the buffer being written before it switches
;; to its temporary buffer.
(defvar tibetan-strict-unicode t
  "Flag to control Tibetan canonicalizing for Unicode.

If non-nil, the vowel a is removed and composite vowels are decomposed
before writing buffer in Unicode.  See also
`tibetan-canonicalize-for-unicode-regexp' and
`tibetan-canonicalize-for-unicode-alist'.")

;;;###autoload
(defun tibetan-pre-write-canonicalize-for-unicode (from to)
  "Copy the text to be written into a new buffer and canonicalize it.
FROM is either a string, which is inserted as is, or a buffer position
that together with TO delimits the text to copy from the current
buffer.  When `tibetan-strict-unicode' is non-nil in the original
buffer, the copy is processed with
`tibetan-canonicalize-for-unicode-region'.  The new buffer is left
current.  Return nil."
  (let ((source-buffer (current-buffer))
        ;; Take the flag before switching buffers.
        (canonicalize-p tibetan-strict-unicode))
    (set-buffer (generate-new-buffer " *temp*"))
    (cond ((stringp from)
           (insert from))
          (t
           (insert-buffer-substring source-buffer from to)))
    (when canonicalize-p
      (tibetan-canonicalize-for-unicode-region (point-min) (point-max)))
    ;; Should return nil as annotations.
    nil))

(provide 'tibet-util)

;;; tibet-util.el ends here
-												Use UTF-8 for most files with non-ASCII characters.

* admin/notes/unicode (etc/tutorials/TUTORIAL.ko, leim/quail/hanja.el)
(leim/quail/hanja3.el, leim/quail/symbol-ksc.el):
Now utf-8, not iso-2022-7bit.  Also, files that contain non-UTF-8
characters are now encoded in utf-8-emacs, not iso-2022-7bit.
* etc/tutorials/TUTORIAL.ko, tutorials/TUTORIAL.th:
Switch from iso-2022-7bit to utf-8.
* leim/quail/cyrillic.el, leim/quail/czech.el, leim/quail/ethiopic.el:
* leim/quail/greek.el, leim/quail/hanja.el, leim/quail/hanja3.el:
* leim/quail/hebrew.el, leim/quail/lao.el, leim/quail/lrt.el:
* leim/quail/slovak.el, leim/quail/symbol-ksc.el, leim/quail/thai.el:
* leim/quail/tibetan.el, leim/quail/viqr.el, leim/quail/vntelex.el:
* leim/quail/vnvni.el, leim/quail/welsh.el:
* lisp/international/latin1-disp.el, lisp/international/mule-util.el:
* lisp/language/cyril-util.el, lisp/language/european.el:
* lisp/language/ind-util.el, lisp/language/lao-util.el, lisp/language/thai.el:
* lisp/language/tibet-util.el, lisp/language/tibetan.el:
* lisp/language/viet-util.el:
Switch from iso-2022-7bit to utf-8 or (if needed) utf-8-emacs.

Fixes: debbugs:13936

											
										
										
											2013-04-01 18:18:40 -07:00
+								;;; tibet-util.el --- utilities for Tibetan   -*- coding: utf-8-emacs; -*-
-												Initial revision

											
										
										
											1997-04-05 02:44:02 +00:00
-												Update copyright notices for 2013.

											
										
										
											2013-01-01 09:11:05 +00:00
+								;; Copyright (C) 1997, 2001-2013 Free Software Foundation, Inc.
-												Update AIST copyright years.

											
										
										
											2006-12-13 01:13:58 +00:00
+								;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
-												Add 2011 to FSF/AIST copyright years.

											
										
										
											2011-01-02 15:50:46 -08:00
+								;;   2005, 2006, 2007, 2008, 2009, 2010, 2011
-												Fix copyrights.

											
										
										
											2005-05-16 07:03:28 +00:00
+								;;   National Institute of Advanced Industrial Science and Technology (AIST)
 								;;   Registration Number H14PRO021
-												Initial revision

											
										
										
											1997-04-05 02:44:02 +00:00
-												Header format fix.

											
										
										
											2008-12-19 05:29:04 +00:00
+								;; Author: Toru TOMABECHI <Toru.Tomabechi@orient.unil.ch>
-												Initial revision

											
										
										
											1997-04-05 02:44:02 +00:00
+								;; Keywords: multilingual, Tibetan
-												Header format fix.

											
										
										
											2008-12-19 05:29:04 +00:00
+								;; Created: Feb. 17. 1997
-												Initial revision

											
										
										
											1997-04-05 02:44:02 +00:00
 								;; This file is part of GNU Emacs.
-												Switch to recommended form of GPLv3 permissions notice.

											
										
										
											2008-05-06 04:29:13 +00:00
+								;; GNU Emacs is free software: you can redistribute it and/or modify
-												Initial revision

											
										
										
											1997-04-05 02:44:02 +00:00
+								;; it under the terms of the GNU General Public License as published by
-												Switch to recommended form of GPLv3 permissions notice.

											
										
										
											2008-05-06 04:29:13 +00:00
+								;; the Free Software Foundation, either version 3 of the License, or
 								;; (at your option) any later version.
-												Initial revision

											
										
										
											1997-04-05 02:44:02 +00:00
 								;; GNU Emacs is distributed in the hope that it will be useful,
 								;; but WITHOUT ANY WARRANTY; without even the implied warranty of
 								;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 								;; GNU General Public License for more details.
 								;; You should have received a copy of the GNU General Public License
-												Switch to recommended form of GPLv3 permissions notice.

											
										
										
											2008-05-06 04:29:13 +00:00
+								;; along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.
-												Initial revision

											
										
										
											1997-04-05 02:44:02 +00:00
-												Some fixes to follow coding conventions.

											
										
										
											2001-07-16 12:23:00 +00:00
+								;;; History:
-												Initial revision

											
										
										
											1997-04-05 02:44:02 +00:00
+								;; 1997.03.13 Modification in treatment of text properties;
-												Spelling fixes.

											
										
										
											2011-11-24 23:14:48 -08:00
+								;;            Support for some special signs and punctuation.
-												Most functions rewritten.
(tibetan-char-p): Renamed from tibetan-char-examin.
(tibetan-composable-examin) (tibetan-complete-char-examin)
(tibetan-vertical-stacking) (tibetan-composition): Deleted.
(tibetan-add-components): New function.
(tibetan-composition-function): New function.

											
										
										
											1999-12-15 00:50:18 +00:00
+								;; 1999.10.25 Modification for a new composition way by K.Handa.
-												Initial revision

											
										
										
											1997-04-05 02:44:02 +00:00
-												Some fixes to follow coding conventions.

											
										
										
											2001-07-16 12:23:00 +00:00
+								;;; Commentary:
-												Initial revision

											
										
										
											1997-04-05 02:44:02 +00:00
+								;;; Code:
-												(tibetan-obsolete-glyphs): From tibetan.el.

											
										
										
											2002-04-30 17:24:10 +00:00
+								(defconst tibetan-obsolete-glyphs
-												Use UTF-8 for most files with non-ASCII characters.

* admin/notes/unicode (etc/tutorials/TUTORIAL.ko, leim/quail/hanja.el)
(leim/quail/hanja3.el, leim/quail/symbol-ksc.el):
Now utf-8, not iso-2022-7bit.  Also, files that contain non-UTF-8
characters are now encoded in utf-8-emacs, not iso-2022-7bit.
* etc/tutorials/TUTORIAL.ko, tutorials/TUTORIAL.th:
Switch from iso-2022-7bit to utf-8.
* leim/quail/cyrillic.el, leim/quail/czech.el, leim/quail/ethiopic.el:
* leim/quail/greek.el, leim/quail/hanja.el, leim/quail/hanja3.el:
* leim/quail/hebrew.el, leim/quail/lao.el, leim/quail/lrt.el:
* leim/quail/slovak.el, leim/quail/symbol-ksc.el, leim/quail/thai.el:
* leim/quail/tibetan.el, leim/quail/viqr.el, leim/quail/vntelex.el:
* leim/quail/vnvni.el, leim/quail/welsh.el:
* lisp/international/latin1-disp.el, lisp/international/mule-util.el:
* lisp/language/cyril-util.el, lisp/language/european.el:
* lisp/language/ind-util.el, lisp/language/lao-util.el, lisp/language/thai.el:
* lisp/language/tibet-util.el, lisp/language/tibetan.el:
* lisp/language/viet-util.el:
Switch from iso-2022-7bit to utf-8 or (if needed) utf-8-emacs.

Fixes: debbugs:13936

											
										
										
											2013-04-01 18:18:40 -07:00
+								  `(("།" . "།")			; 2 col <-> 1 col
 								    ("༏" . "༏")
 								    ("༐" . "༐")
 								    ("༑" . "༑")
 								    ("ཿ" . "ཿ")
 								    ("་" . "་")
 								    ("༔" . "༔")
-												(tibetan-obsolete-glyphs): From tibetan.el.

											
										
										
											2002-04-30 17:24:10 +00:00
+								    ;; Yes these are dirty. But ...
-												Use UTF-8 for most files with non-ASCII characters.

* admin/notes/unicode (etc/tutorials/TUTORIAL.ko, leim/quail/hanja.el)
(leim/quail/hanja3.el, leim/quail/symbol-ksc.el):
Now utf-8, not iso-2022-7bit.  Also, files that contain non-UTF-8
characters are now encoded in utf-8-emacs, not iso-2022-7bit.
* etc/tutorials/TUTORIAL.ko, tutorials/TUTORIAL.th:
Switch from iso-2022-7bit to utf-8.
* leim/quail/cyrillic.el, leim/quail/czech.el, leim/quail/ethiopic.el:
* leim/quail/greek.el, leim/quail/hanja.el, leim/quail/hanja3.el:
* leim/quail/hebrew.el, leim/quail/lao.el, leim/quail/lrt.el:
* leim/quail/slovak.el, leim/quail/symbol-ksc.el, leim/quail/thai.el:
* leim/quail/tibetan.el, leim/quail/viqr.el, leim/quail/vntelex.el:
* leim/quail/vnvni.el, leim/quail/welsh.el:
* lisp/international/latin1-disp.el, lisp/international/mule-util.el:
* lisp/language/cyril-util.el, lisp/language/european.el:
* lisp/language/ind-util.el, lisp/language/lao-util.el, lisp/language/thai.el:
* lisp/language/tibet-util.el, lisp/language/tibetan.el:
* lisp/language/viet-util.el:
Switch from iso-2022-7bit to utf-8 or (if needed) utf-8-emacs.

Fixes: debbugs:13936

											
										
										
											2013-04-01 18:18:40 -07:00
+								    ("༎ ༎" . ,(compose-string "༎ ༎" 0 3 [?༎ (Br . Bl) ?  (Br . Bl) ?༎]))
 								    ("༄༅༅" . ,(compose-string
 										  "࿁࿂࿂࿂" 0 4
 										  [?࿁ (Br . Bl) ?࿂ (Br . Bl) ?࿂ (Br . Bl) ?࿂]))
 								    ("༄༅" . ,(compose-string "࿁࿂࿂" 0 3 [?࿁ (Br . Bl) ?࿂ (Br . Bl) ?࿂]))
 								    ("༆" . ,(compose-string "࿁࿂༙" 0 3 [?࿁ (Br . Bl) ?࿂ (br . tr) ?༙]))
 								    ("༄"   . ,(compose-string "࿁࿂" 0 2 [?࿁ (Br . Bl) ?࿂]))))
-												(tibetan-obsolete-glyphs): From tibetan.el.

											
										
										
											2002-04-30 17:24:10 +00:00
-												Initial revision

											
										
										
											1997-04-05 02:44:02 +00:00
+								;;;###autoload
-												Most functions rewritten.
(tibetan-char-p): Renamed from tibetan-char-examin.
(tibetan-composable-examin) (tibetan-complete-char-examin)
(tibetan-vertical-stacking) (tibetan-composition): Deleted.
(tibetan-add-components): New function.
(tibetan-composition-function): New function.

											
										
										
											1999-12-15 00:50:18 +00:00
+								(defun tibetan-char-p (ch)
 								  "Check if char CH is Tibetan character.
 								Returns non-nil if CH is Tibetan. Otherwise, returns nil."
 								  (memq (char-charset ch) '(tibetan tibetan-1-column)))
-												Initial revision

											
										
										
											1997-04-05 02:44:02 +00:00
-												Most functions rewritten.
(tibetan-char-p): Renamed from tibetan-char-examin.
(tibetan-composable-examin) (tibetan-complete-char-examin)
(tibetan-vertical-stacking) (tibetan-composition): Deleted.
(tibetan-add-components): New function.
(tibetan-composition-function): New function.

											
										
										
											1999-12-15 00:50:18 +00:00
+								;;; Functions for Tibetan <-> Tibetan-transcription.
-												Initial revision

											
										
										
											1997-04-05 02:44:02 +00:00
 								;;;###autoload
-												Most functions rewritten.
(tibetan-char-p): Renamed from tibetan-char-examin.
(tibetan-composable-examin) (tibetan-complete-char-examin)
(tibetan-vertical-stacking) (tibetan-composition): Deleted.
(tibetan-add-components): New function.
(tibetan-composition-function): New function.

											
										
										
											1999-12-15 00:50:18 +00:00
+								(defun tibetan-tibetan-to-transcription (str)
 								  "Transcribe Tibetan string STR and return the corresponding Roman string."
 								  (let (;; Accumulate transcriptions here in reverse order.
 									(trans nil)
 									(len (length str))
 									(i 0)
 									ch this-trans)
 								    (while (< i len)
-												*** empty log message ***

											
										
										
											2000-06-21 02:05:10 +00:00
+								      (let ((idx (string-match tibetan-precomposition-rule-regexp str i)))
-												Most functions rewritten.
(tibetan-char-p): Renamed from tibetan-char-examin.
(tibetan-composable-examin) (tibetan-complete-char-examin)
(tibetan-vertical-stacking) (tibetan-composition): Deleted.
(tibetan-add-components): New function.
(tibetan-composition-function): New function.

											
										
										
											1999-12-15 00:50:18 +00:00
+									(if (eq idx i)
 									    ;; Ith character and the followings matches precomposable
 									    ;; Tibetan sequence.
 									    (setq i (match-end 0)
 										  this-trans
 										  (car (rassoc
 											(cdr (assoc (match-string 0 str)
 												    tibetan-precomposition-rule-alist))
 											tibetan-precomposed-transcription-alist)))
 									  (setq ch (substring str i (1+ i))
 										i (1+ i)
 										this-trans
 										(car (or (rassoc ch tibetan-consonant-transcription-alist)
 											 (rassoc ch tibetan-vowel-transcription-alist)
 											 (rassoc ch tibetan-subjoined-transcription-alist)))))
 									(setq trans (cons this-trans trans))))
 								    (apply 'concat (nreverse trans))))
-												Initial revision

											
										
										
											1997-04-05 02:44:02 +00:00
-												Most functions rewritten.
(tibetan-char-p): Renamed from tibetan-char-examin.
(tibetan-composable-examin) (tibetan-complete-char-examin)
(tibetan-vertical-stacking) (tibetan-composition): Deleted.
(tibetan-add-components): New function.
(tibetan-composition-function): New function.

											
										
										
											1999-12-15 00:50:18 +00:00
+								;;;###autoload
 								(defun tibetan-transcription-to-tibetan (str)
 								  "Convert Tibetan Roman string STR to Tibetan character string.
 								The returned string has no composition information."
 								  (let (;; Case is significant.
 									(case-fold-search nil)
 									(idx 0)
 									;; Accumulate Tibetan strings here in reverse order.
 									(t-str-list nil)
 									i subtrans)
 								    (while (setq i (string-match tibetan-regexp str idx))
 								      (if (< idx i)
 									  ;; STR contains a pattern that doesn't match Tibetan
 									  ;; transcription.  Include the pattern as is.
 									  (setq t-str-list (cons (substring str idx i) t-str-list)))
 								      (setq subtrans (match-string 0 str)
 									    idx (match-end 0))
 								      (let ((t-char (cdr (assoc subtrans
 												tibetan-precomposed-transcription-alist))))
 									(if t-char
 									    ;; SUBTRANS corresponds to a transcription for
 									    ;; precomposable Tibetan sequence.
 									    (setq t-char (car (rassoc t-char
 												      tibetan-precomposition-rule-alist)))
 									  (setq t-char
 										(cdr
 										 (or (assoc subtrans tibetan-consonant-transcription-alist)
 										     (assoc subtrans tibetan-vowel-transcription-alist)
 										     (assoc subtrans tibetan-modifier-transcription-alist)
 										     (assoc subtrans tibetan-subjoined-transcription-alist)))))
 									(setq t-str-list (cons t-char t-str-list))))
 								    (if (< idx (length str))
 									(setq t-str-list (cons (substring str idx) t-str-list)))
 								    (apply 'concat (nreverse t-str-list))))
-												Initial revision

											
										
										
											1997-04-05 02:44:02 +00:00
 								;;;
-												Most functions rewritten.
(tibetan-char-p): Renamed from tibetan-char-examin.
(tibetan-composable-examin) (tibetan-complete-char-examin)
(tibetan-vertical-stacking) (tibetan-composition): Deleted.
(tibetan-add-components): New function.
(tibetan-composition-function): New function.

											
										
										
											1999-12-15 00:50:18 +00:00
+								;;; Functions for composing/decomposing Tibetan sequence.
-												Initial revision

											
										
										
											1997-04-05 02:44:02 +00:00
+								;;;
 								;;; A Tibetan syllable is typically structured as follows:
 								;;;
 								;;;      [Prefix] C [C+] V [M] [Suffix [Post suffix]]
 								;;;
 								;;; where C's are all vertically stacked, V appears below or above
 								;;; consonant cluster and M is always put above the C[C+]V combination.
 								;;; (Sanskrit visarga, though it is a vowel modifier, is considered
 								;;;  to be a punctuation.)
 								;;;
-												*** empty log message ***

											
										
										
											2000-06-21 02:05:10 +00:00
+								;;; Here are examples of the words "bsgrubs" and "hfauM"
-												Initial revision

											
										
										
											1997-04-05 02:44:02 +00:00
+								;;;
-												Use UTF-8 for most files with non-ASCII characters.

* admin/notes/unicode (etc/tutorials/TUTORIAL.ko, leim/quail/hanja.el)
(leim/quail/hanja3.el, leim/quail/symbol-ksc.el):
Now utf-8, not iso-2022-7bit.  Also, files that contain non-UTF-8
characters are now encoded in utf-8-emacs, not iso-2022-7bit.
* etc/tutorials/TUTORIAL.ko, tutorials/TUTORIAL.th:
Switch from iso-2022-7bit to utf-8.
* leim/quail/cyrillic.el, leim/quail/czech.el, leim/quail/ethiopic.el:
* leim/quail/greek.el, leim/quail/hanja.el, leim/quail/hanja3.el:
* leim/quail/hebrew.el, leim/quail/lao.el, leim/quail/lrt.el:
* leim/quail/slovak.el, leim/quail/symbol-ksc.el, leim/quail/thai.el:
* leim/quail/tibetan.el, leim/quail/viqr.el, leim/quail/vntelex.el:
* leim/quail/vnvni.el, leim/quail/welsh.el:
* lisp/international/latin1-disp.el, lisp/international/mule-util.el:
* lisp/language/cyril-util.el, lisp/language/european.el:
* lisp/language/ind-util.el, lisp/language/lao-util.el, lisp/language/thai.el:
* lisp/language/tibet-util.el, lisp/language/tibetan.el:
* lisp/language/viet-util.el:
Switch from iso-2022-7bit to utf-8 or (if needed) utf-8-emacs.

Fixes: debbugs:13936

											
										
										
											2013-04-01 18:18:40 -07:00
+								;;;            བསྒྲུབས            ཧཱུཾ
-												Initial revision

											
										
										
											1997-04-05 02:44:02 +00:00
+								;;;
 								;;;                             M
 								;;;             b s b s         h
-												*** empty log message ***

											
										
										
											2000-06-21 02:05:10 +00:00
+								;;;               g             fa
-												Initial revision

											
										
										
											1997-04-05 02:44:02 +00:00
+								;;;               r             u
 								;;;               u
 								;;;
-												Use UTF-8 for most files with non-ASCII characters.

* admin/notes/unicode (etc/tutorials/TUTORIAL.ko, leim/quail/hanja.el)
(leim/quail/hanja3.el, leim/quail/symbol-ksc.el):
Now utf-8, not iso-2022-7bit.  Also, files that contain non-UTF-8
characters are now encoded in utf-8-emacs, not iso-2022-7bit.
* etc/tutorials/TUTORIAL.ko, tutorials/TUTORIAL.th:
Switch from iso-2022-7bit to utf-8.
* leim/quail/cyrillic.el, leim/quail/czech.el, leim/quail/ethiopic.el:
* leim/quail/greek.el, leim/quail/hanja.el, leim/quail/hanja3.el:
* leim/quail/hebrew.el, leim/quail/lao.el, leim/quail/lrt.el:
* leim/quail/slovak.el, leim/quail/symbol-ksc.el, leim/quail/thai.el:
* leim/quail/tibetan.el, leim/quail/viqr.el, leim/quail/vntelex.el:
* leim/quail/vnvni.el, leim/quail/welsh.el:
* lisp/international/latin1-disp.el, lisp/international/mule-util.el:
* lisp/language/cyril-util.el, lisp/language/european.el:
* lisp/language/ind-util.el, lisp/language/lao-util.el, lisp/language/thai.el:
* lisp/language/tibet-util.el, lisp/language/tibetan.el:
* lisp/language/viet-util.el:
Switch from iso-2022-7bit to utf-8 or (if needed) utf-8-emacs.

Fixes: debbugs:13936

											
										
										
											2013-04-01 18:18:40 -07:00
+								;;; Consonants `'' (འ), `w' (ཝ), `y' (ཡ), `r' (ར) take special
-												Most functions rewritten.
(tibetan-char-p): Renamed from tibetan-char-examin.
(tibetan-composable-examin) (tibetan-complete-char-examin)
(tibetan-vertical-stacking) (tibetan-composition): Deleted.
(tibetan-add-components): New function.
(tibetan-composition-function): New function.

											
										
										
											1999-12-15 00:50:18 +00:00
+								;;; forms when they are used as subjoined consonants.  Consonant `r'
 								;;; takes another special form when it is superjoined, as in "rka",
 								;;; while it does not change its form when conjoined with
 								;;; subjoined `'', `w' or `y', as in "rwa", "rya".
 								;; Append a proper composition rule and glyph to COMPONENTS to compose
 								;; CHAR with a composition that has COMPONENTS.
 								(defun tibetan-add-components (components char)
 								  (let ((last (last components))
 									(stack-upper '(tc . bc))
 									(stack-under '(bc . tc))
-												Convert all tibetan-1-column characters
to the corresponding tibetan characters.
(tibetan-add-components): Delete code for the special treatment of
'a chung.

											
										
										
											2000-06-12 06:11:56 +00:00
+									rule comp-vowel tmp)
-												Most functions rewritten.
(tibetan-char-p): Renamed from tibetan-char-examin.
(tibetan-composable-examin) (tibetan-complete-char-examin)
(tibetan-vertical-stacking) (tibetan-composition): Deleted.
(tibetan-add-components): New function.
(tibetan-composition-function): New function.

											
										
										
											1999-12-15 00:50:18 +00:00
+								    ;; Special treatment for 'a chung.
 								    ;; If 'a follows a consonant, turn it into the subjoined form.
-												Convert all tibetan-1-column characters
to the corresponding tibetan characters.
(tibetan-add-components): Delete code for the special treatment of
'a chung.

											
										
										
											2000-06-12 06:11:56 +00:00
+								    ;; * Disabled by Tomabechi 2000/06/09 *
-												Use UTF-8 for most files with non-ASCII characters.

* admin/notes/unicode (etc/tutorials/TUTORIAL.ko, leim/quail/hanja.el)
(leim/quail/hanja3.el, leim/quail/symbol-ksc.el):
Now utf-8, not iso-2022-7bit.  Also, files that contain non-UTF-8
characters are now encoded in utf-8-emacs, not iso-2022-7bit.
* etc/tutorials/TUTORIAL.ko, tutorials/TUTORIAL.th:
Switch from iso-2022-7bit to utf-8.
* leim/quail/cyrillic.el, leim/quail/czech.el, leim/quail/ethiopic.el:
* leim/quail/greek.el, leim/quail/hanja.el, leim/quail/hanja3.el:
* leim/quail/hebrew.el, leim/quail/lao.el, leim/quail/lrt.el:
* leim/quail/slovak.el, leim/quail/symbol-ksc.el, leim/quail/thai.el:
* leim/quail/tibetan.el, leim/quail/viqr.el, leim/quail/vntelex.el:
* leim/quail/vnvni.el, leim/quail/welsh.el:
* lisp/international/latin1-disp.el, lisp/international/mule-util.el:
* lisp/language/cyril-util.el, lisp/language/european.el:
* lisp/language/ind-util.el, lisp/language/lao-util.el, lisp/language/thai.el:
* lisp/language/tibet-util.el, lisp/language/tibetan.el:
* lisp/language/viet-util.el:
Switch from iso-2022-7bit to utf-8 or (if needed) utf-8-emacs.

Fixes: debbugs:13936

											
										
										
											2013-04-01 18:18:40 -07:00
+								    ;; Because in Unicode, འ may follow directly a consonant without
 								    ;; any intervening vowel, as in མཁ<E0BD98><E0BD81><EFBFBD><EFBFBD>འ་=མ ཁ འ not མ ཁ <20><><EFBFBD><EFBFBD> འ
 								    ;;(if (and (= char ?འ)
-												Convert all tibetan-1-column characters
to the corresponding tibetan characters.
(tibetan-add-components): Delete code for the special treatment of
'a chung.

											
										
										
											2000-06-12 06:11:56 +00:00
+								    ;;	     (aref (char-category-set (car last)) ?0))
-												Use UTF-8 for most files with non-ASCII characters.

* admin/notes/unicode (etc/tutorials/TUTORIAL.ko, leim/quail/hanja.el)
(leim/quail/hanja3.el, leim/quail/symbol-ksc.el):
Now utf-8, not iso-2022-7bit.  Also, files that contain non-UTF-8
characters are now encoded in utf-8-emacs, not iso-2022-7bit.
* etc/tutorials/TUTORIAL.ko, tutorials/TUTORIAL.th:
Switch from iso-2022-7bit to utf-8.
* leim/quail/cyrillic.el, leim/quail/czech.el, leim/quail/ethiopic.el:
* leim/quail/greek.el, leim/quail/hanja.el, leim/quail/hanja3.el:
* leim/quail/hebrew.el, leim/quail/lao.el, leim/quail/lrt.el:
* leim/quail/slovak.el, leim/quail/symbol-ksc.el, leim/quail/thai.el:
* leim/quail/tibetan.el, leim/quail/viqr.el, leim/quail/vntelex.el:
* leim/quail/vnvni.el, leim/quail/welsh.el:
* lisp/international/latin1-disp.el, lisp/international/mule-util.el:
* lisp/language/cyril-util.el, lisp/language/european.el:
* lisp/language/ind-util.el, lisp/language/lao-util.el, lisp/language/thai.el:
* lisp/language/tibet-util.el, lisp/language/tibetan.el:
* lisp/language/viet-util.el:
Switch from iso-2022-7bit to utf-8 or (if needed) utf-8-emacs.

Fixes: debbugs:13936

											
										
										
											2013-04-01 18:18:40 -07:00
+								    ;;	(setq char ?ཱ)) ;; modified for new font by Tomabechi 1999/12/10
-												Convert all tibetan-1-column characters
to the corresponding tibetan characters.
(tibetan-add-components): Delete code for the special treatment of
'a chung.

											
										
										
											2000-06-12 06:11:56 +00:00
 								    ;; Composite vowel signs are decomposed before being added
 								    ;; Added by Tomabechi 2000/06/08
-												Use UTF-8 for most files with non-ASCII characters.

* admin/notes/unicode (etc/tutorials/TUTORIAL.ko, leim/quail/hanja.el)
(leim/quail/hanja3.el, leim/quail/symbol-ksc.el):
Now utf-8, not iso-2022-7bit.  Also, files that contain non-UTF-8
characters are now encoded in utf-8-emacs, not iso-2022-7bit.
* etc/tutorials/TUTORIAL.ko, tutorials/TUTORIAL.th:
Switch from iso-2022-7bit to utf-8.
* leim/quail/cyrillic.el, leim/quail/czech.el, leim/quail/ethiopic.el:
* leim/quail/greek.el, leim/quail/hanja.el, leim/quail/hanja3.el:
* leim/quail/hebrew.el, leim/quail/lao.el, leim/quail/lrt.el:
* leim/quail/slovak.el, leim/quail/symbol-ksc.el, leim/quail/thai.el:
* leim/quail/tibetan.el, leim/quail/viqr.el, leim/quail/vntelex.el:
* leim/quail/vnvni.el, leim/quail/welsh.el:
* lisp/international/latin1-disp.el, lisp/international/mule-util.el:
* lisp/language/cyril-util.el, lisp/language/european.el:
* lisp/language/ind-util.el, lisp/language/lao-util.el, lisp/language/thai.el:
* lisp/language/tibet-util.el, lisp/language/tibetan.el:
* lisp/language/viet-util.el:
Switch from iso-2022-7bit to utf-8 or (if needed) utf-8-emacs.

Fixes: debbugs:13936

											
										
										
											2013-04-01 18:18:40 -07:00
+								    (if (memq char '(?ཱི ?ཱུ ?ྲྀ ?ཷ ?ླྀ ?ཹ ?ཱྀ))
-												Convert all tibetan-1-column characters
to the corresponding tibetan characters.
(tibetan-add-components): Delete code for the special treatment of
'a chung.

											
										
										
											2000-06-12 06:11:56 +00:00
+									(setq comp-vowel
-												*** empty log message ***

											
										
										
											2000-06-21 02:05:10 +00:00
+									      (copy-sequence
 									       (cddr (assoc (char-to-string char)
 											    tibetan-composite-vowel-alist)))
-												Convert all tibetan-1-column characters
to the corresponding tibetan characters.
(tibetan-add-components): Delete code for the special treatment of
'a chung.

											
										
										
											2000-06-12 06:11:56 +00:00
+									      char
 									      (cadr (assoc (char-to-string char)
 											   tibetan-composite-vowel-alist))))
-												Most functions rewritten.
(tibetan-char-p): Renamed from tibetan-char-examin.
(tibetan-composable-examin) (tibetan-complete-char-examin)
(tibetan-vertical-stacking) (tibetan-composition): Deleted.
(tibetan-add-components): New function.
(tibetan-composition-function): New function.

											
										
										
											1999-12-15 00:50:18 +00:00
+								    (cond
 								     ;; Compose upper vowel sign vertically over.
 								     ((aref (char-category-set char) ?2)
 								      (setq rule stack-upper))
 								     ;; Compose lower vowel sign vertically under.
 								     ((aref (char-category-set char) ?3)
-												Use UTF-8 for most files with non-ASCII characters.

* admin/notes/unicode (etc/tutorials/TUTORIAL.ko, leim/quail/hanja.el)
(leim/quail/hanja3.el, leim/quail/symbol-ksc.el):
Now utf-8, not iso-2022-7bit.  Also, files that contain non-UTF-8
characters are now encoded in utf-8-emacs, not iso-2022-7bit.
* etc/tutorials/TUTORIAL.ko, tutorials/TUTORIAL.th:
Switch from iso-2022-7bit to utf-8.
* leim/quail/cyrillic.el, leim/quail/czech.el, leim/quail/ethiopic.el:
* leim/quail/greek.el, leim/quail/hanja.el, leim/quail/hanja3.el:
* leim/quail/hebrew.el, leim/quail/lao.el, leim/quail/lrt.el:
* leim/quail/slovak.el, leim/quail/symbol-ksc.el, leim/quail/thai.el:
* leim/quail/tibetan.el, leim/quail/viqr.el, leim/quail/vntelex.el:
* leim/quail/vnvni.el, leim/quail/welsh.el:
* lisp/international/latin1-disp.el, lisp/international/mule-util.el:
* lisp/language/cyril-util.el, lisp/language/european.el:
* lisp/language/ind-util.el, lisp/language/lao-util.el, lisp/language/thai.el:
* lisp/language/tibet-util.el, lisp/language/tibetan.el:
* lisp/language/viet-util.el:
Switch from iso-2022-7bit to utf-8 or (if needed) utf-8-emacs.

Fixes: debbugs:13936

											
										
										
											2013-04-01 18:18:40 -07:00
+								      (if (or (eq char ?<3F><EFBFBD><EFBFBD><EFBFBD>) ;; `<60><><EFBFBD><EFBFBD>' and `཰' should not visible when composed.
-												*** empty log message ***

											
										
										
											2003-09-08 12:53:41 +00:00
+									      (eq char #xF70))
-												(tibetan-add-components): Fixes for new
encoding of Tibetan characters.
(tibetan-decompose-precomposition-alist): New variable.
(tibetan-decompose-region): Convert precomposed characters to
non-precomposed characters.
(tibetan-decompose-string): Likewise.
(tibetan-composition-function): Fix args to
thibetan-compose-string.

											
										
										
											2000-06-01 10:59:56 +00:00
+									  (setq rule nil)
 									(setq rule stack-under)))
-												Most functions rewritten.
(tibetan-char-p): Renamed from tibetan-char-examin.
(tibetan-composable-examin) (tibetan-complete-char-examin)
(tibetan-vertical-stacking) (tibetan-composition): Deleted.
(tibetan-add-components): New function.
(tibetan-composition-function): New function.

											
										
										
											1999-12-15 00:50:18 +00:00
+								     ;; Transform ra-mgo (superscribed r) if followed by a subjoined
 								     ;; consonant other than w, ', y, r.
-												Use UTF-8 for most files with non-ASCII characters.

* admin/notes/unicode (etc/tutorials/TUTORIAL.ko, leim/quail/hanja.el)
(leim/quail/hanja3.el, leim/quail/symbol-ksc.el):
Now utf-8, not iso-2022-7bit.  Also, files that contain non-UTF-8
characters are now encoded in utf-8-emacs, not iso-2022-7bit.
* etc/tutorials/TUTORIAL.ko, tutorials/TUTORIAL.th:
Switch from iso-2022-7bit to utf-8.
* leim/quail/cyrillic.el, leim/quail/czech.el, leim/quail/ethiopic.el:
* leim/quail/greek.el, leim/quail/hanja.el, leim/quail/hanja3.el:
* leim/quail/hebrew.el, leim/quail/lao.el, leim/quail/lrt.el:
* leim/quail/slovak.el, leim/quail/symbol-ksc.el, leim/quail/thai.el:
* leim/quail/tibetan.el, leim/quail/viqr.el, leim/quail/vntelex.el:
* leim/quail/vnvni.el, leim/quail/welsh.el:
* lisp/international/latin1-disp.el, lisp/international/mule-util.el:
* lisp/language/cyril-util.el, lisp/language/european.el:
* lisp/language/ind-util.el, lisp/language/lao-util.el, lisp/language/thai.el:
* lisp/language/tibet-util.el, lisp/language/tibetan.el:
* lisp/language/viet-util.el:
Switch from iso-2022-7bit to utf-8 or (if needed) utf-8-emacs.

Fixes: debbugs:13936

											
										
										
											2013-04-01 18:18:40 -07:00
+								     ((and (= (car last) ?ར)
 									   (not (memq char '(?ྭ ?ཱ ?ྱ ?ྲ))))
 								      (setcar last ?<3F><EFBFBD><EFBFBD><EFBFBD>) ;; modified for newfont by Tomabechi 1999/12/10
-												Most functions rewritten.
(tibetan-char-p): Renamed from tibetan-char-examin.
(tibetan-composable-examin) (tibetan-complete-char-examin)
(tibetan-vertical-stacking) (tibetan-composition): Deleted.
(tibetan-add-components): New function.
(tibetan-composition-function): New function.

											
										
										
											1999-12-15 00:50:18 +00:00
+								      (setq rule stack-under))
 								     ;; Transform initial base consonant if followed by a subjoined
 								     ;; consonant but 'a.
 								     (t
 								      (let ((laststr (char-to-string (car last))))
-												Use UTF-8 for most files with non-ASCII characters.

* admin/notes/unicode (etc/tutorials/TUTORIAL.ko, leim/quail/hanja.el)
(leim/quail/hanja3.el, leim/quail/symbol-ksc.el):
Now utf-8, not iso-2022-7bit.  Also, files that contain non-UTF-8
characters are now encoded in utf-8-emacs, not iso-2022-7bit.
* etc/tutorials/TUTORIAL.ko, tutorials/TUTORIAL.th:
Switch from iso-2022-7bit to utf-8.
* leim/quail/cyrillic.el, leim/quail/czech.el, leim/quail/ethiopic.el:
* leim/quail/greek.el, leim/quail/hanja.el, leim/quail/hanja3.el:
* leim/quail/hebrew.el, leim/quail/lao.el, leim/quail/lrt.el:
* leim/quail/slovak.el, leim/quail/symbol-ksc.el, leim/quail/thai.el:
* leim/quail/tibetan.el, leim/quail/viqr.el, leim/quail/vntelex.el:
* leim/quail/vnvni.el, leim/quail/welsh.el:
* lisp/international/latin1-disp.el, lisp/international/mule-util.el:
* lisp/language/cyril-util.el, lisp/language/european.el:
* lisp/language/ind-util.el, lisp/language/lao-util.el, lisp/language/thai.el:
* lisp/language/tibet-util.el, lisp/language/tibetan.el:
* lisp/language/viet-util.el:
Switch from iso-2022-7bit to utf-8 or (if needed) utf-8-emacs.

Fixes: debbugs:13936

											
										
										
											2013-04-01 18:18:40 -07:00
+									(if (and (/= char ?ཱ) ;; modified for new font by Tomabechi
 										 (string-match "[ཀ-ཛྷཞཟལ-ཀྵཪ]" laststr))
-												Most functions rewritten.
(tibetan-char-p): Renamed from tibetan-char-examin.
(tibetan-composable-examin) (tibetan-complete-char-examin)
(tibetan-vertical-stacking) (tibetan-composition): Deleted.
(tibetan-add-components): New function.
(tibetan-composition-function): New function.

											
										
										
											1999-12-15 00:50:18 +00:00
+									    (setcar last (string-to-char
 											  (cdr (assoc (char-to-string (car last))
 												      tibetan-base-to-subjoined-alist)))))
 									(setq rule stack-under))))
-												(tibetan-add-components): Fixes for new
encoding of Tibetan characters.
(tibetan-decompose-precomposition-alist): New variable.
(tibetan-decompose-region): Convert precomposed characters to
non-precomposed characters.
(tibetan-decompose-string): Likewise.
(tibetan-composition-function): Fix args to
thibetan-compose-string.

											
										
										
											2000-06-01 10:59:56 +00:00
+								    (if rule
-												Convert all tibetan-1-column characters
to the corresponding tibetan characters.
(tibetan-add-components): Delete code for the special treatment of
'a chung.

											
										
										
											2000-06-12 06:11:56 +00:00
+									(setcdr last (list rule char)))
 								    ;; Added by Tomabechi 2000/06/08
 								    (if comp-vowel
 									(nconc last comp-vowel))
 								    ))
-												Initial revision

											
										
										
											1997-04-05 02:44:02 +00:00
 								;;;###autoload
 								(defun tibetan-compose-string (str)
-												Most functions rewritten.
(tibetan-char-p): Renamed from tibetan-char-examin.
(tibetan-composable-examin) (tibetan-complete-char-examin)
(tibetan-vertical-stacking) (tibetan-composition): Deleted.
(tibetan-add-components): New function.
(tibetan-composition-function): New function.

											
										
										
											1999-12-15 00:50:18 +00:00
+								  "Compose Tibetan string STR."
 								  (let ((idx 0))
-												Use UTF-8 for most files with non-ASCII characters.

* admin/notes/unicode (etc/tutorials/TUTORIAL.ko, leim/quail/hanja.el)
(leim/quail/hanja3.el, leim/quail/symbol-ksc.el):
Now utf-8, not iso-2022-7bit.  Also, files that contain non-UTF-8
characters are now encoded in utf-8-emacs, not iso-2022-7bit.
* etc/tutorials/TUTORIAL.ko, tutorials/TUTORIAL.th:
Switch from iso-2022-7bit to utf-8.
* leim/quail/cyrillic.el, leim/quail/czech.el, leim/quail/ethiopic.el:
* leim/quail/greek.el, leim/quail/hanja.el, leim/quail/hanja3.el:
* leim/quail/hebrew.el, leim/quail/lao.el, leim/quail/lrt.el:
* leim/quail/slovak.el, leim/quail/symbol-ksc.el, leim/quail/thai.el:
* leim/quail/tibetan.el, leim/quail/viqr.el, leim/quail/vntelex.el:
* leim/quail/vnvni.el, leim/quail/welsh.el:
* lisp/international/latin1-disp.el, lisp/international/mule-util.el:
* lisp/language/cyril-util.el, lisp/language/european.el:
* lisp/language/ind-util.el, lisp/language/lao-util.el, lisp/language/thai.el:
* lisp/language/tibet-util.el, lisp/language/tibetan.el:
* lisp/language/viet-util.el:
Switch from iso-2022-7bit to utf-8 or (if needed) utf-8-emacs.

Fixes: debbugs:13936

											
										
										
											2013-04-01 18:18:40 -07:00
+								    ;; `འ' is included in the pattern for subjoined consonants
-												Most functions rewritten.
(tibetan-char-p): Renamed from tibetan-char-examin.
(tibetan-composable-examin) (tibetan-complete-char-examin)
(tibetan-vertical-stacking) (tibetan-composition): Deleted.
(tibetan-add-components): New function.
(tibetan-composition-function): New function.

											
										
										
											1999-12-15 00:50:18 +00:00
+								    ;; because we treat it specially in tibetan-add-components.
-												Convert all tibetan-1-column characters
to the corresponding tibetan characters.
(tibetan-add-components): Delete code for the special treatment of
'a chung.

											
										
										
											2000-06-12 06:11:56 +00:00
+								    ;; (This feature is removed by Tomabechi 2000/06/08)
-												Most functions rewritten.
(tibetan-char-p): Renamed from tibetan-char-examin.
(tibetan-composable-examin) (tibetan-complete-char-examin)
(tibetan-vertical-stacking) (tibetan-composition): Deleted.
(tibetan-add-components): New function.
(tibetan-composition-function): New function.

											
										
										
											1999-12-15 00:50:18 +00:00
+								    (while (setq idx (string-match tibetan-composable-pattern str idx))
 								      (let ((from idx)
 									    (to (match-end 0))
 									    components)
 									(if (eq (string-match tibetan-precomposition-rule-regexp str idx) idx)
 									    (setq idx (match-end 0)
 										  components
 										  (list (string-to-char
 											 (cdr
 											  (assoc (match-string 0 str)
 												 tibetan-precomposition-rule-alist)))))
 									  (setq components (list (aref str idx))
 										idx (1+ idx)))
 									(while (< idx to)
 									  (tibetan-add-components components (aref str idx))
 									  (setq idx (1+ idx)))
 									(compose-string str from to components))))
 								  str)
-												Initial revision

											
										
										
											1997-04-05 02:44:02 +00:00
 								;;;###autoload
-												Most functions rewritten.
(tibetan-char-p): Renamed from tibetan-char-examin.
(tibetan-composable-examin) (tibetan-complete-char-examin)
(tibetan-vertical-stacking) (tibetan-composition): Deleted.
(tibetan-add-components): New function.
(tibetan-composition-function): New function.

											
										
										
											1999-12-15 00:50:18 +00:00
+								(defun tibetan-compose-region (beg end)
 								  "Compose Tibetan text the region BEG and END."
-												Initial revision

											
										
										
											1997-04-05 02:44:02 +00:00
+								  (interactive "r")
-												Most functions rewritten.
(tibetan-char-p): Renamed from tibetan-char-examin.
(tibetan-composable-examin) (tibetan-complete-char-examin)
(tibetan-vertical-stacking) (tibetan-composition): Deleted.
(tibetan-add-components): New function.
(tibetan-composition-function): New function.

											
										
										
											1999-12-15 00:50:18 +00:00
+								  (let (str result chars)
-												Initial revision

											
										
										
											1997-04-05 02:44:02 +00:00
+								    (save-excursion
 								      (save-restriction
 									(narrow-to-region beg end)
 									(goto-char (point-min))
-												Use UTF-8 for most files with non-ASCII characters.

* admin/notes/unicode (etc/tutorials/TUTORIAL.ko, leim/quail/hanja.el)
(leim/quail/hanja3.el, leim/quail/symbol-ksc.el):
Now utf-8, not iso-2022-7bit.  Also, files that contain non-UTF-8
characters are now encoded in utf-8-emacs, not iso-2022-7bit.
* etc/tutorials/TUTORIAL.ko, tutorials/TUTORIAL.th:
Switch from iso-2022-7bit to utf-8.
* leim/quail/cyrillic.el, leim/quail/czech.el, leim/quail/ethiopic.el:
* leim/quail/greek.el, leim/quail/hanja.el, leim/quail/hanja3.el:
* leim/quail/hebrew.el, leim/quail/lao.el, leim/quail/lrt.el:
* leim/quail/slovak.el, leim/quail/symbol-ksc.el, leim/quail/thai.el:
* leim/quail/tibetan.el, leim/quail/viqr.el, leim/quail/vntelex.el:
* leim/quail/vnvni.el, leim/quail/welsh.el:
* lisp/international/latin1-disp.el, lisp/international/mule-util.el:
* lisp/language/cyril-util.el, lisp/language/european.el:
* lisp/language/ind-util.el, lisp/language/lao-util.el, lisp/language/thai.el:
* lisp/language/tibet-util.el, lisp/language/tibetan.el:
* lisp/language/viet-util.el:
Switch from iso-2022-7bit to utf-8 or (if needed) utf-8-emacs.

Fixes: debbugs:13936

											
										
										
											2013-04-01 18:18:40 -07:00
+									;; `འ' is included in the pattern for subjoined consonants
-												Most functions rewritten.
(tibetan-char-p): Renamed from tibetan-char-examin.
(tibetan-composable-examin) (tibetan-complete-char-examin)
(tibetan-vertical-stacking) (tibetan-composition): Deleted.
(tibetan-add-components): New function.
(tibetan-composition-function): New function.

											
										
										
											1999-12-15 00:50:18 +00:00
+									;; because we treat it specially in tibetan-add-components.
-												Convert all tibetan-1-column characters
to the corresponding tibetan characters.
(tibetan-add-components): Delete code for the special treatment of
'a chung.

											
										
										
											2000-06-12 06:11:56 +00:00
+									;; (This feature is removed by Tomabechi 2000/06/08)
-												Most functions rewritten.
(tibetan-char-p): Renamed from tibetan-char-examin.
(tibetan-composable-examin) (tibetan-complete-char-examin)
(tibetan-vertical-stacking) (tibetan-composition): Deleted.
(tibetan-add-components): New function.
(tibetan-composition-function): New function.

											
										
										
											1999-12-15 00:50:18 +00:00
+									(while (re-search-forward tibetan-composable-pattern nil t)
 									  (let ((from (match-beginning 0))
 										(to (match-end 0))
 										components)
 									    (goto-char from)
 									    (if (looking-at tibetan-precomposition-rule-regexp)
 										(progn
 										  (setq components
 											(list (string-to-char
 											       (cdr
 												(assoc (match-string 0)
 												       tibetan-precomposition-rule-alist)))))
 										  (goto-char (match-end 0)))
 									      (setq components (list (char-after from)))
 									      (forward-char 1))
 									    (while (< (point) to)
 									      (tibetan-add-components components (following-char))
 									      (forward-char 1))
 									    (compose-region from to components)))))))
-												Initial revision

											
										
										
											1997-04-05 02:44:02 +00:00
-												(tibetan-add-components): Fixes for new
encoding of Tibetan characters.
(tibetan-decompose-precomposition-alist): New variable.
(tibetan-decompose-region): Convert precomposed characters to
non-precomposed characters.
(tibetan-decompose-string): Likewise.
(tibetan-composition-function): Fix args to
thibetan-compose-string.

											
										
										
											2000-06-01 10:59:56 +00:00
+								(defvar tibetan-decompose-precomposition-alist
 								  (mapcar (lambda (rule)
 								            ;; Swap the pair: the first character of the rule's cdr
 								            ;; becomes the key, the rule's car becomes the value.
 								            (cons (string-to-char (cdr rule)) (car rule)))
 								          tibetan-precomposition-rule-alist)
 								  "Reverse lookup table built from `tibetan-precomposition-rule-alist'.
 								Each element is (CHAR . VALUE), where CHAR is the first character of
 								the cdr of a rule in that alist and VALUE is the car of the same rule.
 								Intended to be searched with `assq' on a character.")
-												Initial revision

											
										
										
											1997-04-05 02:44:02 +00:00
+								;;;###autoload
-												(tibetan-add-components): Fixes for new
encoding of Tibetan characters.
(tibetan-decompose-precomposition-alist): New variable.
(tibetan-decompose-region): Convert precomposed characters to
non-precomposed characters.
(tibetan-decompose-string): Likewise.
(tibetan-composition-function): Fix args to
thibetan-compose-string.

											
										
										
											2000-06-01 10:59:56 +00:00
+								(defun tibetan-decompose-region (from to)
 								  "Decompose Tibetan text in the region FROM and TO.
 								This is different from decompose-region because precomposed Tibetan characters
-												(tibetan-decompose-region)
(tibetan-decompose-string): Fix typo in docstring.

											
										
										
											2002-01-19 19:15:11 +00:00
+								are decomposed into normal Tibetan character sequences."
-												(tibetan-add-components): Fixes for new
encoding of Tibetan characters.
(tibetan-decompose-precomposition-alist): New variable.
(tibetan-decompose-region): Convert precomposed characters to
non-precomposed characters.
(tibetan-decompose-string): Likewise.
(tibetan-composition-function): Fix args to
thibetan-compose-string.

											
										
										
											2000-06-01 10:59:56 +00:00
+								  (interactive "r")
 								  (save-restriction
 								    (narrow-to-region from to)
 								    (decompose-region from to)
 								    (goto-char from)
 								    (while (not (eobp))
 								      (let* ((char (following-char))
 									     (slot (assq char tibetan-decompose-precomposition-alist)))
 									(if slot
 									    (progn
 									      (delete-char 1)
 									      (insert (cdr slot)))
 									  (forward-char 1))))))
-												Most functions rewritten.
(tibetan-char-p): Renamed from tibetan-char-examin.
(tibetan-composable-examin) (tibetan-complete-char-examin)
(tibetan-vertical-stacking) (tibetan-composition): Deleted.
(tibetan-add-components): New function.
(tibetan-composition-function): New function.

											
										
										
											1999-12-15 00:50:18 +00:00
+								;;;###autoload
-												(tibetan-add-components): Fixes for new
encoding of Tibetan characters.
(tibetan-decompose-precomposition-alist): New variable.
(tibetan-decompose-region): Convert precomposed characters to
non-precomposed characters.
(tibetan-decompose-string): Likewise.
(tibetan-composition-function): Fix args to
thibetan-compose-string.

											
										
										
											2000-06-01 10:59:56 +00:00
+								(defun tibetan-decompose-string (str)
 								  "Decompose Tibetan string STR.
 								This is different from decompose-string because precomposed Tibetan characters
-												(tibetan-decompose-region)
(tibetan-decompose-string): Fix typo in docstring.

											
										
										
											2002-01-19 19:15:11 +00:00
+								are decomposed into normal Tibetan character sequences."
-												(tibetan-add-components): Fixes for new
encoding of Tibetan characters.
(tibetan-decompose-precomposition-alist): New variable.
(tibetan-decompose-region): Convert precomposed characters to
non-precomposed characters.
(tibetan-decompose-string): Likewise.
(tibetan-composition-function): Fix args to
thibetan-compose-string.

											
										
										
											2000-06-01 10:59:56 +00:00
+								  (let ((new "")
 									(len (length str))
 									(idx 0)
 									char slot)
 								    (while (< idx len)
 								      (setq char (aref str idx)
 									    slot (assq (aref str idx) tibetan-decompose-precomposition-alist)
 									    new (concat new (if slot (cdr slot) (char-to-string char)))
 									    idx (1+ idx)))
 								    new))
-												Most functions rewritten.
(tibetan-char-p): Renamed from tibetan-char-examin.
(tibetan-composable-examin) (tibetan-complete-char-examin)
(tibetan-vertical-stacking) (tibetan-composition): Deleted.
(tibetan-add-components): New function.
(tibetan-composition-function): New function.

											
										
										
											1999-12-15 00:50:18 +00:00
-												Initial revision

											
										
										
											1997-04-05 02:44:02 +00:00
+								;;;
 								;;; This variable is used to avoid repeated decomposition.
 								;;;
 								(setq-default tibetan-decomposed nil)
 								;;;###autoload
 								(defun tibetan-decompose-buffer ()
 								  "Decomposes Tibetan characters in the buffer into their components.
-												Most functions rewritten.
(tibetan-char-p): Renamed from tibetan-char-examin.
(tibetan-composable-examin) (tibetan-complete-char-examin)
(tibetan-vertical-stacking) (tibetan-composition): Deleted.
(tibetan-add-components): New function.
(tibetan-composition-function): New function.

											
										
										
											1999-12-15 00:50:18 +00:00
+								See also the documentation of the function `tibetan-decompose-region'."
-												Initial revision

											
										
										
											1997-04-05 02:44:02 +00:00
+								  (interactive)
 								  (make-local-variable 'tibetan-decomposed)
 								  (cond ((not tibetan-decomposed)
 									 (tibetan-decompose-region (point-min) (point-max))
 									 (setq tibetan-decomposed t))))
 								;;;###autoload
 								(defun tibetan-compose-buffer ()
 								  "Compose Tibetan character components in the accessible part of the buffer.
 								Afterwards set the buffer-local variable `tibetan-decomposed' to nil.
 								See also the documentation of the function `tibetan-compose-region'."
 								  (interactive)
 								  ;; Make the flag buffer-local before doing any work, so that the
 								  ;; assignment below only affects the current buffer.
 								  (make-local-variable 'tibetan-decomposed)
 								  (let ((start (point-min))
 								        (finish (point-max)))
 								    (tibetan-compose-region start finish))
 								  (setq tibetan-decomposed nil))
 								;;;###autoload
 								(defun tibetan-post-read-conversion (len)
 								  ;; Compose the LEN characters that follow point and return the length
 								  ;; of that region after composition.  Point, the restriction and the
 								  ;; buffer-modified flag are restored before returning.
 								  (save-excursion
 								    (save-restriction
 								      (let ((buffer-modified-p (buffer-modified-p)))
 									(narrow-to-region (point) (+ (point) len))
 									(tibetan-compose-region (point-min) (point-max))
 									(set-buffer-modified-p buffer-modified-p)
-												(tibetan-post-read-conversion): Return
the length of converted region.

											
										
										
											1997-10-21 10:45:26 +00:00
 									;; The text is composed now, so clear the buffer-local flag.
+									(make-local-variable 'tibetan-decomposed)
 									(setq tibetan-decomposed nil)
 									(- (point-max) (point-min))))))
-												Initial revision

											
										
										
											1997-04-05 02:44:02 +00:00
 								;;;###autoload
 								(defun tibetan-pre-write-conversion (from to)
 								  ;; Copy the text to be written -- the string FROM, or the region
 								  ;; FROM..TO of the current buffer -- into a newly generated " *temp*"
 								  ;; buffer, which is left current, and decompose it there unless
 								  ;; `tibetan-decomposed' was non-nil when this function was entered.
 								  ;;
 								  ;; NOTE(review): `tibetan-decomposed-temp' is assigned with `setq'
 								  ;; rather than being `let'-bound; its definition is not visible
 								  ;; here -- confirm whether anything else reads it before making it
 								  ;; a local.
 								  (setq tibetan-decomposed-temp tibetan-decomposed)
-												(tibetan-pre-write-conversion): Use with-temp-buffer.

											
										
										
											1998-10-21 11:50:56 +00:00
+								  (let ((old-buf (current-buffer)))
-												(tibetan-pre-write-conversion): Cancel previous
change, use generate-new-buffer instead of get-buffer-create.

											
										
										
											1998-10-26 08:00:11 +00:00
+								    (set-buffer (generate-new-buffer " *temp*"))
 								    (if (stringp from)
 									(insert from)
 								      (insert-buffer-substring old-buf from to))
 								    (if (not tibetan-decomposed-temp)
 									(tibetan-decompose-region (point-min) (point-max)))
-												(tibetan-pre-write-conversion): Make it work
for the case the arg FROM is a string.

											
										
										
											1997-05-12 07:00:26 +00:00
+								    ;; Should return nil as annotations.
 								    nil))
-												Initial revision

											
										
										
											1997-04-05 02:44:02 +00:00
-												(tibetan-canonicalize-for-unicode-alist)
(tibetan-canonicalize-for-unicode-regexp): New variables.
(tibetan-canonicalize-for-unicode-region): New function.
(tibetan-strict-unicode): New variable.
(tibetan-pre-write-canonicalize-for-unicode): New function.

											
										
										
											2002-05-08 01:50:08 +00:00
 								;;;
 								;;; Unicode-related definitions.
-												Trailing whitespace deleted.

											
										
										
											2003-02-04 13:24:35 +00:00
+								;;;
-												(tibetan-canonicalize-for-unicode-alist)
(tibetan-canonicalize-for-unicode-regexp): New variables.
(tibetan-canonicalize-for-unicode-region): New function.
(tibetan-strict-unicode): New variable.
(tibetan-pre-write-canonicalize-for-unicode): New function.

											
										
										
											2002-05-08 01:50:08 +00:00
 								;; Each element is (MATCHED . REPLACEMENT).
 								;; `tibetan-canonicalize-for-unicode-region' looks up the text matched
 								;; by `tibetan-canonicalize-for-unicode-regexp' with `assoc' and
 								;; replaces it with the cdr.
 								;; NOTE(review): the first key below appears as the text
 								;; "<EFBFBD>..." (the hex spelling of U+FFFD bytes), which looks like
 								;; an encoding artifact rather than a Tibetan character -- confirm
 								;; against the upstream file.
 								(defvar tibetan-canonicalize-for-unicode-alist
-												Use UTF-8 for most files with non-ASCII characters.

* admin/notes/unicode (etc/tutorials/TUTORIAL.ko, leim/quail/hanja.el)
(leim/quail/hanja3.el, leim/quail/symbol-ksc.el):
Now utf-8, not iso-2022-7bit.  Also, files that contain non-UTF-8
characters are now encoded in utf-8-emacs, not iso-2022-7bit.
* etc/tutorials/TUTORIAL.ko, tutorials/TUTORIAL.th:
Switch from iso-2022-7bit to utf-8.
* leim/quail/cyrillic.el, leim/quail/czech.el, leim/quail/ethiopic.el:
* leim/quail/greek.el, leim/quail/hanja.el, leim/quail/hanja3.el:
* leim/quail/hebrew.el, leim/quail/lao.el, leim/quail/lrt.el:
* leim/quail/slovak.el, leim/quail/symbol-ksc.el, leim/quail/thai.el:
* leim/quail/tibetan.el, leim/quail/viqr.el, leim/quail/vntelex.el:
* leim/quail/vnvni.el, leim/quail/welsh.el:
* lisp/international/latin1-disp.el, lisp/international/mule-util.el:
* lisp/language/cyril-util.el, lisp/language/european.el:
* lisp/language/ind-util.el, lisp/language/lao-util.el, lisp/language/thai.el:
* lisp/language/tibet-util.el, lisp/language/tibetan.el:
* lisp/language/viet-util.el:
Switch from iso-2022-7bit to utf-8 or (if needed) utf-8-emacs.

Fixes: debbugs:13936

											
										
										
											2013-04-01 18:18:40 -07:00
+								  '(("<EFBFBD><EFBFBD><EFBFBD><EFBFBD>" . "")	;; remove vowel a
 								    ("ཱི" . "ཱི") ;; decompose vowels whose use is ``discouraged'' in Unicode 3.0
 								    ("ཱུ" . "ཱུ")
 								    ("ྲྀ" . "ྲྀ")
 								    ("ཷ" . "ྲཱྀ")
 								    ("ླྀ" . "ླྀ")
 								    ("ཹ" . "ླཱྀ")
 								    ("ཱྀ" . "ཱྀ"))
-												(tibetan-canonicalize-for-unicode-alist)
(tibetan-canonicalize-for-unicode-regexp): New variables.
(tibetan-canonicalize-for-unicode-region): New function.
(tibetan-strict-unicode): New variable.
(tibetan-pre-write-canonicalize-for-unicode): New function.

											
										
										
											2002-05-08 01:50:08 +00:00
+								  "Rules for canonicalizing Tibetan vowels for Unicode.")
 								;; A character class; `tibetan-canonicalize-for-unicode-region'
 								;; searches for it and looks each match up in
 								;; `tibetan-canonicalize-for-unicode-alist'.
 								;; NOTE(review): the start of the class appears as the text
 								;; "<5B><><EFBFBD><EFBFBD>", which looks like an encoding artifact --
 								;; confirm against the upstream file and keep the class in step with
 								;; the alist keys.
 								(defvar tibetan-canonicalize-for-unicode-regexp
-												Use UTF-8 for most files with non-ASCII characters.

* admin/notes/unicode (etc/tutorials/TUTORIAL.ko, leim/quail/hanja.el)
(leim/quail/hanja3.el, leim/quail/symbol-ksc.el):
Now utf-8, not iso-2022-7bit.  Also, files that contain non-UTF-8
characters are now encoded in utf-8-emacs, not iso-2022-7bit.
* etc/tutorials/TUTORIAL.ko, tutorials/TUTORIAL.th:
Switch from iso-2022-7bit to utf-8.
* leim/quail/cyrillic.el, leim/quail/czech.el, leim/quail/ethiopic.el:
* leim/quail/greek.el, leim/quail/hanja.el, leim/quail/hanja3.el:
* leim/quail/hebrew.el, leim/quail/lao.el, leim/quail/lrt.el:
* leim/quail/slovak.el, leim/quail/symbol-ksc.el, leim/quail/thai.el:
* leim/quail/tibetan.el, leim/quail/viqr.el, leim/quail/vntelex.el:
* leim/quail/vnvni.el, leim/quail/welsh.el:
* lisp/international/latin1-disp.el, lisp/international/mule-util.el:
* lisp/language/cyril-util.el, lisp/language/european.el:
* lisp/language/ind-util.el, lisp/language/lao-util.el, lisp/language/thai.el:
* lisp/language/tibet-util.el, lisp/language/tibetan.el:
* lisp/language/viet-util.el:
Switch from iso-2022-7bit to utf-8 or (if needed) utf-8-emacs.

Fixes: debbugs:13936

											
										
										
											2013-04-01 18:18:40 -07:00
+								  "[<5B><><EFBFBD><EFBFBD>ཱཱིུྲྀཷླྀཹཱྀ]"
-												(tibetan-canonicalize-for-unicode-alist)
(tibetan-canonicalize-for-unicode-regexp): New variables.
(tibetan-canonicalize-for-unicode-region): New function.
(tibetan-strict-unicode): New variable.
(tibetan-pre-write-canonicalize-for-unicode): New function.

											
										
										
											2002-05-08 01:50:08 +00:00
+								  "Regexp for Tibetan vowels to be canonicalized in Unicode.")
 								(defun tibetan-canonicalize-for-unicode-region (from to)
 								  "Canonicalize Tibetan vowels between FROM and TO for Unicode.
 								Every match of `tibetan-canonicalize-for-unicode-regexp' in the region
 								is replaced by the string associated with the matched text in
 								`tibetan-canonicalize-for-unicode-alist'.  Matched text that has no
 								entry in the alist is left unchanged.
 								The restriction in effect is restored, but point is moved.
 								Return nil."
 								  (save-restriction
 								    (narrow-to-region from to)
 								    ;; Start at the beginning of the narrowed region, so the scan covers
 								    ;; the whole region even when FROM is larger than TO.
 								    (goto-char (point-min))
 								    (while (re-search-forward tibetan-canonicalize-for-unicode-regexp nil t)
 								      (let ((canonical-form
 								             (cdr (assoc (match-string 0)
 								                         tibetan-canonicalize-for-unicode-alist))))
 								        ;; No rule for this match: keep the text instead of handing nil
 								        ;; to `replace-match', which would signal an error.
 								        (when canonical-form
 								          ;; FIXEDCASE and LITERAL are both t: insert the replacement
 								          ;; verbatim, with no case conversion and no special meaning
 								          ;; for backslashes.
 								          (replace-match canonical-form t t))))))
 								;; Read by `tibetan-pre-write-canonicalize-for-unicode', which captures
 								;; the value before switching to its work buffer.
 								(defvar tibetan-strict-unicode t
-												Remove * characters from the front of variable docstrings.

											
										
										
											2012-04-09 21:05:48 +08:00
+								  "Flag to control Tibetan canonicalizing for Unicode.
-												(tibetan-canonicalize-for-unicode-alist)
(tibetan-canonicalize-for-unicode-regexp): New variables.
(tibetan-canonicalize-for-unicode-region): New function.
(tibetan-strict-unicode): New variable.
(tibetan-pre-write-canonicalize-for-unicode): New function.

											
										
										
											2002-05-08 01:50:08 +00:00
 								If non-nil, the vowel a is removed and composite vowels are decomposed
 								before writing buffer in Unicode.  See also
 								`tibetan-canonicalize-for-unicode-regexp' and
 								`tibetan-canonicalize-for-unicode-alist'.")
 								;;;###autoload
 								(defun tibetan-pre-write-canonicalize-for-unicode (from to)
 								  "Copy the text to be written into a fresh buffer and canonicalize it.
 								FROM is either a string to write, or FROM and TO delimit the text in
 								the current buffer.  The text is inserted into a newly generated
 								buffer named \" *temp*\", which is left current.  If
 								`tibetan-strict-unicode' was non-nil in the original buffer, the copy
 								is passed to `tibetan-canonicalize-for-unicode-region'.
 								Always return nil."
 								  (let ((source-buffer (current-buffer))
 								        ;; Read the flag before the current buffer changes.
 								        (canonicalize-p tibetan-strict-unicode)
 								        (work-buffer (generate-new-buffer " *temp*")))
 								    (set-buffer work-buffer)
 								    (cond ((stringp from)
 								           (insert from))
 								          (t
 								           (insert-buffer-substring source-buffer from to)))
 								    (when canonicalize-p
 								      (tibetan-canonicalize-for-unicode-region (point-min) (point-max)))
 								    ;; No annotations to report: the result must be nil.
 								    nil))
-												Provide XXX-util instead of
language/XXX-util.  Delete local variable declaration.
Delete the code of calling register-input-method from
all files under this directory.

											
										
										
											1997-06-18 13:06:17 +00:00
+								(provide 'tibet-util)
-												Initial revision

											
										
										
											1997-04-05 02:44:02 +00:00
-												Some fixes to follow coding conventions.

											
										
										
											2001-07-16 12:23:00 +00:00
+								;;; tibet-util.el ends here