Optimize skkdic conversion (Bug#28043)
The primary speedup comes from the optimizing lookup-nested-alist and set-nested-alist for the case where the key is a string. This brings the time down to less than half the original. * lisp/international/mule-util.el (lookup-nested-alist) (set-nested-alist): Use `assq' instead of `assoc' when KEYSEQ is a string. * lisp/international/ja-dic-cnv.el (skkdic-collect-okuri-nasi) (skkdic-convert-okuri-nasi): Use progress-reporter functions instead of calculating ratio of work done inline. (skkdic-reduced-candidates): Call `char-category-set' on the first character of the string directly, instead of using a regexp for the character category. (skkdic--japanese-category-set): New constant. (skkdic-collect-okuri-nasi): Just set `skkdic-okuri-nasi-entries-count' at once at the end rather than updating it throughout the loop. (skkdic-convert-postfix skkdic-convert-prefix) skkdic-get-candidate-list, skkdic-collect-okuri-nasi) (skkdic-extract-conversion-data): Use `match-string-no-properties' instead of `match-string'.
This commit is contained in:
parent
ba0bb332dd
commit
9d7973530f
2 changed files with 83 additions and 55 deletions
|
@ -125,10 +125,10 @@
|
|||
|
||||
;; Search postfix entries.
|
||||
(while (re-search-forward "^[#<>?]\\(\\(\\cH\\|$B!<(B\\)+\\) " nil t)
|
||||
(let ((kana (match-string 1))
|
||||
(let ((kana (match-string-no-properties 1))
|
||||
str candidates)
|
||||
(while (looking-at "/[#0-9 ]*\\([^/\n]*\\)/")
|
||||
(setq str (match-string 1))
|
||||
(setq str (match-string-no-properties 1))
|
||||
(if (not (member str candidates))
|
||||
(setq candidates (cons str candidates)))
|
||||
(goto-char (match-end 1)))
|
||||
|
@ -158,10 +158,10 @@
|
|||
"(skkdic-set-prefix\n"))
|
||||
(save-excursion
|
||||
(while (re-search-forward "^\\(\\(\\cH\\|$B!<(B\\)+\\)[<>?] " nil t)
|
||||
(let ((kana (match-string 1))
|
||||
(let ((kana (match-string-no-properties 1))
|
||||
str candidates)
|
||||
(while (looking-at "/\\([^/\n]+\\)/")
|
||||
(setq str (match-string 1))
|
||||
(setq str (match-string-no-properties 1))
|
||||
(if (not (member str candidates))
|
||||
(setq candidates (cons str candidates)))
|
||||
(goto-char (match-end 1)))
|
||||
|
@ -180,8 +180,8 @@
|
|||
(let (candidates)
|
||||
(goto-char from)
|
||||
(while (re-search-forward "/[^/ \n]+" to t)
|
||||
(setq candidates (cons (buffer-substring (1+ (match-beginning 0))
|
||||
(match-end 0))
|
||||
(setq candidates (cons (buffer-substring-no-properties
|
||||
(1+ (match-beginning 0)) (match-end 0))
|
||||
candidates)))
|
||||
candidates))
|
||||
|
||||
|
@ -251,12 +251,16 @@
|
|||
;; Return list of candidates which excludes some from CANDIDATES.
|
||||
;; Excluded candidates can be derived from another entry.
|
||||
|
||||
(defconst skkdic--japanese-category-set (make-category-set "j"))
|
||||
|
||||
(defun skkdic-reduced-candidates (skkbuf kana candidates)
|
||||
(let (elt l)
|
||||
(while candidates
|
||||
(setq elt (car candidates))
|
||||
(if (or (= (length elt) 1)
|
||||
(and (string-match "^\\cj" elt)
|
||||
(and (bool-vector-subsetp
|
||||
skkdic--japanese-category-set
|
||||
(char-category-set (aref elt 0)))
|
||||
(not (skkdic-breakup-string skkbuf kana elt 0 (length elt)
|
||||
'first))))
|
||||
(setq l (cons elt l)))
|
||||
|
@ -267,24 +271,18 @@
|
|||
(defvar skkdic-okuri-nasi-entries-count 0)
|
||||
|
||||
(defun skkdic-collect-okuri-nasi ()
|
||||
(message "Collecting OKURI-NASI entries ...")
|
||||
(save-excursion
|
||||
(let ((prev-ratio 0)
|
||||
ratio)
|
||||
(let ((progress (make-progress-reporter "Collecting OKURI-NASI entries"
|
||||
(point) (point-max)
|
||||
nil 10)))
|
||||
(while (re-search-forward "^\\(\\(\\cH\\|$B!<(B\\)+\\) \\(/\\cj.*\\)/$"
|
||||
nil t)
|
||||
(let ((kana (match-string 1))
|
||||
(let ((kana (match-string-no-properties 1))
|
||||
(candidates (skkdic-get-candidate-list (match-beginning 3)
|
||||
(match-end 3))))
|
||||
(setq skkdic-okuri-nasi-entries
|
||||
(cons (cons kana candidates) skkdic-okuri-nasi-entries)
|
||||
skkdic-okuri-nasi-entries-count
|
||||
(1+ skkdic-okuri-nasi-entries-count))
|
||||
(setq ratio (floor (* (point) 100.0) (point-max)))
|
||||
(if (/= (/ prev-ratio 10) (/ ratio 10))
|
||||
(progn
|
||||
(message "collected %2d%% ..." ratio)
|
||||
(setq prev-ratio ratio)))
|
||||
(cons (cons kana candidates) skkdic-okuri-nasi-entries))
|
||||
(progress-reporter-update progress (point))
|
||||
(while candidates
|
||||
(let ((entry (lookup-nested-alist (car candidates)
|
||||
skkdic-word-list nil nil t)))
|
||||
|
@ -292,26 +290,24 @@
|
|||
(setcar entry (cons kana (car entry)))
|
||||
(set-nested-alist (car candidates) (list kana)
|
||||
skkdic-word-list)))
|
||||
(setq candidates (cdr candidates))))))))
|
||||
(setq candidates (cdr candidates)))))
|
||||
(setq skkdic-okuri-nasi-entries-count (length skkdic-okuri-nasi-entries))
|
||||
(progress-reporter-done progress))))
|
||||
|
||||
(defun skkdic-convert-okuri-nasi (skkbuf buf)
|
||||
(message "Processing OKURI-NASI entries ...")
|
||||
(with-current-buffer buf
|
||||
(insert ";; Setting okuri-nasi entries.\n"
|
||||
"(skkdic-set-okuri-nasi\n")
|
||||
(let ((l (nreverse skkdic-okuri-nasi-entries))
|
||||
(count 0)
|
||||
(prev-ratio 0)
|
||||
ratio)
|
||||
(progress (make-progress-reporter "Processing OKURI-NASI entries"
|
||||
0 skkdic-okuri-nasi-entries-count
|
||||
nil 10))
|
||||
(count 0))
|
||||
(while l
|
||||
(let ((kana (car (car l)))
|
||||
(candidates (cdr (car l))))
|
||||
(setq ratio (floor (* count 100.0) skkdic-okuri-nasi-entries-count)
|
||||
count (1+ count))
|
||||
(if (/= (/ prev-ratio 10) (/ ratio 10))
|
||||
(progn
|
||||
(message "processed %2d%% ..." ratio)
|
||||
(setq prev-ratio ratio)))
|
||||
(setq count (1+ count))
|
||||
(progress-reporter-update progress count)
|
||||
(if (setq candidates
|
||||
(skkdic-reduced-candidates skkbuf kana candidates))
|
||||
(progn
|
||||
|
@ -320,7 +316,8 @@
|
|||
(insert " " (car candidates))
|
||||
(setq candidates (cdr candidates)))
|
||||
(insert "\"\n"))))
|
||||
(setq l (cdr l))))
|
||||
(setq l (cdr l)))
|
||||
(progress-reporter-done progress))
|
||||
(insert ")\n\n")))
|
||||
|
||||
(defun skkdic-convert (filename &optional dirname)
|
||||
|
@ -467,7 +464,7 @@ To get complete usage, invoke:
|
|||
(i (match-end 0))
|
||||
candidates)
|
||||
(while (string-match "[^ ]+" entry i)
|
||||
(setq candidates (cons (match-string 0 entry) candidates))
|
||||
(setq candidates (cons (match-string-no-properties 0 entry) candidates))
|
||||
(setq i (match-end 0)))
|
||||
(cons (skkdic-get-kana-compact-codes kana) candidates)))
|
||||
|
||||
|
|
|
@ -143,20 +143,43 @@ longer than KEYSEQ.
|
|||
See the documentation of `nested-alist-p' for more detail."
|
||||
(or (nested-alist-p alist)
|
||||
(error "Invalid argument %s" alist))
|
||||
(let ((islist (listp keyseq))
|
||||
(len (or len (length keyseq)))
|
||||
(i 0)
|
||||
key-elt slot)
|
||||
(while (< i len)
|
||||
(if (null (nested-alist-p alist))
|
||||
(error "Keyseq %s is too long for this nested alist" keyseq))
|
||||
(setq key-elt (if islist (nth i keyseq) (aref keyseq i)))
|
||||
(setq slot (assoc key-elt (cdr alist)))
|
||||
(unless slot
|
||||
(setq slot (cons key-elt (list t)))
|
||||
(setcdr alist (cons slot (cdr alist))))
|
||||
(setq alist (cdr slot))
|
||||
(setq i (1+ i)))
|
||||
(let ((len (or len (length keyseq)))
|
||||
(i 0))
|
||||
(cond
|
||||
((stringp keyseq) ; We can use `assq' for characters.
|
||||
(while (< i len)
|
||||
(if (null (nested-alist-p alist))
|
||||
(error "Keyseq %s is too long for this nested alist" keyseq))
|
||||
(let* ((key-elt (aref keyseq i))
|
||||
(slot (assq key-elt (cdr alist))))
|
||||
(unless slot
|
||||
(setq slot (list key-elt t))
|
||||
(push slot (cdr alist)))
|
||||
(setq alist (cdr slot)))
|
||||
(setq i (1+ i))))
|
||||
((arrayp keyseq)
|
||||
(while (< i len)
|
||||
(if (null (nested-alist-p alist))
|
||||
(error "Keyseq %s is too long for this nested alist" keyseq))
|
||||
(let* ((key-elt (aref keyseq i))
|
||||
(slot (assoc key-elt (cdr alist))))
|
||||
(unless slot
|
||||
(setq slot (list key-elt t))
|
||||
(push slot (cdr alist)))
|
||||
(setq alist (cdr slot)))
|
||||
(setq i (1+ i))))
|
||||
((listp keyseq)
|
||||
(while (< i len)
|
||||
(if (null (nested-alist-p alist))
|
||||
(error "Keyseq %s is too long for this nested alist" keyseq))
|
||||
(let* ((key-elt (pop keyseq))
|
||||
(slot (assoc key-elt (cdr alist))))
|
||||
(unless slot
|
||||
(setq slot (list key-elt t))
|
||||
(push slot (cdr alist)))
|
||||
(setq alist (cdr slot)))
|
||||
(setq i (1+ i))))
|
||||
(t (signal 'wrong-type-argument (list keyseq))))
|
||||
(setcar alist entry)
|
||||
(if branches
|
||||
(setcdr (last alist) branches))))
|
||||
|
@ -179,15 +202,23 @@ Optional 5th argument NIL-FOR-TOO-LONG non-nil means return nil
|
|||
(setq len (length keyseq)))
|
||||
(let ((i (or start 0)))
|
||||
(if (catch 'lookup-nested-alist-tag
|
||||
(if (listp keyseq)
|
||||
(while (< i len)
|
||||
(if (setq alist (cdr (assoc (nth i keyseq) (cdr alist))))
|
||||
(setq i (1+ i))
|
||||
(throw 'lookup-nested-alist-tag t))))
|
||||
(while (< i len)
|
||||
(if (setq alist (cdr (assoc (aref keyseq i) (cdr alist))))
|
||||
(setq i (1+ i))
|
||||
(throw 'lookup-nested-alist-tag t))))
|
||||
(cond ((stringp keyseq) ; We can use `assq' for characters.
|
||||
(while (< i len)
|
||||
(if (setq alist (cdr (assq (aref keyseq i) (cdr alist))))
|
||||
(setq i (1+ i))
|
||||
(throw 'lookup-nested-alist-tag t))))
|
||||
((arrayp keyseq)
|
||||
(while (< i len)
|
||||
(if (setq alist (cdr (assoc (aref keyseq i) (cdr alist))))
|
||||
(setq i (1+ i))
|
||||
(throw 'lookup-nested-alist-tag t))))
|
||||
((listp keyseq)
|
||||
(setq keyseq (nthcdr i keyseq))
|
||||
(while (< i len)
|
||||
(if (setq alist (cdr (assoc (pop keyseq) (cdr alist))))
|
||||
(setq i (1+ i))
|
||||
(throw 'lookup-nested-alist-tag t))))
|
||||
(t (signal 'wrong-type-argument (list keyseq)))))
|
||||
;; KEYSEQ is too long.
|
||||
(if nil-for-too-long nil i)
|
||||
alist)))
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue