From 158cf528c4aa690be84f9d1c3bc96b750e8b37b3 Mon Sep 17 00:00:00 2001 From: Stephane Zermatten Date: Mon, 31 Mar 2025 16:41:08 +0300 Subject: [PATCH] Fix term.el bug with very short multibyte character chunk Before this change, a chunk containing only a part of a multibyte character would be discarded and displayed undecoded on the terminal. * lisp/term.el (term-emulate-terminal): Fix handling chunks with part of a multibyte character. (Bug#77410) * test/lisp/term-tests.el (term-decode-partial) (term-undecodable-input): Fix and enhance tests. Copyright-paperwork-exempt: yes --- lisp/term.el | 2 +- test/lisp/term-tests.el | 17 +++++++++-------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/lisp/term.el b/lisp/term.el index 862103d88e6..a971300c055 100644 --- a/lisp/term.el +++ b/lisp/term.el @@ -3116,7 +3116,7 @@ See `term-prompt-regexp'." (- count 1 partial))) 'eight-bit)) (incf partial)) - (when (> count partial 0) + (when (> partial 0) (setq term-terminal-undecoded-bytes (substring decoded-substring (- partial))) (setq decoded-substring diff --git a/test/lisp/term-tests.el b/test/lisp/term-tests.el index 5ef8c1174df..ffb341f3b52 100644 --- a/test/lisp/term-tests.el +++ b/test/lisp/term-tests.el @@ -402,17 +402,18 @@ This is a reduced example from GNU nano's initial screen." (ert-deftest term-decode-partial () ;; Bug#25288. "Test multibyte characters sent into multiple chunks." ;; Set `locale-coding-system' so test will be deterministic. - (let* ((locale-coding-system 'utf-8-unix) - (string (make-string 7 ?ш)) - (bytes (encode-coding-string string locale-coding-system))) - (should (equal string - (term-test-screen-from-input - 40 1 `(,(substring bytes 0 (/ (length bytes) 2)) - ,(substring bytes (/ (length bytes) 2)))))))) + (let ((locale-coding-system 'utf-8-unix)) + (should (equal "шшш" (term-test-screen-from-input + 40 1 '("\321" "\210\321\210\321\210")))) + (should (equal "шшш" (term-test-screen-from-input + 40 1 '("\321\210\321" "\210\321\210")))) + (should (equal "шшш" (term-test-screen-from-input + 40 1 '("\321\210\321\210\321" "\210")))))) + (ert-deftest term-undecodable-input () ;; Bug#29918. "Undecodable bytes should be passed through without error." (let* ((locale-coding-system 'utf-8-unix) ; As above. - (bytes "\376\340\360\370") + (bytes "\376\340\360\370.") (string (decode-coding-string bytes locale-coding-system))) (should (equal string (term-test-screen-from-input