python.el: Handle file encoding for shell.

* lisp/progmodes/python.el (python-rx-constituents): Add coding-cookie.
(python-shell--save-temp-file): Write file with proper encoding.
(python-shell-buffer-substring): Add coding cookie for detected
encoding to generated content.  Fix blank lines when removing
if-name-main block.
(python-shell-send-file): Handle file encoding.
(python-info-encoding-from-cookie)
(python-info-encoding): New functions.

* test/automated/python-tests.el (python-shell-buffer-substring-1)
(python-shell-buffer-substring-2, python-shell-buffer-substring-3)
(python-shell-buffer-substring-4, python-shell-buffer-substring-5)
(python-shell-buffer-substring-6, python-shell-buffer-substring-7)
(python-shell-buffer-substring-8)
(python-info-encoding-from-cookie-1)
(python-info-encoding-from-cookie-2)
(python-info-encoding-from-cookie-3)
(python-info-encoding-from-cookie-4)
(python-info-encoding-from-cookie-5)
(python-info-encoding-from-cookie-6)
(python-info-encoding-from-cookie-7, python-info-encoding-1)
(python-info-encoding-2): New tests.
This commit is contained in:
Fabián Ezequiel Gallina 2014-12-27 01:30:21 -03:00
parent 7aa506eed8
commit 2dd5163d76
4 changed files with 385 additions and 25 deletions

View file

@ -1,3 +1,16 @@
2014-12-27 Fabián Ezequiel Gallina <fgallina@gnu.org>
python.el: Handle file encoding for shell.
* progmodes/python.el (python-rx-constituents): Add coding-cookie.
(python-shell--save-temp-file): Write file with proper encoding.
(python-shell-buffer-substring): Add coding cookie for detected
encoding to generated content. Fix blank lines when removing
if-name-main block.
(python-shell-send-file): Handle file encoding.
(python-info-encoding-from-cookie)
(python-info-encoding): New functions.
2014-12-24 Michael Albinus <michael.albinus@gmx.de> 2014-12-24 Michael Albinus <michael.albinus@gmx.de>
* net/tramp-sh.el (tramp-do-copy-or-rename-file-out-of-band): * net/tramp-sh.el (tramp-do-copy-or-rename-file-out-of-band):

View file

@ -386,7 +386,18 @@
(* ?\\ ?\\) (any ?\' ?\"))) (* ?\\ ?\\) (any ?\' ?\")))
(* ?\\ ?\\) (* ?\\ ?\\)
;; Match single or triple quotes of any kind. ;; Match single or triple quotes of any kind.
(group (or "\"" "\"\"\"" "'" "'''")))))) (group (or "\"" "\"\"\"" "'" "'''")))))
(coding-cookie . ,(rx line-start ?# (* space)
(or
;; # coding=<encoding name>
(: "coding" (or ?: ?=) (* space) (group-n 1 (+ (or word ?-))))
;; # -*- coding: <encoding name> -*-
(: "-*-" (* space) "coding:" (* space)
(group-n 1 (+ (or word ?-))) (* space) "-*-")
;; # vim: set fileencoding=<encoding name> :
(: "vim:" (* space) "set" (+ space)
"fileencoding" (* space) ?= (* space)
(group-n 1 (+ (or word ?-))) (* space) ":")))))
"Additional Python specific sexps for `python-rx'") "Additional Python specific sexps for `python-rx'")
(defmacro python-rx (&rest regexps) (defmacro python-rx (&rest regexps)
@ -2400,11 +2411,7 @@ there for compatibility with CEDET.")
(concat (file-remote-p default-directory) "/tmp") (concat (file-remote-p default-directory) "/tmp")
temporary-file-directory)) temporary-file-directory))
(temp-file-name (make-temp-file "py")) (temp-file-name (make-temp-file "py"))
;; XXX: Python's built-in compile function accepts utf-8 as (coding-system-for-write (python-info-encoding)))
;; input so there's no need to enforce a coding cookie. In
;; the future making `coding-system-for-write' match the
;; current buffer's coding may be a good idea.
(coding-system-for-write 'utf-8))
(with-temp-file temp-file-name (with-temp-file temp-file-name
(insert string) (insert string)
(delete-trailing-whitespace)) (delete-trailing-whitespace))
@ -2511,16 +2518,28 @@ the python shell:
\"if __name__ == '__main__'\" block will be removed. \"if __name__ == '__main__'\" block will be removed.
2. When a subregion of the buffer is sent, it takes care of 2. When a subregion of the buffer is sent, it takes care of
appending extra empty lines so tracebacks are correct. appending extra empty lines so tracebacks are correct.
3. Wraps indented regions under an \"if True:\" block so the 3. When the region sent is a substring of the current buffer, a
coding cookie is added.
4. Wraps indented regions under an \"if True:\" block so the
interpreter evaluates them correctly." interpreter evaluates them correctly."
(let ((substring (buffer-substring-no-properties start end)) (let* ((substring (buffer-substring-no-properties start end))
(fillstr (make-string (1- (line-number-at-pos start)) ?\n)) (buffer-substring-p (save-restriction
(toplevel-block-p (save-excursion (widen)
(goto-char start) (not (equal (list (point-min) (point-max))
(or (zerop (line-number-at-pos start)) (list start end)))))
(progn (encoding (python-info-encoding))
(python-util-forward-comment 1) (fillstr (concat
(zerop (current-indentation))))))) (when buffer-substring-p
(format "# -*- coding: %s -*-\n" encoding))
(make-string
(- (line-number-at-pos start)
(if buffer-substring-p 2 1)) ?\n)))
(toplevel-block-p (save-excursion
(goto-char start)
(or (zerop (line-number-at-pos start))
(progn
(python-util-forward-comment 1)
(zerop (current-indentation)))))))
(with-temp-buffer (with-temp-buffer
(python-mode) (python-mode)
(if fillstr (insert fillstr)) (if fillstr (insert fillstr))
@ -2536,17 +2555,26 @@ the python shell:
(when (python-nav-if-name-main) (when (python-nav-if-name-main)
(cons (point) (cons (point)
(progn (python-nav-forward-sexp-safe) (progn (python-nav-forward-sexp-safe)
;; Include ending newline
(forward-line 1)
(point))))))) (point)))))))
;; Oh destructuring bind, how I miss you. ;; Oh destructuring bind, how I miss you.
(if-name-main-start (car if-name-main-start-end)) (if-name-main-start (car if-name-main-start-end))
(if-name-main-end (cdr if-name-main-start-end))) (if-name-main-end (cdr if-name-main-start-end))
(fillstr (make-string
(- (line-number-at-pos if-name-main-end)
(line-number-at-pos if-name-main-start)) ?\n)))
(when if-name-main-start-end (when if-name-main-start-end
(goto-char if-name-main-start) (goto-char if-name-main-start)
(delete-region if-name-main-start if-name-main-end) (delete-region if-name-main-start if-name-main-end)
(insert (insert fillstr))))
(make-string ;; Ensure there's only one coding cookie in the generated string.
(- (line-number-at-pos if-name-main-end) (goto-char (point-min))
(line-number-at-pos if-name-main-start)) ?\n))))) (when (looking-at-p (python-rx coding-cookie))
(forward-line 1)
(when (looking-at-p (python-rx coding-cookie))
(delete-region
(line-beginning-position) (line-end-position))))
(buffer-substring-no-properties (point-min) (point-max))))) (buffer-substring-no-properties (point-min) (point-max)))))
(defun python-shell-send-region (start end &optional nomain) (defun python-shell-send-region (start end &optional nomain)
@ -2604,15 +2632,21 @@ If DELETE is non-nil, delete the file afterwards."
(expand-file-name (expand-file-name
(or (file-remote-p file-name 'localname) (or (file-remote-p file-name 'localname)
file-name))) file-name)))
temp-file-name))) temp-file-name))
(encoding
(with-temp-buffer
(insert-file-contents
(or temp-file-name file-name))
(python-info-encoding))))
(when (not file-name) (when (not file-name)
(error "If FILE-NAME is nil then TEMP-FILE-NAME must be non-nil")) (error "If FILE-NAME is nil then TEMP-FILE-NAME must be non-nil"))
(python-shell-send-string (python-shell-send-string
(format (format
(concat "__pyfile = open('''%s''');" (concat
"exec(compile(__pyfile.read(), '''%s''', 'exec'));" "import codecs; __pyfile = codecs.open('''%s''', encoding='''%s''');"
"__pyfile.close()%s") "exec(compile(__pyfile.read().encode('''%s'''), '''%s''', 'exec'));"
(or temp-file-name file-name) file-name "__pyfile.close()%s")
(or temp-file-name file-name) encoding encoding file-name
(if delete (format "; import os; os.remove('''%s''')" (if delete (format "; import os; os.remove('''%s''')"
(or temp-file-name file-name)) (or temp-file-name file-name))
"")) ""))
@ -3912,6 +3946,32 @@ operator."
(* whitespace) line-end)) (* whitespace) line-end))
(string-equal "" (match-string-no-properties 1)))) (string-equal "" (match-string-no-properties 1))))
(defun python-info-encoding-from-cookie ()
  "Detect current buffer's encoding from its coding cookie.
Only the first two lines of the widened buffer are searched.
Return the encoding as a symbol, or nil when no cookie matches."
  (let ((header
         (save-excursion
           (save-restriction
             (widen)
             (goto-char (point-min))
             ;; Move past the first two lines so that the text between
             ;; the buffer start and point is exactly the searched area.
             (forward-line 2)
             (buffer-substring-no-properties (point-min) (point))))))
    ;; Group 1 of the `coding-cookie' regexp holds the encoding name.
    (and (string-match (python-rx coding-cookie) header)
         (intern (match-string-no-properties 1 header)))))
(defun python-info-encoding ()
  "Return encoding for file.
Use the result of `python-info-encoding-from-cookie' when it is
non-nil; otherwise default to utf-8."
  ;; If no encoding is defined, then it's safe to use UTF-8: Python 2
  ;; uses ASCII as default while Python 3 uses UTF-8.  This means that
  ;; in the worst case scenario python.el will make things work for
  ;; Python 2 files with unicode data and no encoding defined.
  (let ((cookie-encoding (python-info-encoding-from-cookie)))
    (if cookie-encoding
        cookie-encoding
      'utf-8)))
;;; Utility functions ;;; Utility functions

View file

@ -1,3 +1,19 @@
2014-12-27 Fabián Ezequiel Gallina <fgallina@gnu.org>
* automated/python-tests.el (python-shell-buffer-substring-1)
(python-shell-buffer-substring-2, python-shell-buffer-substring-3)
(python-shell-buffer-substring-4, python-shell-buffer-substring-5)
(python-shell-buffer-substring-6, python-shell-buffer-substring-7)
(python-shell-buffer-substring-8)
(python-info-encoding-from-cookie-1)
(python-info-encoding-from-cookie-2)
(python-info-encoding-from-cookie-3)
(python-info-encoding-from-cookie-4)
(python-info-encoding-from-cookie-5)
(python-info-encoding-from-cookie-6)
(python-info-encoding-from-cookie-7, python-info-encoding-1)
(python-info-encoding-2): New tests.
2014-12-25 Michael Albinus <michael.albinus@gmx.de> 2014-12-25 Michael Albinus <michael.albinus@gmx.de>
* automated/tramp-tests.el (tramp-test17-insert-directory): Do not * automated/tramp-tests.el (tramp-test17-insert-directory): Do not

View file

@ -2459,6 +2459,198 @@ and `python-shell-interpreter-args' in the new shell buffer."
"^\\(o\\.t \\|\\)"))) "^\\(o\\.t \\|\\)")))
(ignore-errors (delete-file startup-file))))) (ignore-errors (delete-file startup-file)))))
(ert-deftest python-shell-buffer-substring-1 ()
"Selecting a substring of the whole buffer must match its contents."
;; The region runs from `point-min' to `point-max', i.e. the whole
;; buffer, so the result is compared directly against `buffer-string'.
(python-tests-with-temp-buffer
"
class Foo(models.Model):
pass
class Bar(models.Model):
pass
"
(should (string= (buffer-string)
(python-shell-buffer-substring (point-min) (point-max))))))
(ert-deftest python-shell-buffer-substring-2 ()
"Main block should be removed if NOMAIN is non-nil."
;; The if-name-main block is the last form in the fixture.  The call
;; passes t as third argument and the expected text keeps only the
;; two class definitions.
(python-tests-with-temp-buffer
"
class Foo(models.Model):
pass
class Bar(models.Model):
pass
if __name__ == \"__main__\":
foo = Foo()
print (foo)
"
(should (string= (python-shell-buffer-substring (point-min) (point-max) t)
"
class Foo(models.Model):
pass
class Bar(models.Model):
pass
"))))
(ert-deftest python-shell-buffer-substring-3 ()
"Main block should be removed if NOMAIN is non-nil."
;; Same check as the previous test, but here the if-name-main block
;; sits between the two class definitions instead of at the end.
(python-tests-with-temp-buffer
"
class Foo(models.Model):
pass
if __name__ == \"__main__\":
foo = Foo()
print (foo)
class Bar(models.Model):
pass
"
(should (string= (python-shell-buffer-substring (point-min) (point-max) t)
"
class Foo(models.Model):
pass
class Bar(models.Model):
pass
"))))
(ert-deftest python-shell-buffer-substring-4 ()
"Coding cookie should be added for substrings."
;; The region covers only the `class Foo' definition.  The buffer
;; declares its encoding as `# coding: latin-1', while the expected
;; output starts with the Emacs-style `# -*- coding: latin-1 -*-' line.
(python-tests-with-temp-buffer
"# coding: latin-1
class Foo(models.Model):
pass
if __name__ == \"__main__\":
foo = Foo()
print (foo)
class Bar(models.Model):
pass
"
(should (string= (python-shell-buffer-substring
(python-tests-look-at "class Foo(models.Model):")
(progn (python-nav-forward-sexp) (point)))
"# -*- coding: latin-1 -*-
class Foo(models.Model):
pass"))))
(ert-deftest python-shell-buffer-substring-5 ()
"The proper amount of blank lines is added for a substring."
;; The region covers only the `class Bar' definition, which comes
;; after the if-name-main block in the fixture.  The expected output
;; starts with the generated `# -*- coding: latin-1 -*-' cookie.
(python-tests-with-temp-buffer
"# coding: latin-1
class Foo(models.Model):
pass
if __name__ == \"__main__\":
foo = Foo()
print (foo)
class Bar(models.Model):
pass
"
(should (string= (python-shell-buffer-substring
(python-tests-look-at "class Bar(models.Model):")
(progn (python-nav-forward-sexp) (point)))
"# -*- coding: latin-1 -*-
class Bar(models.Model):
pass"))))
(ert-deftest python-shell-buffer-substring-6 ()
"Handle substring with coding cookie in the second line."
;; The fixture opens with a newline, so the cookie is on line 2.  The
;; region starts at that cookie line and stops at the if-name-main
;; line; the expected output carries a single cookie, the generated
;; Emacs-style one.
(python-tests-with-temp-buffer
"
# coding: latin-1
class Foo(models.Model):
pass
if __name__ == \"__main__\":
foo = Foo()
print (foo)
class Bar(models.Model):
pass
"
(should (string= (python-shell-buffer-substring
(python-tests-look-at "# coding: latin-1")
(python-tests-look-at "if __name__ == \"__main__\":"))
"# -*- coding: latin-1 -*-
class Foo(models.Model):
pass
"))))
(ert-deftest python-shell-buffer-substring-7 ()
"Ensure first coding cookie gets precedence."
;; The fixture has two cookies: utf-8 on line 1 and latin-1 on line 2.
;; The region starts at the latin-1 line, yet the expected output
;; declares utf-8 and does not contain the latin-1 cookie.
(python-tests-with-temp-buffer
"# coding: utf-8
# coding: latin-1
class Foo(models.Model):
pass
if __name__ == \"__main__\":
foo = Foo()
print (foo)
class Bar(models.Model):
pass
"
(should (string= (python-shell-buffer-substring
(python-tests-look-at "# coding: latin-1")
(python-tests-look-at "if __name__ == \"__main__\":"))
"# -*- coding: utf-8 -*-
class Foo(models.Model):
pass
"))))
(ert-deftest python-shell-buffer-substring-8 ()
"Ensure first coding cookie gets precedence when sending whole buffer."
;; The whole buffer is sent, so the original first line is kept
;; verbatim (`# coding: utf-8'); the second cookie (latin-1) is absent
;; from the expected output.
(python-tests-with-temp-buffer
"# coding: utf-8
# coding: latin-1
class Foo(models.Model):
pass
"
(should (string= (python-shell-buffer-substring (point-min) (point-max))
"# coding: utf-8
class Foo(models.Model):
pass
"))))
;;; Shell completion ;;; Shell completion
@ -3773,6 +3965,85 @@ foo = True # another comment
(forward-line 1) (forward-line 1)
(should (python-info-current-line-empty-p)))) (should (python-info-current-line-empty-p))))
(ert-deftest python-info-encoding-from-cookie-1 ()
"Should detect it on first line."
;; Uses the bare `coding=<name>' cookie form on the buffer's first
;; line; the result is compared with `eq' because a symbol is returned.
(python-tests-with-temp-buffer
"# coding=latin-1
foo = True # another comment
"
(should (eq (python-info-encoding-from-cookie) 'latin-1))))
(ert-deftest python-info-encoding-from-cookie-2 ()
"Should detect it on second line."
;; The fixture string opens with a newline, which puts the cookie on
;; the second line of the buffer.
(python-tests-with-temp-buffer
"
# coding=latin-1
foo = True # another comment
"
(should (eq (python-info-encoding-from-cookie) 'latin-1))))
(ert-deftest python-info-encoding-from-cookie-3 ()
"Should not be detected on third line (and following ones)."
;; NOTE(review): for this assertion to hold the cookie must sit on
;; line 3 or later, i.e. the fixture needs at least two leading
;; newlines -- confirm the blank lines are intact in the real file.
(python-tests-with-temp-buffer
"
# coding=latin-1
foo = True # another comment
"
(should (not (python-info-encoding-from-cookie)))))
(ert-deftest python-info-encoding-from-cookie-4 ()
"Should detect Emacs style."
;; Emacs style is the `-*- coding: <name> -*-' form, here on line 1.
(python-tests-with-temp-buffer
"# -*- coding: latin-1 -*-
foo = True # another comment"
(should (eq (python-info-encoding-from-cookie) 'latin-1))))
(ert-deftest python-info-encoding-from-cookie-5 ()
"Should detect Vim style."
;; Vim style is the `vim: set fileencoding=<name> :' form, here on
;; line 1.
(python-tests-with-temp-buffer
"# vim: set fileencoding=latin-1 :
foo = True # another comment"
(should (eq (python-info-encoding-from-cookie) 'latin-1))))
(ert-deftest python-info-encoding-from-cookie-6 ()
"First cookie wins."
;; Both of the first two lines carry a cookie: Emacs style
;; (iso-8859-1) on line 1 and Vim style (latin-1) on line 2.  The value
;; from line 1 is expected.
(python-tests-with-temp-buffer
"# -*- coding: iso-8859-1 -*-
# vim: set fileencoding=latin-1 :
foo = True # another comment"
(should (eq (python-info-encoding-from-cookie) 'iso-8859-1))))
(ert-deftest python-info-encoding-from-cookie-7 ()
"First cookie wins."
;; Mirror of the previous test with the cookie order swapped: Vim
;; style (latin-1) on line 1, Emacs style (iso-8859-1) on line 2.  The
;; value from line 1 is expected.
(python-tests-with-temp-buffer
"# vim: set fileencoding=latin-1 :
# -*- coding: iso-8859-1 -*-
foo = True # another comment"
(should (eq (python-info-encoding-from-cookie) 'latin-1))))
(ert-deftest python-info-encoding-1 ()
"Should return the detected encoding from cookie."
;; With a cookie present on line 1, `python-info-encoding' is expected
;; to return that cookie's encoding rather than its utf-8 default.
(python-tests-with-temp-buffer
"# vim: set fileencoding=latin-1 :
foo = True # another comment"
(should (eq (python-info-encoding) 'latin-1))))
(ert-deftest python-info-encoding-2 ()
"Should default to utf-8."
;; The first line is an ordinary comment that contains no coding
;; cookie, so the fallback symbol `utf-8' is expected.
(python-tests-with-temp-buffer
"# No encoding for you
foo = True # another comment"
(should (eq (python-info-encoding) 'utf-8))))
;;; Utility functions ;;; Utility functions