python.el: Handle file encoding for shell.

* lisp/progmodes/python.el (python-rx-constituents): Add coding-cookie.
(python-shell--save-temp-file): Write file with proper encoding.
(python-shell-buffer-substring): Add coding cookie for detected
encoding to generated content.  Fix blank lines when removing
if-name-main block.
(python-shell-send-file): Handle file encoding.
(python-info-encoding-from-cookie)
(python-info-encoding): New functions.

* test/automated/python-tests.el (python-shell-buffer-substring-1)
(python-shell-buffer-substring-2, python-shell-buffer-substring-3)
(python-shell-buffer-substring-4, python-shell-buffer-substring-5)
(python-shell-buffer-substring-6, python-shell-buffer-substring-7)
(python-shell-buffer-substring-8)
(python-info-encoding-from-cookie-1)
(python-info-encoding-from-cookie-2)
(python-info-encoding-from-cookie-3)
(python-info-encoding-from-cookie-4)
(python-info-encoding-from-cookie-5)
(python-info-encoding-from-cookie-6)
(python-info-encoding-from-cookie-7, python-info-encoding-1)
(python-info-encoding-2): New tests.
This commit is contained in:
Fabián Ezequiel Gallina 2014-12-27 01:30:21 -03:00
parent 7aa506eed8
commit 2dd5163d76
4 changed files with 385 additions and 25 deletions

View file

@ -1,3 +1,16 @@
2014-12-27 Fabián Ezequiel Gallina <fgallina@gnu.org>
python.el: Handle file encoding for shell.
* progmodes/python.el (python-rx-constituents): Add coding-cookie.
(python-shell--save-temp-file): Write file with proper encoding.
(python-shell-buffer-substring): Add coding cookie for detected
encoding to generated content. Fix blank lines when removing
if-name-main block.
(python-shell-send-file): Handle file encoding.
(python-info-encoding-from-cookie)
(python-info-encoding): New functions.
2014-12-24 Michael Albinus <michael.albinus@gmx.de>
* net/tramp-sh.el (tramp-do-copy-or-rename-file-out-of-band):

View file

@ -386,7 +386,18 @@
(* ?\\ ?\\) (any ?\' ?\")))
(* ?\\ ?\\)
;; Match single or triple quotes of any kind.
(group (or "\"" "\"\"\"" "'" "'''"))))))
(group (or "\"" "\"\"\"" "'" "'''")))))
(coding-cookie . ,(rx line-start ?# (* space)
(or
;; # coding=<encoding name>
(: "coding" (or ?: ?=) (* space) (group-n 1 (+ (or word ?-))))
;; # -*- coding: <encoding name> -*-
(: "-*-" (* space) "coding:" (* space)
(group-n 1 (+ (or word ?-))) (* space) "-*-")
;; # vim: set fileencoding=<encoding name> :
(: "vim:" (* space) "set" (+ space)
"fileencoding" (* space) ?= (* space)
(group-n 1 (+ (or word ?-))) (* space) ":")))))
"Additional Python specific sexps for `python-rx'")
(defmacro python-rx (&rest regexps)
@ -2400,11 +2411,7 @@ there for compatibility with CEDET.")
(concat (file-remote-p default-directory) "/tmp")
temporary-file-directory))
(temp-file-name (make-temp-file "py"))
;; XXX: Python's built-in compile function accepts utf-8 as
;; input so there's no need to enforce a coding cookie. In
;; the future making `coding-system-for-write' match the
;; current buffer's coding may be a good idea.
(coding-system-for-write 'utf-8))
(coding-system-for-write (python-info-encoding)))
(with-temp-file temp-file-name
(insert string)
(delete-trailing-whitespace))
@ -2511,16 +2518,28 @@ the python shell:
\"if __name__ == '__main__'\" block will be removed.
2. When a subregion of the buffer is sent, it takes care of
appending extra empty lines so tracebacks are correct.
3. Wraps indented regions under an \"if True:\" block so the
3. When the region sent is a substring of the current buffer, a
coding cookie is added.
4. Wraps indented regions under an \"if True:\" block so the
interpreter evaluates them correctly."
(let ((substring (buffer-substring-no-properties start end))
(fillstr (make-string (1- (line-number-at-pos start)) ?\n))
(toplevel-block-p (save-excursion
(goto-char start)
(or (zerop (line-number-at-pos start))
(progn
(python-util-forward-comment 1)
(zerop (current-indentation)))))))
(let* ((substring (buffer-substring-no-properties start end))
(buffer-substring-p (save-restriction
(widen)
(not (equal (list (point-min) (point-max))
(list start end)))))
(encoding (python-info-encoding))
(fillstr (concat
(when buffer-substring-p
(format "# -*- coding: %s -*-\n" encoding))
(make-string
(- (line-number-at-pos start)
(if buffer-substring-p 2 1)) ?\n)))
(toplevel-block-p (save-excursion
(goto-char start)
(or (zerop (line-number-at-pos start))
(progn
(python-util-forward-comment 1)
(zerop (current-indentation)))))))
(with-temp-buffer
(python-mode)
(if fillstr (insert fillstr))
@ -2536,17 +2555,26 @@ the python shell:
(when (python-nav-if-name-main)
(cons (point)
(progn (python-nav-forward-sexp-safe)
;; Include ending newline
(forward-line 1)
(point)))))))
;; Oh destructuring bind, how I miss you.
(if-name-main-start (car if-name-main-start-end))
(if-name-main-end (cdr if-name-main-start-end)))
(if-name-main-end (cdr if-name-main-start-end))
(fillstr (make-string
(- (line-number-at-pos if-name-main-end)
(line-number-at-pos if-name-main-start)) ?\n)))
(when if-name-main-start-end
(goto-char if-name-main-start)
(delete-region if-name-main-start if-name-main-end)
(insert
(make-string
(- (line-number-at-pos if-name-main-end)
(line-number-at-pos if-name-main-start)) ?\n)))))
(insert fillstr))))
;; Ensure there's only one coding cookie in the generated string.
(goto-char (point-min))
(when (looking-at-p (python-rx coding-cookie))
(forward-line 1)
(when (looking-at-p (python-rx coding-cookie))
(delete-region
(line-beginning-position) (line-end-position))))
(buffer-substring-no-properties (point-min) (point-max)))))
(defun python-shell-send-region (start end &optional nomain)
@ -2604,15 +2632,21 @@ If DELETE is non-nil, delete the file afterwards."
(expand-file-name
(or (file-remote-p file-name 'localname)
file-name)))
temp-file-name)))
temp-file-name))
(encoding
(with-temp-buffer
(insert-file-contents
(or temp-file-name file-name))
(python-info-encoding))))
(when (not file-name)
(error "If FILE-NAME is nil then TEMP-FILE-NAME must be non-nil"))
(python-shell-send-string
(format
(concat "__pyfile = open('''%s''');"
"exec(compile(__pyfile.read(), '''%s''', 'exec'));"
"__pyfile.close()%s")
(or temp-file-name file-name) file-name
(concat
"import codecs; __pyfile = codecs.open('''%s''', encoding='''%s''');"
"exec(compile(__pyfile.read().encode('''%s'''), '''%s''', 'exec'));"
"__pyfile.close()%s")
(or temp-file-name file-name) encoding encoding file-name
(if delete (format "; import os; os.remove('''%s''')"
(or temp-file-name file-name))
""))
@ -3912,6 +3946,32 @@ operator."
(* whitespace) line-end))
(string-equal "" (match-string-no-properties 1))))
(defun python-info-encoding-from-cookie ()
  "Detect current buffer's encoding from its coding cookie.
Only the first two lines of the widened buffer are searched.
Return the encoding as a symbol, or nil when no cookie is found."
  (let ((header
         (save-excursion
           (save-restriction
             (widen)
             (goto-char (point-min))
             ;; Step over the first two lines; the text between the
             ;; buffer start and point is all that gets searched.
             (forward-line 2)
             (buffer-substring-no-properties (point-min) (point))))))
    (if (string-match (python-rx coding-cookie) header)
        ;; Group 1 holds the encoding name in every cookie variant.
        (intern (match-string-no-properties 1 header)))))
(defun python-info-encoding ()
  "Return encoding for file.
Use the result of `python-info-encoding-from-cookie' when it is
non-nil; otherwise fall back to the symbol `utf-8'."
  ;; When no encoding is declared it is safe to use UTF-8: Python 2
  ;; uses ASCII as default while Python 3 uses UTF-8.  This means that
  ;; in the worst case scenario python.el will make things work for
  ;; Python 2 files with unicode data and no encoding defined.
  (let ((from-cookie (python-info-encoding-from-cookie)))
    (if from-cookie
        from-cookie
      'utf-8)))
;;; Utility functions

View file

@ -1,3 +1,19 @@
2014-12-27 Fabián Ezequiel Gallina <fgallina@gnu.org>
* automated/python-tests.el (python-shell-buffer-substring-1)
(python-shell-buffer-substring-2, python-shell-buffer-substring-3)
(python-shell-buffer-substring-4, python-shell-buffer-substring-5)
(python-shell-buffer-substring-6, python-shell-buffer-substring-7)
(python-shell-buffer-substring-8)
(python-info-encoding-from-cookie-1)
(python-info-encoding-from-cookie-2)
(python-info-encoding-from-cookie-3)
(python-info-encoding-from-cookie-4)
(python-info-encoding-from-cookie-5)
(python-info-encoding-from-cookie-6)
(python-info-encoding-from-cookie-7, python-info-encoding-1)
(python-info-encoding-2): New tests.
2014-12-25 Michael Albinus <michael.albinus@gmx.de>
* automated/tramp-tests.el (tramp-test17-insert-directory): Do not

View file

@ -2459,6 +2459,198 @@ and `python-shell-interpreter-args' in the new shell buffer."
"^\\(o\\.t \\|\\)")))
(ignore-errors (delete-file startup-file)))))
(ert-deftest python-shell-buffer-substring-1 ()
"Sending the whole buffer must return its contents unchanged."
(python-tests-with-temp-buffer
"
class Foo(models.Model):
pass
class Bar(models.Model):
pass
"
;; START and END are `point-min' and `point-max', so the result is
;; compared directly against `buffer-string'.
(should (string= (buffer-string)
(python-shell-buffer-substring (point-min) (point-max))))))
(ert-deftest python-shell-buffer-substring-2 ()
"Main block at the end of the buffer is removed if NOMAIN is non-nil."
(python-tests-with-temp-buffer
"
class Foo(models.Model):
pass
class Bar(models.Model):
pass
if __name__ == \"__main__\":
foo = Foo()
print (foo)
"
;; The third argument (NOMAIN) is t; the expected string contains
;; both class definitions but not the main block.
(should (string= (python-shell-buffer-substring (point-min) (point-max) t)
"
class Foo(models.Model):
pass
class Bar(models.Model):
pass
"))))
(ert-deftest python-shell-buffer-substring-3 ()
"Main block between two classes is removed if NOMAIN is non-nil."
(python-tests-with-temp-buffer
"
class Foo(models.Model):
pass
if __name__ == \"__main__\":
foo = Foo()
print (foo)
class Bar(models.Model):
pass
"
;; The third argument (NOMAIN) is t; the code following the main
;; block (class Bar) must still be present in the result.
(should (string= (python-shell-buffer-substring (point-min) (point-max) t)
"
class Foo(models.Model):
pass
class Bar(models.Model):
pass
"))))
(ert-deftest python-shell-buffer-substring-4 ()
"Coding cookie should be added for substrings."
(python-tests-with-temp-buffer
"# coding: latin-1
class Foo(models.Model):
pass
if __name__ == \"__main__\":
foo = Foo()
print (foo)
class Bar(models.Model):
pass
"
;; The region sent is only the `class Foo' definition.  The expected
;; result starts with an Emacs-style cookie naming the encoding
;; declared on the buffer's first line (latin-1).
(should (string= (python-shell-buffer-substring
(python-tests-look-at "class Foo(models.Model):")
(progn (python-nav-forward-sexp) (point)))
"# -*- coding: latin-1 -*-
class Foo(models.Model):
pass"))))
(ert-deftest python-shell-buffer-substring-5 ()
"The proper amount of blank lines is added for a substring."
(python-tests-with-temp-buffer
"# coding: latin-1
class Foo(models.Model):
pass
if __name__ == \"__main__\":
foo = Foo()
print (foo)
class Bar(models.Model):
pass
"
;; The region sent is the `class Bar' definition, which comes after
;; the main block in the buffer.  The expected result starts with the
;; latin-1 coding cookie.
(should (string= (python-shell-buffer-substring
(python-tests-look-at "class Bar(models.Model):")
(progn (python-nav-forward-sexp) (point)))
"# -*- coding: latin-1 -*-
class Bar(models.Model):
pass"))))
(ert-deftest python-shell-buffer-substring-6 ()
"Handle substring with coding cookie in the second line."
(python-tests-with-temp-buffer
"
# coding: latin-1
class Foo(models.Model):
pass
if __name__ == \"__main__\":
foo = Foo()
print (foo)
class Bar(models.Model):
pass
"
;; The region starts at the buffer's own cookie line and ends where
;; the main block begins.  The expected result carries a single
;; Emacs-style latin-1 cookie.
(should (string= (python-shell-buffer-substring
(python-tests-look-at "# coding: latin-1")
(python-tests-look-at "if __name__ == \"__main__\":"))
"# -*- coding: latin-1 -*-
class Foo(models.Model):
pass
"))))
(ert-deftest python-shell-buffer-substring-7 ()
"Ensure first coding cookie gets precedence."
(python-tests-with-temp-buffer
"# coding: utf-8
# coding: latin-1
class Foo(models.Model):
pass
if __name__ == \"__main__\":
foo = Foo()
print (foo)
class Bar(models.Model):
pass
"
;; The region starts at the second cookie line (latin-1), yet the
;; expected result carries the encoding of the first one (utf-8) and
;; no latin-1 cookie.
(should (string= (python-shell-buffer-substring
(python-tests-look-at "# coding: latin-1")
(python-tests-look-at "if __name__ == \"__main__\":"))
"# -*- coding: utf-8 -*-
class Foo(models.Model):
pass
"))))
(ert-deftest python-shell-buffer-substring-8 ()
"Ensure first coding cookie gets precedence when sending whole buffer."
(python-tests-with-temp-buffer
"# coding: utf-8
# coding: latin-1
class Foo(models.Model):
pass
"
;; The whole buffer is sent; the expected result keeps the first
;; cookie line as written and does not contain the latin-1 one.
(should (string= (python-shell-buffer-substring (point-min) (point-max))
"# coding: utf-8
class Foo(models.Model):
pass
"))))
;;; Shell completion
@ -3773,6 +3965,85 @@ foo = True # another comment
(forward-line 1)
(should (python-info-current-line-empty-p))))
(ert-deftest python-info-encoding-from-cookie-1 ()
"Should detect a \"# coding=NAME\" cookie on the first line."
(python-tests-with-temp-buffer
"# coding=latin-1
foo = True # another comment
"
;; The encoding is returned as a symbol, hence the `eq' comparison.
(should (eq (python-info-encoding-from-cookie) 'latin-1))))
(ert-deftest python-info-encoding-from-cookie-2 ()
"Should detect it on second line."
(python-tests-with-temp-buffer
"
# coding=latin-1
foo = True # another comment
"
;; The fixture starts with a newline, so the cookie is not on the
;; first line of the buffer.
(should (eq (python-info-encoding-from-cookie) 'latin-1))))
(ert-deftest python-info-encoding-from-cookie-3 ()
"Should not be detected on third line (and following ones)."
(python-tests-with-temp-buffer
"
# coding=latin-1
foo = True # another comment
"
;; No cookie must be reported here: the function is expected to
;; return nil.
(should (not (python-info-encoding-from-cookie)))))
(ert-deftest python-info-encoding-from-cookie-4 ()
"Should detect Emacs style (\"-*- coding: NAME -*-\") cookies."
(python-tests-with-temp-buffer
"# -*- coding: latin-1 -*-
foo = True # another comment"
(should (eq (python-info-encoding-from-cookie) 'latin-1))))
(ert-deftest python-info-encoding-from-cookie-5 ()
"Should detect Vim style (\"vim: set fileencoding=NAME :\") cookies."
(python-tests-with-temp-buffer
"# vim: set fileencoding=latin-1 :
foo = True # another comment"
(should (eq (python-info-encoding-from-cookie) 'latin-1))))
(ert-deftest python-info-encoding-from-cookie-6 ()
"First cookie wins: Emacs style on line one, Vim style on line two."
(python-tests-with-temp-buffer
"# -*- coding: iso-8859-1 -*-
# vim: set fileencoding=latin-1 :
foo = True # another comment"
;; The two cookies name different encodings so the assertion can tell
;; which one was picked.
(should (eq (python-info-encoding-from-cookie) 'iso-8859-1))))
(ert-deftest python-info-encoding-from-cookie-7 ()
"First cookie wins: Vim style on line one, Emacs style on line two."
(python-tests-with-temp-buffer
"# vim: set fileencoding=latin-1 :
# -*- coding: iso-8859-1 -*-
foo = True # another comment"
;; The two cookies name different encodings so the assertion can tell
;; which one was picked.
(should (eq (python-info-encoding-from-cookie) 'latin-1))))
(ert-deftest python-info-encoding-1 ()
"Should return the detected encoding from cookie."
(python-tests-with-temp-buffer
"# vim: set fileencoding=latin-1 :
foo = True # another comment"
;; A cookie is present, so the utf-8 fallback must not be used.
(should (eq (python-info-encoding) 'latin-1))))
(ert-deftest python-info-encoding-2 ()
"Should default to utf-8 when the buffer has no coding cookie."
(python-tests-with-temp-buffer
"# No encoding for you
foo = True # another comment"
;; The first line is an ordinary comment, not a coding cookie.
(should (eq (python-info-encoding) 'utf-8))))
;;; Utility functions