Fix bug #15260 with building and installing Emacs in non-ASCII directories.

src/xdisp.c (message3_nolog, message_with_string): Encode the string
 before writing it to the terminal in a non-interactive session.
 src/lread.c (openp): If both FILENAME and SUFFIX are unibyte, make
 sure we concatenate them into a unibyte string.
 src/fileio.c (make_temp_name): Encode PREFIX, and decode the
 resulting temporary name before returning it to the caller.
 (Fexpand_file_name): If NAME is pure-ASCII and DEFAULT_DIRECTORY
 is a unibyte string, convert NAME to a unibyte string to ensure
 that the result is also a unibyte string.
 src/emacs.c (init_cmdargs): Use build_unibyte_string to make sure we
 create unibyte strings from default paths and directory/file
 names.
 src/coding.h (ENCODE_FILE): Do not attempt to encode a unibyte
 string.
 src/callproc.c (init_callproc): Use build_unibyte_string to make
 sure we create unibyte strings from default paths and
 directory/file names.
 src/buffer.c (init_buffer): Don't store default-directory of
 *scratch* in multibyte form.  The original problem which led to
 that is described in
 http://lists.gnu.org/archive/html/emacs-pretest-bug/2004-11/msg00532.html,
 but it was solved long ago.

 lisp/startup.el (normal-top-level): Move setting eol-mnemonic-unix,
 eol-mnemonic-mac, eol-mnemonic-dos, and also setup of the locale
 environment and decoding all of the default-directory's to here
 from command-line.
 (command-line): Decode also argv[0].
 lisp/loadup.el: Error out if default-directory is a multibyte string
 when we are dumping.
 lisp/Makefile.in (emacs): Don't set LC_ALL=C.

 leim/Makefile.in (RUN_EMACS): Don't set LC_ALL=C.

 configure.ac: Don't disallow builds in non-ASCII directories.
This commit is contained in:
Eli Zaretskii 2013-11-04 19:30:33 +02:00
parent 7397c58760
commit d0065ff124
16 changed files with 213 additions and 87 deletions

View file

@ -1,3 +1,8 @@
2013-11-04 Eli Zaretskii <eliz@gnu.org>
* configure.ac: Don't disallow builds in non-ASCII directories.
(Bug#15260)
2013-11-04 Paul Eggert <eggert@cs.ucla.edu>
Port to stricter C99 platforms.

View file

@ -73,30 +73,6 @@ dnl Support for --program-prefix, --program-suffix and
dnl --program-transform-name options
AC_ARG_PROGRAM
dnl http://debbugs.gnu.org/15260
dnl I think we have to check, eg, both exec_prefix and bindir,
dnl because the latter by default is not yet expanded, but the user
dnl may have specified a value for it via --bindir.
dnl At first glance, _installing_ in non-ASCII seems ok, but in fact
dnl it is not; see http://debbugs.gnu.org/15260#61
dnl Note that abs_srcdir and abs_builddir are not yet defined. :(
dnl "`cd \"$srcdir\"`" is not portable.
dnl See autoconf manual "Shell Substitutions":
dnl "There is just no portable way to use double-quoted strings inside
dnl double-quoted back-quoted expressions (pfew!)."
temp_srcdir=`cd "$srcdir"; pwd`
for var in "`pwd`" "$temp_srcdir" "$prefix" "$exec_prefix" \
"$datarootdir" "$bindir" "$datadir" "$sharedstatedir" "$libexecdir"; do
dnl configure sets LC_ALL=C early on, so this range should work.
case "$var" in
*[[!\ -~]]*) AC_MSG_ERROR([Emacs cannot be built or installed in a directory whose name contains non-ASCII characters: $var]) ;;
esac
done
dnl It is important that variables on the RHS not be expanded here,
dnl hence the single quotes. This is per the GNU coding standards, see
dnl (autoconf) Installation Directory Variables

View file

@ -1,3 +1,7 @@
2013-11-04 Eli Zaretskii <eliz@gnu.org>
* Makefile.in (RUN_EMACS): Don't set LC_ALL=C. (Bug#15260)
2013-11-03 Glenn Morris <rgm@gnu.org>
* Makefile.in (abs_srcdir): Remove.

View file

@ -32,7 +32,7 @@ EMACS = ../src/emacs
# How to run Emacs.
# Prevent any setting of EMACSLOADPATH in user environment causing problems.
RUN_EMACS = unset EMACSLOADPATH; LC_ALL=C "${EMACS}" -batch \
RUN_EMACS = unset EMACSLOADPATH; "${EMACS}" -batch \
--no-site-file --no-site-lisp
MKDIR_P = @MKDIR_P@

View file

@ -1,3 +1,16 @@
2013-11-04 Eli Zaretskii <eliz@gnu.org>
* startup.el (normal-top-level): Move setting eol-mnemonic-unix,
eol-mnemonic-mac, eol-mnemonic-dos, and also setup of the locale
environment and decoding all of the default-directory's to here
from command-line.
(command-line): Decode also argv[0].
* loadup.el: Error out if default-directory is a multibyte string
when we are dumping.
* Makefile.in (emacs): Don't set LC_ALL=C. (Bug#15260)
2013-11-04 Teodor Zlatanov <tzz@lifelogs.com>
* emacs-lisp/package.el (package-menu-mode)

View file

@ -106,7 +106,7 @@ COMPILE_FIRST = \
# The actual Emacs command run in the targets below.
# Prevent any setting of EMACSLOADPATH in user environment causing problems.
emacs = unset EMACSLOADPATH; LC_ALL=C "$(EMACS)" $(EMACSOPT)
emacs = unset EMACSLOADPATH; "$(EMACS)" $(EMACSOPT)
# Common command to find subdirectories
setwins=subdirs=`find . -type d -print`; \

View file

@ -286,6 +286,20 @@
;For other systems, you must edit ../src/Makefile.in.
(load "site-load" t)
;; Make sure default-directory is unibyte when dumping. This is
;; because we cannot decode and encode it correctly (since the locale
;; environment is not, and should not be, set up). default-directory
;; is used every time we call expand-file-name, which we do in every
;; file primitive. So the only workable solution to support building
;; in non-ASCII directories is to manipulate unibyte strings in the
;; current locale's encoding.
(if (and (or (equal (nth 3 command-line-args) "dump")
(equal (nth 4 command-line-args) "dump")
(equal (nth 3 command-line-args) "bootstrap")
(equal (nth 4 command-line-args) "bootstrap"))
(multibyte-string-p default-directory))
(error "default-directory must be unibyte when dumping Emacs!"))
;; Determine which last version number to use
;; based on the executables that now exist.
(if (and (or (equal (nth 3 command-line-args) "dump")

View file

@ -489,6 +489,63 @@ It is the default value of the variable `top-level'."
(if command-line-processed
(message "Back to top level.")
(setq command-line-processed t)
;; Set the default strings to display in mode line for end-of-line
;; formats that aren't native to this platform. This should be
;; done before calling set-locale-environment, as the latter might
;; use these mnemonics.
(cond
((memq system-type '(ms-dos windows-nt))
(setq eol-mnemonic-unix "(Unix)"
eol-mnemonic-mac "(Mac)"))
(t ; this is for Unix/GNU/Linux systems
(setq eol-mnemonic-dos "(DOS)"
eol-mnemonic-mac "(Mac)")))
(set-locale-environment nil)
;; Decode all default-directory's (probably, only *scratch* exists
;; at this point). default-directory of *scratch* is the basis
;; for many other file-name variables and directory lists, so it
;; is important to decode it ASAP.
(when locale-coding-system
(save-excursion
(dolist (elt (buffer-list))
(set-buffer elt)
(if default-directory
(setq default-directory
(decode-coding-string default-directory
locale-coding-system t)))))
;; Decode all the important variables and directory lists, now
;; that we know the locale's encoding. This is because the
;; values of these variables are until here unibyte undecoded
;; strings created by build_unibyte_string. data-directory in
;; particular is used to construct many other standard directory
;; names, so it must be decoded ASAP.
;; Note that charset-map-path cannot be decoded here, since we
;; could then be trapped in infinite recursion below, when we
;; load subdirs.el, because encoding a directory name might need
;; to load a charset map, which will want to encode
;; charset-map-path, which will want to load the same charset
;; map... So decoding of charset-map-path is delayed until
;; further down below.
(dolist (pathsym '(load-path exec-path))
(let ((path (symbol-value pathsym)))
(if (listp path)
(set pathsym (mapcar (lambda (dir)
(decode-coding-string
dir
locale-coding-system t))
path)))))
(dolist (filesym '(data-directory doc-directory exec-directory
installation-directory
invocation-directory invocation-name
source-directory
shared-game-score-directory))
(let ((file (symbol-value filesym)))
(if (stringp file)
(set filesym (decode-coding-string file locale-coding-system t))))))
(let ((dir default-directory))
(with-current-buffer "*Messages*"
(messages-buffer-mode)
@ -536,6 +593,16 @@ It is the default value of the variable `top-level'."
(setq process-environment
(delete (concat "PWD=" pwd)
process-environment)))))
;; Now that other directories were searched, and any charsets we
;; need for encoding them are already loaded, we are ready to
;; decode charset-map-path.
(if (listp charset-map-path)
(setq charset-map-path
(mapcar (lambda (dir)
(decode-coding-string
dir
locale-coding-system t))
charset-map-path)))
(setq default-directory (abbreviate-file-name default-directory))
(let ((old-face-font-rescale-alist face-font-rescale-alist))
(unwind-protect
@ -756,18 +823,6 @@ Amongst another things, it parses the command-line arguments."
;;! ;; Choose a good default value for split-window-keep-point.
;;! (setq split-window-keep-point (> baud-rate 2400))
;; Set the default strings to display in mode line for
;; end-of-line formats that aren't native to this platform.
(cond
((memq system-type '(ms-dos windows-nt))
(setq eol-mnemonic-unix "(Unix)"
eol-mnemonic-mac "(Mac)"))
(t ; this is for Unix/GNU/Linux systems
(setq eol-mnemonic-dos "(DOS)"
eol-mnemonic-mac "(Mac)")))
(set-locale-environment nil)
;; Convert preloaded file names in load-history to absolute.
(let ((simple-file-name
;; Look for simple.el or simple.elc and use their directory
@ -801,7 +856,7 @@ please check its value")
load-history))))
;; Convert the arguments to Emacs internal representation.
(let ((args (cdr command-line-args)))
(let ((args command-line-args))
(while args
(setcar args
(decode-coding-string (car args) locale-coding-system t))
@ -1211,19 +1266,6 @@ the `--debug-init' option to view a complete error backtrace."
(setq after-init-time (current-time))
(run-hooks 'after-init-hook)
;; Decode all default-directory.
(if (and (default-value 'enable-multibyte-characters) locale-coding-system)
(save-excursion
(dolist (elt (buffer-list))
(set-buffer elt)
(if default-directory
(setq default-directory
(decode-coding-string default-directory
locale-coding-system t))))
(setq command-line-default-directory
(decode-coding-string command-line-default-directory
locale-coding-system t))))
;; If *scratch* exists and init file didn't change its mode, initialize it.
(if (get-buffer "*scratch*")
(with-current-buffer "*scratch*"

View file

@ -1,3 +1,34 @@
2013-11-04 Eli Zaretskii <eliz@gnu.org>
* xdisp.c (message3_nolog, message_with_string): Encode the string
before writing it to the terminal in a non-interactive session.
* lread.c (openp): If both FILENAME and SUFFIX are unibyte, make
sure we concatenate them into a unibyte string.
* fileio.c (make_temp_name): Encode PREFIX, and decode the
resulting temporary name before returning it to the caller.
(Fexpand_file_name): If NAME is pure-ASCII and DEFAULT_DIRECTORY
is a unibyte string, convert NAME to a unibyte string to ensure
that the result is also a unibyte string.
* emacs.c (init_cmdargs): Use build_unibyte_string to make sure we
create unibyte strings from default paths and directory/file
names.
* coding.h (ENCODE_FILE): Do not attempt to encode a unibyte
string.
* callproc.c (init_callproc): Use build_unibyte_string to make
sure we create unibyte strings from default paths and
directory/file names.
* buffer.c (init_buffer): Don't store default-directory of
*scratch* in multibyte form. The original problem which led to
that is described in
http://lists.gnu.org/archive/html/emacs-pretest-bug/2004-11/msg00532.html,
but it was solved long ago. (Bug#15260)
2013-11-04 Paul Eggert <eggert@cs.ucla.edu>
Port to stricter C99 platforms.

View file

@ -5349,13 +5349,10 @@ init_buffer (void)
len++;
}
/* At this moment, we still don't know how to decode the directory
name. So, we keep the bytes in unibyte form so that file I/O
routines correctly get the original bytes. */
bset_directory (current_buffer, make_unibyte_string (pwd, len));
if (! NILP (BVAR (&buffer_defaults, enable_multibyte_characters)))
/* At this moment, we still don't know how to decode the
directory name. So, we keep the bytes in multibyte form so
that ENCODE_FILE correctly gets the original bytes. */
bset_directory
(current_buffer, string_to_multibyte (BVAR (current_buffer, directory)));
/* Add /: to the front of the name
if it would otherwise be treated as magic. */

View file

@ -1612,14 +1612,14 @@ init_callproc (void)
Lisp_Object tem, tem1, srcdir;
srcdir = Fexpand_file_name (build_string ("../src/"),
build_string (PATH_DUMPLOADSEARCH));
build_unibyte_string (PATH_DUMPLOADSEARCH));
tem = Fexpand_file_name (build_string ("GNU"), Vdata_directory);
tem1 = Ffile_exists_p (tem);
if (!NILP (Fequal (srcdir, Vinvocation_directory)) || NILP (tem1))
{
Lisp_Object newdir;
newdir = Fexpand_file_name (build_string ("../etc/"),
build_string (PATH_DUMPLOADSEARCH));
build_unibyte_string (PATH_DUMPLOADSEARCH));
tem = Fexpand_file_name (build_string ("GNU"), newdir);
tem1 = Ffile_exists_p (tem);
if (!NILP (tem1))
@ -1646,7 +1646,7 @@ init_callproc (void)
#ifdef DOS_NT
Vshared_game_score_directory = Qnil;
#else
Vshared_game_score_directory = build_string (PATH_GAME);
Vshared_game_score_directory = build_unibyte_string (PATH_GAME);
if (NILP (Ffile_accessible_directory_p (Vshared_game_score_directory)))
Vshared_game_score_directory = Qnil;
#endif

View file

@ -670,14 +670,16 @@ struct coding_system
(code) = (s1 << 8) | s2; \
} while (0)
/* Encode the file name NAME using the specified coding system
for file names, if any. */
/* Encode the file name NAME using the specified coding system for
file names, if any. If NAME is a unibyte string, return NAME. */
#define ENCODE_FILE(name) \
(! NILP (Vfile_name_coding_system) \
(! STRING_MULTIBYTE (name) \
? name \
: (! NILP (Vfile_name_coding_system) \
? code_convert_string_norecord (name, Vfile_name_coding_system, 1) \
: (! NILP (Vdefault_file_name_coding_system) \
? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 1) \
: name))
: name)))
/* Decode the file name NAME using the specified coding system

View file

@ -393,7 +393,7 @@ init_cmdargs (int argc, char **argv, int skip_args, char *original_pwd)
initial_argv = argv;
initial_argc = argc;
raw_name = build_string (argv[0]);
raw_name = build_unibyte_string (argv[0]);
/* Add /: to the front of the name
if it would otherwise be treated as magic. */
@ -427,7 +427,9 @@ init_cmdargs (int argc, char **argv, int skip_args, char *original_pwd)
/* Emacs was started with relative path, like ./emacs.
Make it absolute. */
{
Lisp_Object odir = original_pwd ? build_string (original_pwd) : Qnil;
Lisp_Object odir =
original_pwd ? build_unibyte_string (original_pwd) : Qnil;
Vinvocation_directory = Fexpand_file_name (Vinvocation_directory, odir);
}
@ -2206,7 +2208,7 @@ decode_env_path (const char *evarname, const char *defalt)
p = strchr (path, SEPCHAR);
if (!p)
p = path + strlen (path);
element = (p - path ? make_string (path, p - path)
element = (p - path ? make_unibyte_string (path, p - path)
: build_string ("."));
#ifdef WINDOWSNT
/* Relative file names in the default path are interpreted as
@ -2216,7 +2218,7 @@ decode_env_path (const char *evarname, const char *defalt)
element = Fexpand_file_name (Fsubstring (element,
make_number (emacs_dir_len),
Qnil),
build_string (emacs_dir));
build_unibyte_string (emacs_dir));
#endif
/* Add /: to the front of the name

View file

@ -732,8 +732,8 @@ static unsigned make_temp_name_count, make_temp_name_count_initialized_p;
Lisp_Object
make_temp_name (Lisp_Object prefix, bool base64_p)
{
Lisp_Object val;
int len, clen;
Lisp_Object val, encoded_prefix;
int len;
printmax_t pid;
char *p, *data;
char pidbuf[INT_BUFSIZE_BOUND (printmax_t)];
@ -767,12 +767,11 @@ make_temp_name (Lisp_Object prefix, bool base64_p)
#endif
}
len = SBYTES (prefix); clen = SCHARS (prefix);
val = make_uninit_multibyte_string (clen + 3 + pidlen, len + 3 + pidlen);
if (!STRING_MULTIBYTE (prefix))
STRING_SET_UNIBYTE (val);
encoded_prefix = ENCODE_FILE (prefix);
len = SBYTES (encoded_prefix);
val = make_uninit_string (len + 3 + pidlen);
data = SSDATA (val);
memcpy (data, SSDATA (prefix), len);
memcpy (data, SSDATA (encoded_prefix), len);
p = data + len;
memcpy (p, pidbuf, pidlen);
@ -810,7 +809,7 @@ make_temp_name (Lisp_Object prefix, bool base64_p)
{
/* We want to return only if errno is ENOENT. */
if (errno == ENOENT)
return val;
return DECODE_FILE (val);
else
/* The error here is dubious, but there is little else we
can do. The alternatives are to return nil, which is
@ -987,7 +986,26 @@ filesystem tree, not (expand-file-name ".." dirname). */)
if (multibyte != STRING_MULTIBYTE (default_directory))
{
if (multibyte)
{
unsigned char *p = SDATA (name);
while (*p && ASCII_BYTE_P (*p))
p++;
if (*p == '\0')
{
/* NAME is a pure ASCII string, and DEFAULT_DIRECTORY is
unibyte. Do not convert DEFAULT_DIRECTORY to
multibyte; instead, convert NAME to a unibyte string,
so that the result of this function is also a unibyte
string. This is needed during bootstrapping and
dumping, when Emacs cannot decode file names, because
the locale environment is not set up. */
name = make_unibyte_string (SSDATA (name), SBYTES (name));
multibyte = 0;
}
else
default_directory = string_to_multibyte (default_directory);
}
else
{
name = string_to_multibyte (name);

View file

@ -1500,7 +1500,8 @@ openp (Lisp_Object path, Lisp_Object str, Lisp_Object suffixes,
for (tail = NILP (suffixes) ? list1 (empty_unibyte_string) : suffixes;
CONSP (tail); tail = XCDR (tail))
{
ptrdiff_t fnlen, lsuffix = SBYTES (XCAR (tail));
Lisp_Object suffix = XCAR (tail);
ptrdiff_t fnlen, lsuffix = SBYTES (suffix);
Lisp_Object handler;
/* Concatenate path element/specified name with the suffix.
@ -1511,7 +1512,7 @@ openp (Lisp_Object path, Lisp_Object str, Lisp_Object suffixes,
? 2 : 0);
fnlen = SBYTES (filename) - prefixlen;
memcpy (fn, SDATA (filename) + prefixlen, fnlen);
memcpy (fn + fnlen, SDATA (XCAR (tail)), lsuffix + 1);
memcpy (fn + fnlen, SDATA (suffix), lsuffix + 1);
fnlen += lsuffix;
/* Check that the file exists and is not a directory. */
/* We used to only check for handlers on non-absolute file names:
@ -1521,6 +1522,17 @@ openp (Lisp_Object path, Lisp_Object str, Lisp_Object suffixes,
handler = Ffind_file_name_handler (filename, Qfile_exists_p);
It's not clear why that was the case and it breaks things like
(load "/bar.el") where the file is actually "/bar.el.gz". */
/* make_string has its own ideas on when to return a unibyte
string and when a multibyte string, but we know better.
We must have a unibyte string when dumping, since
file-name encoding is shaky at best at that time, and in
particular default-file-name-coding-system is reset
several times during loadup. We therefore don't want to
encode the file before passing it to file I/O library
functions. */
if (!STRING_MULTIBYTE (filename) && !STRING_MULTIBYTE (suffix))
string = make_unibyte_string (fn, fnlen);
else
string = make_string (fn, fnlen);
handler = Ffind_file_name_handler (string, Qfile_exists_p);
if ((!NILP (handler) || !NILP (predicate)) && !NATNUMP (predicate))

View file

@ -9728,7 +9728,11 @@ message3_nolog (Lisp_Object m)
putc ('\n', stderr);
noninteractive_need_newline = 0;
if (STRINGP (m))
fwrite (SDATA (m), SBYTES (m), 1, stderr);
{
Lisp_Object s = ENCODE_SYSTEM (m);
fwrite (SDATA (s), SBYTES (s), 1, stderr);
}
if (cursor_in_echo_area == 0)
fprintf (stderr, "\n");
fflush (stderr);
@ -9803,13 +9807,19 @@ message_with_string (const char *m, Lisp_Object string, int log)
{
if (m)
{
/* ENCODE_SYSTEM below can GC and/or relocate the Lisp
String whose data pointer might be passed to us in M. So
we use a local copy. */
char *fmt = xstrdup (m);
if (noninteractive_need_newline)
putc ('\n', stderr);
noninteractive_need_newline = 0;
fprintf (stderr, m, SDATA (string));
fprintf (stderr, fmt, SDATA (ENCODE_SYSTEM (string)));
if (!cursor_in_echo_area)
fprintf (stderr, "\n");
fflush (stderr);
xfree (fmt);
}
}
else if (INTERACTIVE)