Fix a race condition that causes Emacs to mess up glib.
The symptom is a diagnostic "GLib-WARNING **: In call to g_spawn_sync(), exit status of a child process was requested but SIGCHLD action was set to SIG_IGN and ECHILD was received by waitpid(), so exit status can't be returned." The diagnostic is partly wrong, as the SIGCHLD action is not set to SIG_IGN. The real bug is a race condition between Emacs and glib: Emacs does a waitpid (-1, ...) and reaps glib's subprocess by mistake, so that glib can't find it. Work around the bug by invoking waitpid only on subprocesses that Emacs itself creates. * process.c (create_process, record_child_status_change): Don't use special value -1 in pid field, as the caller now must know the pid rather than having the callee infer it. The inference was sometimes incorrect anyway, due to another race. (create_process): Set new 'alive' member if child is created. (process_status_retrieved): New function. (record_child_status_change): Use it. Accept negative 1st argument, which means to wait for the processes that Emacs already knows about. Move special-case code for DOS_NT (which lacks WNOHANG) here, from caller. Keep track of processes that have already been waited for, by testing and clearing new 'alive' member. (CAN_HANDLE_MULTIPLE_CHILDREN): Remove, as record_child_status_change now does this internally. (handle_child_signal): Let record_child_status_change do all the work, since we do not want to reap all exited child processes, only the child processes that Emacs itself created. * process.h (Lisp_Process): New boolean member 'alive'. Fixes: debbugs:8855
This commit is contained in:
parent
0d879dca46
commit
0b3d4a4756
3 changed files with 135 additions and 98 deletions
|
@ -1,5 +1,34 @@
|
|||
2012-11-03 Paul Eggert <eggert@cs.ucla.edu>
|
||||
|
||||
Fix a race condition that causes Emacs to mess up glib (Bug#8855).
|
||||
The symptom is a diagnostic "GLib-WARNING **: In call to
|
||||
g_spawn_sync(), exit status of a child process was requested but
|
||||
SIGCHLD action was set to SIG_IGN and ECHILD was received by
|
||||
waitpid(), so exit status can't be returned." The diagnostic
|
||||
is partly wrong, as the SIGCHLD action is not set to SIG_IGN.
|
||||
The real bug is a race condition between Emacs and glib: Emacs
|
||||
does a waitpid (-1, ...) and reaps glib's subprocess by mistake,
|
||||
so that glib can't find it. Work around the bug by invoking
|
||||
waitpid only on subprocesses that Emacs itself creates.
|
||||
* process.c (create_process, record_child_status_change):
|
||||
Don't use special value -1 in pid field, as the caller now must
|
||||
know the pid rather than having the callee infer it. The
|
||||
inference was sometimes incorrect anyway, due to another race.
|
||||
(create_process): Set new 'alive' member if child is created.
|
||||
(process_status_retrieved): New function.
|
||||
(record_child_status_change): Use it.
|
||||
Accept negative 1st argument, which means to wait for the
|
||||
processes that Emacs already knows about. Move special-case code
|
||||
for DOS_NT (which lacks WNOHANG) here, from caller. Keep track of
|
||||
processes that have already been waited for, by testing and
|
||||
clearing new 'alive' member.
|
||||
(CAN_HANDLE_MULTIPLE_CHILDREN): Remove, as record_child_status_change
|
||||
now does this internally.
|
||||
(handle_child_signal): Let record_child_status_change do all
|
||||
the work, since we do not want to reap all exited child processes,
|
||||
only the child processes that Emacs itself created.
|
||||
* process.h (Lisp_Process): New boolean member 'alive'.
|
||||
|
||||
Omit duplicate definitions no longer needed with gcc -g3.
|
||||
* lisp.h (GCTYPEBITS, GCALIGNMENT, ARRAY_MARK_FLAG, PSEUDOVECTOR_FLAG)
|
||||
(VALMASK, MOST_POSITIVE_FIXNUM, MOST_NEGATIVE_FIXNUM):
|
||||
|
|
201
src/process.c
201
src/process.c
|
@ -130,6 +130,10 @@ extern int sys_select (int, SELECT_TYPE *, SELECT_TYPE *, SELECT_TYPE *,
|
|||
EMACS_TIME *, void *);
|
||||
#endif
|
||||
|
||||
/* This is for DOS_NT ports. FIXME: Remove this old portability cruft
|
||||
by having DOS_NT ports implement waitpid instead of wait. Nowadays
|
||||
POSIXish hosts all define waitpid, WNOHANG, and WUNTRACED, as these
|
||||
have been standard since POSIX.1-1988. */
|
||||
#ifndef WNOHANG
|
||||
# undef waitpid
|
||||
# define waitpid(pid, status, options) wait (status)
|
||||
|
@ -795,9 +799,8 @@ get_process (register Lisp_Object name)
|
|||
#ifdef SIGCHLD
|
||||
/* Fdelete_process promises to immediately forget about the process, but in
|
||||
reality, Emacs needs to remember those processes until they have been
|
||||
treated by the SIGCHLD handler; otherwise this handler would consider the
|
||||
process as being synchronous and say that the synchronous process is
|
||||
dead. */
|
||||
treated by the SIGCHLD handler and waitpid has been invoked on them;
|
||||
otherwise they might fill up the kernel's process table. */
|
||||
static Lisp_Object deleted_pid_list;
|
||||
#endif
|
||||
|
||||
|
@ -1704,16 +1707,7 @@ create_process (Lisp_Object process, char **new_argv, Lisp_Object current_dir)
|
|||
if (inchannel > max_process_desc)
|
||||
max_process_desc = inchannel;
|
||||
|
||||
/* Until we store the proper pid, enable the SIGCHLD handler
|
||||
to recognize an unknown pid as standing for this process.
|
||||
It is very important not to let this `marker' value stay
|
||||
in the table after this function has returned; if it does
|
||||
it might cause call-process to hang and subsequent asynchronous
|
||||
processes to get their return values scrambled. */
|
||||
XPROCESS (process)->pid = -1;
|
||||
|
||||
/* This must be called after the above line because it may signal an
|
||||
error. */
|
||||
/* This may signal an error. */
|
||||
setup_process_coding_systems (process);
|
||||
|
||||
encoded_current_dir = ENCODE_FILE (current_dir);
|
||||
|
@ -1880,6 +1874,8 @@ create_process (Lisp_Object process, char **new_argv, Lisp_Object current_dir)
|
|||
#endif
|
||||
|
||||
XPROCESS (process)->pid = pid;
|
||||
if (0 <= pid)
|
||||
XPROCESS (process)->alive = 1;
|
||||
|
||||
/* Stop blocking signals in the parent. */
|
||||
#ifdef SIGCHLD
|
||||
|
@ -6271,9 +6267,35 @@ process has been transmitted to the serial port. */)
|
|||
return process;
|
||||
}
|
||||
|
||||
/* On receipt of a signal that a child status has changed, loop asking
|
||||
about children with changed statuses until the system says there
|
||||
are no more.
|
||||
/* If the status of the process DESIRED has changed, return true and
|
||||
set *STATUS to its exit status; otherwise, return false.
|
||||
If HAVE is nonnegative, assume that HAVE = waitpid (HAVE, STATUS, ...)
|
||||
has already been invoked, and do not invoke waitpid again. */
|
||||
|
||||
static bool
|
||||
process_status_retrieved (pid_t desired, pid_t have, int *status)
|
||||
{
|
||||
if (have < 0)
|
||||
{
|
||||
/* Invoke waitpid only with a known process ID; do not invoke
|
||||
waitpid with a nonpositive argument. Otherwise, Emacs might
|
||||
reap an unwanted process by mistake. For example, invoking
|
||||
waitpid (-1, ...) can mess up glib by reaping glib's subprocesses,
|
||||
so that another thread running glib won't find them. */
|
||||
do
|
||||
have = waitpid (desired, status, WNOHANG | WUNTRACED);
|
||||
while (have < 0 && errno == EINTR);
|
||||
}
|
||||
|
||||
return have == desired;
|
||||
}
|
||||
|
||||
/* If PID is nonnegative, the child process PID with wait status W has
|
||||
changed its status; record this and return true.
|
||||
|
||||
If PID is negative, ignore W, and look for known child processes
|
||||
of Emacs whose status have changed. For each one found, record its new
|
||||
status.
|
||||
|
||||
All we do is change the status; we do not run sentinels or print
|
||||
notifications. That is saved for the next time keyboard input is
|
||||
|
@ -6296,13 +6318,23 @@ process has been transmitted to the serial port. */)
|
|||
** Malloc WARNING: This should never call malloc either directly or
|
||||
indirectly; if it does, that is a bug */
|
||||
|
||||
/* Record the changed status of the child process PID with wait status W. */
|
||||
void
|
||||
record_child_status_change (pid_t pid, int w)
|
||||
{
|
||||
#ifdef SIGCHLD
|
||||
Lisp_Object proc;
|
||||
struct Lisp_Process *p;
|
||||
|
||||
# ifdef WNOHANG
|
||||
/* On POSIXish hosts, record at most one child only if we already
|
||||
know one child that has exited. */
|
||||
bool record_at_most_one_child = 0 <= pid;
|
||||
# else
|
||||
/* On DOS_NT (the only porting target that lacks WNOHANG),
|
||||
record the status of at most one child process, since the SIGCHLD
|
||||
handler must return right away. If any more processes want to
|
||||
signal us, we will get another signal. */
|
||||
bool record_at_most_one_child = 1;
|
||||
# endif
|
||||
|
||||
Lisp_Object tail;
|
||||
|
||||
/* Find the process that signaled us, and record its status. */
|
||||
|
@ -6310,68 +6342,69 @@ record_child_status_change (pid_t pid, int w)
|
|||
/* The process can have been deleted by Fdelete_process. */
|
||||
for (tail = deleted_pid_list; CONSP (tail); tail = XCDR (tail))
|
||||
{
|
||||
bool all_pids_are_fixnums
|
||||
= (MOST_NEGATIVE_FIXNUM <= TYPE_MINIMUM (pid_t)
|
||||
&& TYPE_MAXIMUM (pid_t) <= MOST_POSITIVE_FIXNUM);
|
||||
Lisp_Object xpid = XCAR (tail);
|
||||
if ((INTEGERP (xpid) && pid == XINT (xpid))
|
||||
|| (FLOATP (xpid) && pid == XFLOAT_DATA (xpid)))
|
||||
if (all_pids_are_fixnums ? INTEGERP (xpid) : NUMBERP (xpid))
|
||||
{
|
||||
XSETCAR (tail, Qnil);
|
||||
return;
|
||||
pid_t deleted_pid;
|
||||
if (INTEGERP (xpid))
|
||||
deleted_pid = XINT (xpid);
|
||||
else
|
||||
deleted_pid = XFLOAT_DATA (xpid);
|
||||
if (process_status_retrieved (deleted_pid, pid, &w))
|
||||
{
|
||||
XSETCAR (tail, Qnil);
|
||||
if (record_at_most_one_child)
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Otherwise, if it is asynchronous, it is in Vprocess_alist. */
|
||||
p = 0;
|
||||
for (tail = Vprocess_alist; CONSP (tail); tail = XCDR (tail))
|
||||
{
|
||||
proc = XCDR (XCAR (tail));
|
||||
p = XPROCESS (proc);
|
||||
if (EQ (p->type, Qreal) && p->pid == pid)
|
||||
break;
|
||||
p = 0;
|
||||
}
|
||||
|
||||
/* Look for an asynchronous process whose pid hasn't been filled
|
||||
in yet. */
|
||||
if (! p)
|
||||
for (tail = Vprocess_alist; CONSP (tail); tail = XCDR (tail))
|
||||
{
|
||||
proc = XCDR (XCAR (tail));
|
||||
p = XPROCESS (proc);
|
||||
if (p->pid == -1)
|
||||
break;
|
||||
p = 0;
|
||||
}
|
||||
|
||||
/* Change the status of the process that was found. */
|
||||
if (p)
|
||||
{
|
||||
int clear_desc_flag = 0;
|
||||
|
||||
p->tick = ++process_tick;
|
||||
p->raw_status = w;
|
||||
p->raw_status_new = 1;
|
||||
|
||||
/* If process has terminated, stop waiting for its output. */
|
||||
if ((WIFSIGNALED (w) || WIFEXITED (w))
|
||||
&& p->infd >= 0)
|
||||
clear_desc_flag = 1;
|
||||
|
||||
/* We use clear_desc_flag to avoid a compiler bug in Microsoft C. */
|
||||
if (clear_desc_flag)
|
||||
Lisp_Object proc = XCDR (XCAR (tail));
|
||||
struct Lisp_Process *p = XPROCESS (proc);
|
||||
if (p->alive && process_status_retrieved (p->pid, pid, &w))
|
||||
{
|
||||
FD_CLR (p->infd, &input_wait_mask);
|
||||
FD_CLR (p->infd, &non_keyboard_wait_mask);
|
||||
}
|
||||
/* Change the status of the process that was found. */
|
||||
p->tick = ++process_tick;
|
||||
p->raw_status = w;
|
||||
p->raw_status_new = 1;
|
||||
|
||||
/* Tell wait_reading_process_output that it needs to wake up and
|
||||
look around. */
|
||||
if (input_available_clear_time)
|
||||
*input_available_clear_time = make_emacs_time (0, 0);
|
||||
/* If process has terminated, stop waiting for its output. */
|
||||
if (WIFSIGNALED (w) || WIFEXITED (w))
|
||||
{
|
||||
int clear_desc_flag = 0;
|
||||
p->alive = 0;
|
||||
if (p->infd >= 0)
|
||||
clear_desc_flag = 1;
|
||||
|
||||
/* clear_desc_flag avoids a compiler bug in Microsoft C. */
|
||||
if (clear_desc_flag)
|
||||
{
|
||||
FD_CLR (p->infd, &input_wait_mask);
|
||||
FD_CLR (p->infd, &non_keyboard_wait_mask);
|
||||
}
|
||||
}
|
||||
|
||||
/* Tell wait_reading_process_output that it needs to wake up and
|
||||
look around. */
|
||||
if (input_available_clear_time)
|
||||
*input_available_clear_time = make_emacs_time (0, 0);
|
||||
|
||||
if (record_at_most_one_child)
|
||||
return;
|
||||
}
|
||||
}
|
||||
/* There was no asynchronous process found for that pid: we have
|
||||
a synchronous process. */
|
||||
else
|
||||
|
||||
if (0 <= pid)
|
||||
{
|
||||
/* The caller successfully waited for a pid but no asynchronous
|
||||
process was found for it, so this is a synchronous process. */
|
||||
|
||||
synch_process_alive = 0;
|
||||
|
||||
/* Report the status of the synchronous process. */
|
||||
|
@ -6390,38 +6423,10 @@ record_child_status_change (pid_t pid, int w)
|
|||
|
||||
#ifdef SIGCHLD
|
||||
|
||||
/* On some systems, the SIGCHLD handler must return right away. If
|
||||
any more processes want to signal us, we will get another signal.
|
||||
Otherwise, loop around to use up all the processes that have
|
||||
something to tell us. */
|
||||
#if (defined WINDOWSNT \
|
||||
|| (defined USG && !defined GNU_LINUX \
|
||||
&& !(defined HPUX && defined WNOHANG)))
|
||||
enum { CAN_HANDLE_MULTIPLE_CHILDREN = 0 };
|
||||
#else
|
||||
enum { CAN_HANDLE_MULTIPLE_CHILDREN = 1 };
|
||||
#endif
|
||||
|
||||
static void
|
||||
handle_child_signal (int sig)
|
||||
{
|
||||
do
|
||||
{
|
||||
pid_t pid;
|
||||
int status;
|
||||
|
||||
do
|
||||
pid = waitpid (-1, &status, WNOHANG | WUNTRACED);
|
||||
while (pid < 0 && errno == EINTR);
|
||||
|
||||
/* PID == 0 means no processes found, PID == -1 means a real failure.
|
||||
Either way, we have done all our job. */
|
||||
if (pid <= 0)
|
||||
break;
|
||||
|
||||
record_child_status_change (pid, status);
|
||||
}
|
||||
while (CAN_HANDLE_MULTIPLE_CHILDREN);
|
||||
record_child_status_change (-1, 0);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
@ -142,6 +142,9 @@ struct Lisp_Process
|
|||
/* Flag to set coding-system of the process buffer from the
|
||||
coding_system used to decode process output. */
|
||||
unsigned int inherit_coding_system_flag : 1;
|
||||
/* Whether the process is alive, i.e., can be waited for. Running
|
||||
processes can be waited for, but exited and fake processes cannot. */
|
||||
unsigned int alive : 1;
|
||||
/* Record the process status in the raw form in which it comes from `wait'.
|
||||
This is to avoid consing in a signal handler. The `raw_status_new'
|
||||
flag indicates that `raw_status' contains a new status that still
|
||||
|
|
Loading…
Add table
Reference in a new issue