Don't store docstrings of preloaded .el files in etc/DOC

Since the location of those files changes between build time and
installation time, this requires to tweak the file name used in those
(#$ . NNN) references during the dump so they don't hardcode the build
directory.  We do it in the same way as was already done for those
same file names in `load-history`, except we convert them back to
absolute file names more lazily (i.e. when fetching the actual
docstring rather than at startup), which requires remembering the
`lisp-dir` computed at startup in the new `lisp-directory` variable.

* src/Makefile.in ($(etc)/DOC): Don't scan Lisp files any more.

* src/lread.c (Fload): Use relative file names for `load-file-name`
when preloading for the dump, like we already did for `current-load-list`.
(read_list): Don't zero-out dynamic docstring references during the
preload since they won't be filled later by Snarf-documentation any more.
(read1): Remove the hash-hack for doc references that were zeroed.

* lisp/startup.el (lisp-directory): New variable.
(command-line): Set it.

* src/doc.c (get_doc_string): Use `lisp-directory` for dynamic
docstring references using relative file names.
(syms_of_doc): Add `Qlisp_directory`.

* lib-src/make-docfile.c (scan_file): Don't handle `.el` or `.elc`
files any more.
(IS_SLASH): Remove macro, not used any more.
(skip_white, read_lisp_symbol, search_lisp_doc_at_eol)
(scan_lisp_file): Remove functions, not used any more.

* doc/lispref/loading.texi (Library Search): Mention `lisp-directory`.
This commit is contained in:
Stefan Monnier 2021-12-30 23:17:45 -05:00
parent 337005af0b
commit 59732a83c8
8 changed files with 53 additions and 526 deletions

View file

@ -19,8 +19,8 @@ You should have received a copy of the GNU General Public License
along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
/* The arguments given to this program are all the C and Lisp source files
of GNU Emacs. .elc and .el and .c files are allowed.
/* The arguments given to this program are all the C files
of GNU Emacs. .c files are allowed.
A .o file can also be specified; the .c file it was made from is used.
This helps the makefile pass the correct list of files.
Option -d DIR means change to DIR before looking for files.
@ -62,13 +62,9 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
Similarly, msdos defines this as sys_chdir, but we're not linking with the
file where that function is defined. */
#undef chdir
#define IS_SLASH(c) ((c) == '/' || (c) == '\\' || (c) == ':')
#else /* not DOS_NT */
#define IS_SLASH(c) ((c) == '/')
#endif /* not DOS_NT */
static void scan_file (char *filename);
static void scan_lisp_file (const char *filename, const char *mode);
static void scan_c_file (char *filename, const char *mode);
static void scan_c_stream (FILE *infile);
static void start_globals (void);
@ -238,16 +234,9 @@ put_filename (char *filename)
static void
scan_file (char *filename)
{
ptrdiff_t len = strlen (filename);
if (!generate_globals)
put_filename (filename);
if (len > 4 && !strcmp (filename + len - 4, ".elc"))
scan_lisp_file (filename, "rb");
else if (len > 3 && !strcmp (filename + len - 3, ".el"))
scan_lisp_file (filename, "r");
else
scan_c_file (filename, "r");
scan_c_file (filename, "r");
}
static void
@ -1225,453 +1214,4 @@ scan_c_stream (FILE *infile)
fatal ("read error");
}
/* Read a file of Lisp code, compiled or interpreted.
Looks for
(defun NAME ARGS DOCSTRING ...)
(defmacro NAME ARGS DOCSTRING ...)
(defsubst NAME ARGS DOCSTRING ...)
(autoload (quote NAME) FILE DOCSTRING ...)
(defvar NAME VALUE DOCSTRING)
(defconst NAME VALUE DOCSTRING)
(fset (quote NAME) (make-byte-code ... DOCSTRING ...))
(fset (quote NAME) #[... DOCSTRING ...])
(defalias (quote NAME) #[... DOCSTRING ...])
(custom-declare-variable (quote NAME) VALUE DOCSTRING ...)
starting in column zero.
(quote NAME) may appear as 'NAME as well.
We also look for #@LENGTH CONTENTS^_ at the beginning of the line.
When we find that, we save it for the following defining-form,
and we use that instead of reading a doc string within that defining-form.
For defvar, defconst, and fset we skip to the docstring with a kludgy
formatting convention: all docstrings must appear on the same line as the
initial open-paren (the one in column zero) and must contain a backslash
and a newline immediately after the initial double-quote. No newlines
must appear between the beginning of the form and the first double-quote.
For defun, defmacro, and autoload, we know how to skip over the
arglist, but the doc string must still have a backslash and newline
immediately after the double quote.
The only source files that must follow this convention are preloaded
uncompiled ones like loaddefs.el; aside from that, it is always the .elc
file that we should look at, and they are no problem because byte-compiler
output follows this convention.
The NAME and DOCSTRING are output.
NAME is preceded by `F' for a function or `V' for a variable.
An entry is output only if DOCSTRING has \ newline just after the opening ".
*/
static void
skip_white (FILE *infile)
{
int c;
do
c = getc (infile);
while (c_isspace (c));
ungetc (c, infile);
}
static void
read_lisp_symbol (FILE *infile, char *buffer)
{
int c;
char *fillp = buffer;
skip_white (infile);
while (true)
{
c = getc (infile);
if (c == '\\')
{
c = getc (infile);
if (c < 0)
return;
*fillp++ = c;
}
else if (c_isspace (c) || c == '(' || c == ')' || c < 0)
{
ungetc (c, infile);
*fillp = 0;
break;
}
else
*fillp++ = c;
}
if (! buffer[0])
fprintf (stderr, "## expected a symbol, got '%c'\n", c);
skip_white (infile);
}
static bool
search_lisp_doc_at_eol (FILE *infile)
{
int c = 0, c1 = 0, c2 = 0;
/* Skip until the end of line; remember two previous chars. */
while (c != '\n' && c != '\r' && c != EOF)
{
c2 = c1;
c1 = c;
c = getc (infile);
}
/* If two previous characters were " and \,
this is a doc string. Otherwise, there is none. */
if (c2 != '"' || c1 != '\\')
{
#ifdef DEBUG
fprintf (stderr, "## non-docstring found\n");
#endif
ungetc (c, infile);
return false;
}
return true;
}
#define DEF_ELISP_FILE(fn) { #fn, sizeof(#fn) - 1 }
static void
scan_lisp_file (const char *filename, const char *mode)
{
FILE *infile;
int c;
char *saved_string = 0;
/* These are the only files that are loaded uncompiled, and must
follow the conventions of the doc strings expected by this
function. These conventions are automatically followed by the
byte compiler when it produces the .elc files. */
static struct {
const char *fn;
int fl;
} const uncompiled[] = {
DEF_ELISP_FILE (loaddefs.el),
DEF_ELISP_FILE (loadup.el),
DEF_ELISP_FILE (charprop.el),
DEF_ELISP_FILE (cp51932.el),
DEF_ELISP_FILE (eucjp-ms.el)
};
int i;
int flen = strlen (filename);
if (generate_globals)
fatal ("scanning lisp file when -g specified");
if (flen > 3 && !strcmp (filename + flen - 3, ".el"))
{
bool match = false;
for (i = 0; i < sizeof (uncompiled) / sizeof (uncompiled[0]); i++)
{
if (uncompiled[i].fl <= flen
&& !strcmp (filename + flen - uncompiled[i].fl, uncompiled[i].fn)
&& (flen == uncompiled[i].fl
|| IS_SLASH (filename[flen - uncompiled[i].fl - 1])))
{
match = true;
break;
}
}
if (!match)
fatal ("uncompiled lisp file %s is not supported", filename);
}
infile = fopen (filename, mode);
if (infile == NULL)
{
perror (filename);
exit (EXIT_FAILURE);
}
c = '\n';
while (!feof (infile))
{
char buffer[BUFSIZ];
char type;
/* If not at end of line, skip till we get to one. */
if (c != '\n' && c != '\r')
{
c = getc (infile);
continue;
}
/* Skip the line break. */
while (c == '\n' || c == '\r')
c = getc (infile);
/* Detect a dynamic doc string and save it for the next expression. */
if (c == '#')
{
c = getc (infile);
if (c == '@')
{
ptrdiff_t length = 0;
ptrdiff_t i;
/* Read the length. */
while ((c = getc (infile),
c_isdigit (c)))
{
if (INT_MULTIPLY_WRAPV (length, 10, &length)
|| INT_ADD_WRAPV (length, c - '0', &length)
|| SIZE_MAX < length)
memory_exhausted ();
}
if (length <= 1)
fatal ("invalid dynamic doc string length");
if (c != ' ')
fatal ("space not found after dynamic doc string length");
/* The next character is a space that is counted in the length
but not part of the doc string.
We already read it, so just ignore it. */
length--;
/* Read in the contents. */
free (saved_string);
saved_string = xmalloc (length);
for (i = 0; i < length; i++)
saved_string[i] = getc (infile);
/* The last character is a ^_.
That is needed in the .elc file
but it is redundant in DOC. So get rid of it here. */
saved_string[length - 1] = 0;
/* Skip the line break. */
while (c == '\n' || c == '\r')
c = getc (infile);
/* Skip the following line. */
while (! (c == '\n' || c == '\r' || c < 0))
c = getc (infile);
}
continue;
}
if (c != '(')
continue;
read_lisp_symbol (infile, buffer);
if (! strcmp (buffer, "defun")
|| ! strcmp (buffer, "defmacro")
|| ! strcmp (buffer, "defsubst"))
{
type = 'F';
read_lisp_symbol (infile, buffer);
/* Skip the arguments: either "nil" or a list in parens. */
c = getc (infile);
if (c == 'n') /* nil */
{
if ((c = getc (infile)) != 'i'
|| (c = getc (infile)) != 'l')
{
fprintf (stderr, "## unparsable arglist in %s (%s)\n",
buffer, filename);
continue;
}
}
else if (c != '(')
{
fprintf (stderr, "## unparsable arglist in %s (%s)\n",
buffer, filename);
continue;
}
else
while (! (c == ')' || c < 0))
c = getc (infile);
skip_white (infile);
/* If the next three characters aren't `dquote bslash newline'
then we're not reading a docstring.
*/
if ((c = getc (infile)) != '"'
|| (c = getc (infile)) != '\\'
|| ((c = getc (infile)) != '\n' && c != '\r'))
{
#ifdef DEBUG
fprintf (stderr, "## non-docstring in %s (%s)\n",
buffer, filename);
#endif
continue;
}
}
/* defcustom can only occur in uncompiled Lisp files. */
else if (! strcmp (buffer, "defvar")
|| ! strcmp (buffer, "defconst")
|| ! strcmp (buffer, "defcustom"))
{
type = 'V';
read_lisp_symbol (infile, buffer);
if (saved_string == 0)
if (!search_lisp_doc_at_eol (infile))
continue;
}
else if (! strcmp (buffer, "custom-declare-variable")
|| ! strcmp (buffer, "defvaralias")
)
{
type = 'V';
c = getc (infile);
if (c == '\'')
read_lisp_symbol (infile, buffer);
else
{
if (c != '(')
{
fprintf (stderr,
"## unparsable name in custom-declare-variable in %s\n",
filename);
continue;
}
read_lisp_symbol (infile, buffer);
if (strcmp (buffer, "quote"))
{
fprintf (stderr,
"## unparsable name in custom-declare-variable in %s\n",
filename);
continue;
}
read_lisp_symbol (infile, buffer);
c = getc (infile);
if (c != ')')
{
fprintf (stderr,
"## unparsable quoted name in custom-declare-variable in %s\n",
filename);
continue;
}
}
if (saved_string == 0)
if (!search_lisp_doc_at_eol (infile))
continue;
}
else if (! strcmp (buffer, "fset") || ! strcmp (buffer, "defalias"))
{
type = 'F';
c = getc (infile);
if (c == '\'')
read_lisp_symbol (infile, buffer);
else
{
if (c != '(')
{
fprintf (stderr, "## unparsable name in fset in %s\n",
filename);
continue;
}
read_lisp_symbol (infile, buffer);
if (strcmp (buffer, "quote"))
{
fprintf (stderr, "## unparsable name in fset in %s\n",
filename);
continue;
}
read_lisp_symbol (infile, buffer);
c = getc (infile);
if (c != ')')
{
fprintf (stderr,
"## unparsable quoted name in fset in %s\n",
filename);
continue;
}
}
if (saved_string == 0)
if (!search_lisp_doc_at_eol (infile))
continue;
}
else if (! strcmp (buffer, "autoload"))
{
type = 'F';
c = getc (infile);
if (c == '\'')
read_lisp_symbol (infile, buffer);
else
{
if (c != '(')
{
fprintf (stderr, "## unparsable name in autoload in %s\n",
filename);
continue;
}
read_lisp_symbol (infile, buffer);
if (strcmp (buffer, "quote"))
{
fprintf (stderr, "## unparsable name in autoload in %s\n",
filename);
continue;
}
read_lisp_symbol (infile, buffer);
c = getc (infile);
if (c != ')')
{
fprintf (stderr,
"## unparsable quoted name in autoload in %s\n",
filename);
continue;
}
}
skip_white (infile);
c = getc (infile);
if (c != '\"')
{
fprintf (stderr, "## autoload of %s unparsable (%s)\n",
buffer, filename);
continue;
}
read_c_string_or_comment (infile, 0, false, 0);
if (saved_string == 0)
if (!search_lisp_doc_at_eol (infile))
continue;
}
#ifdef DEBUG
else if (! strcmp (buffer, "if")
|| ! strcmp (buffer, "byte-code"))
continue;
#endif
else
{
#ifdef DEBUG
fprintf (stderr, "## unrecognized top-level form, %s (%s)\n",
buffer, filename);
#endif
continue;
}
/* At this point, we should either use the previous dynamic doc string in
saved_string or gobble a doc string from the input file.
In the latter case, the opening quote (and leading backslash-newline)
have already been read. */
printf ("\037%c%s\n", type, buffer);
if (saved_string)
{
fputs (saved_string, stdout);
/* Don't use one dynamic doc string twice. */
free (saved_string);
saved_string = 0;
}
else
read_c_string_or_comment (infile, 1, false, 0);
}
free (saved_string);
if (ferror (infile) || fclose (infile) != 0)
fatal ("%s: read error", filename);
}
/* make-docfile.c ends here */