Make sure treesit-parse-string gc its temp buffer (bug#71012)

* doc/lispref/parsing.texi (Using Parser): Add notice.
* lisp/treesit.el (treesit-parse-string): Remove function.
* src/treesit.c (make_treesit_parser): Init the new field.
(treesit_delete_parser): Collect the temp buffer.
(Ftreesit_parse_string): New function.
* src/treesit.h (Lisp_TS_Parser): New field.
This commit is contained in:
Yuan Fu 2024-08-24 14:54:57 -07:00
parent 32afdcca88
commit 4339e70a94
No known key found for this signature in database
GPG key ID: 56E19BC57664A442
4 changed files with 55 additions and 14 deletions

View file

@ -486,8 +486,10 @@ string. Unlike a buffer, parsing a string is a one-off operation, and
there is no way to update the result.
@defun treesit-parse-string string language
This function parses @var{string} using @var{language}, and returns
the root node of the generated syntax tree.
This function parses @var{string} using @var{language}, and returns the
root node of the generated syntax tree. @emph{Do not} use this function
in a loop: this is a convenience function intended for one-off use, and
it isn't optimized; for heavy workload, use a temporary buffer instead.
@end defun
@heading Be notified by changes to the parse tree

View file

@ -123,17 +123,6 @@ of max unsigned 32-bit value for byte offsets into buffer text."
;;; Parser API supplement
(defun treesit-parse-string (string language)
  "Parse STRING using a parser for LANGUAGE.
Return the root node of the syntax tree."
  ;; `with-temp-buffer' is not usable here: it would kill the buffer
  ;; as soon as the form returns.  A freshly generated hidden buffer
  ;; is used instead and is left alive after this function returns.
  (with-current-buffer (generate-new-buffer " *treesit-parse-string*")
    (insert string)
    (let ((parser (treesit-parser-create language)))
      (treesit-parser-root-node parser))))
(defvar-local treesit-language-at-point-function nil
"A function that returns the language at point.
This is used by `treesit-language-at', which is used by various

View file

@ -1181,6 +1181,7 @@ make_treesit_parser (Lisp_Object buffer, TSParser *parser,
lisp_parser->visible_end = BUF_ZV_BYTE (XBUFFER (buffer));
lisp_parser->timestamp = 0;
lisp_parser->deleted = false;
lisp_parser->need_to_gc_buffer = false;
eassert (lisp_parser->visible_beg <= lisp_parser->visible_end);
return make_lisp_ptr (lisp_parser, Lisp_Vectorlike);
}
@ -1220,6 +1221,8 @@ make_treesit_query (Lisp_Object query, Lisp_Object language)
void
treesit_delete_parser (struct Lisp_TS_Parser *lisp_parser)
{
if (lisp_parser->need_to_gc_buffer)
Fkill_buffer (lisp_parser->buffer);
ts_tree_delete (lisp_parser->tree);
ts_parser_delete (lisp_parser->parser);
}
@ -1859,6 +1862,49 @@ positions. PARSER is the parser issuing the notification. */)
return Qnil;
}
// Why don't we use ts_parse_string? I tried, but it requires too much
// change throughout treesit.c: we either return a root node that has no
// associated parser, or one that has a parser but the parser doesn't
// have associated buffer. Both route requires us to add checks and
// branches everytime we use the parser of a node or the buffer of a
// parser. I tried route 1, and found that on top of needing to add a
// bunch of branches to handle the no-parser case, many functions
// requires a parser alongside the node (getting the tree, or language
// symbol, etc), and I would need to rewrite those as well. Overall
// it's just not worth it--this is just a convenience function. --yuan
DEFUN ("treesit-parse-string",
Ftreesit_parse_string, Streesit_parse_string,
2, 2, 0,
doc: /* Parse STRING using a parser for LANGUAGE.
Return the root node of the result parse tree. DO NOT use this function
in a loop: this function is intended for one-off use and isn't
optimized; for heavy workload, use a temporary buffer instead. */)
(Lisp_Object string, Lisp_Object language)
{
CHECK_SYMBOL (language);
CHECK_STRING (string);
Lisp_Object name_str = build_string (" *treesit-parse-string*");
Lisp_Object buffer_name = Fgenerate_new_buffer_name (name_str, Qnil);
Lisp_Object buffer = Fget_buffer_create (buffer_name, Qnil);
struct buffer *old_buffer = current_buffer;
set_buffer_internal (XBUFFER (buffer));
insert1 (string);
set_buffer_internal (old_buffer);
Lisp_Object parser = Ftreesit_parser_create (language, buffer, Qt, Qnil);
XTS_PARSER (parser)->need_to_gc_buffer = true;
/* Make sure the temp buffer doesn't reference the parser, otherwise
the buffer and parser cross-reference each other and the parser is
never garbage-collected. */
BVAR (XBUFFER (buffer), ts_parser_list) = Qnil;
return Ftreesit_parser_root_node (parser);
}
/*** Node API */
@ -4245,7 +4291,7 @@ applies to LANGUAGE-A will be redirected to LANGUAGE-B instead. */);
defsubr (&Streesit_parser_tag);
defsubr (&Streesit_parser_root_node);
/* defsubr (&Streesit_parse_string); */
defsubr (&Streesit_parse_string);
defsubr (&Streesit_parser_set_included_ranges);
defsubr (&Streesit_parser_included_ranges);

View file

@ -82,6 +82,10 @@ struct Lisp_TS_Parser
/* If this field is true, parser functions raise the
treesit-parser-deleted signal. */
bool deleted;
/* If this field is true, deleting the parser should also delete the
associated buffer. This is for parsers created by
treesit-parse-string, which uses a hidden temp buffer. */
bool need_to_gc_buffer;
};
/* A wrapper around a tree-sitter node. */