diff --git a/doc/lispref/parsing.texi b/doc/lispref/parsing.texi index ddf02d9283b..20b1085b46c 100644 --- a/doc/lispref/parsing.texi +++ b/doc/lispref/parsing.texi @@ -486,8 +486,10 @@ string. Unlike a buffer, parsing a string is a one-off operation, and there is no way to update the result. @defun treesit-parse-string string language -This function parses @var{string} using @var{language}, and returns -the root node of the generated syntax tree. +This function parses @var{string} using @var{language}, and returns the +root node of the generated syntax tree. @emph{Do not} use this function +in a loop: this is a convenience function intended for one-off use, and +it isn't optimized; for heavy workloads, use a temporary buffer instead. @end defun @heading Be notified by changes to the parse tree diff --git a/lisp/treesit.el b/lisp/treesit.el index c91864725da..86dc4733d37 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -123,17 +123,6 @@ of max unsigned 32-bit value for byte offsets into buffer text." ;;; Parser API supplement -(defun treesit-parse-string (string language) - "Parse STRING using a parser for LANGUAGE. -Return the root node of the syntax tree." - ;; We can't use `with-temp-buffer' because it kills the buffer when - ;; returning from the form. - (let ((buf (generate-new-buffer " *treesit-parse-string*"))) - (with-current-buffer buf - (insert string) - (treesit-parser-root-node - (treesit-parser-create language))))) - (defvar-local treesit-language-at-point-function nil "A function that returns the language at point. 
This is used by `treesit-language-at', which is used by various diff --git a/src/treesit.c b/src/treesit.c index 27779692923..a41892b1cac 100644 --- a/src/treesit.c +++ b/src/treesit.c @@ -1181,6 +1181,7 @@ make_treesit_parser (Lisp_Object buffer, TSParser *parser, lisp_parser->visible_end = BUF_ZV_BYTE (XBUFFER (buffer)); lisp_parser->timestamp = 0; lisp_parser->deleted = false; + lisp_parser->need_to_gc_buffer = false; eassert (lisp_parser->visible_beg <= lisp_parser->visible_end); return make_lisp_ptr (lisp_parser, Lisp_Vectorlike); } @@ -1220,6 +1221,8 @@ make_treesit_query (Lisp_Object query, Lisp_Object language) void treesit_delete_parser (struct Lisp_TS_Parser *lisp_parser) { + if (lisp_parser->need_to_gc_buffer) + Fkill_buffer (lisp_parser->buffer); ts_tree_delete (lisp_parser->tree); ts_parser_delete (lisp_parser->parser); } @@ -1859,6 +1862,49 @@ positions. PARSER is the parser issuing the notification. */) return Qnil; } +// Why don't we use ts_parser_parse_string? I tried, but it requires too much +// change throughout treesit.c: we either return a root node that has no +// associated parser, or one that has a parser but the parser doesn't +// have an associated buffer. Both routes require us to add checks and +// branches every time we use the parser of a node or the buffer of a +// parser. I tried route 1, and found that on top of needing to add a +// bunch of branches to handle the no-parser case, many functions +// require a parser alongside the node (getting the tree, or language +// symbol, etc.), and I would need to rewrite those as well. Overall +// it's just not worth it--this is just a convenience function. --yuan +DEFUN ("treesit-parse-string", + Ftreesit_parse_string, Streesit_parse_string, + 2, 2, 0, + doc: /* Parse STRING using a parser for LANGUAGE. + +Return the root node of the resulting parse tree. DO NOT use this function +in a loop: this function is intended for one-off use and isn't +optimized; for heavy workloads, use a temporary buffer instead. 
*/) + (Lisp_Object string, Lisp_Object language) +{ + CHECK_SYMBOL (language); + CHECK_STRING (string); + + Lisp_Object name_str = build_string (" *treesit-parse-string*"); + Lisp_Object buffer_name = Fgenerate_new_buffer_name (name_str, Qnil); + Lisp_Object buffer = Fget_buffer_create (buffer_name, Qnil); + + struct buffer *old_buffer = current_buffer; + set_buffer_internal (XBUFFER (buffer)); + insert1 (string); + set_buffer_internal (old_buffer); + + Lisp_Object parser = Ftreesit_parser_create (language, buffer, Qt, Qnil); + XTS_PARSER (parser)->need_to_gc_buffer = true; + + /* Make sure the temp buffer doesn't reference the parser, otherwise + the buffer and parser cross-reference each other and the parser is + never garbage-collected. */ + BVAR (XBUFFER (buffer), ts_parser_list) = Qnil; + + return Ftreesit_parser_root_node (parser); +} + /*** Node API */ @@ -4245,7 +4291,7 @@ applies to LANGUAGE-A will be redirected to LANGUAGE-B instead. */); defsubr (&Streesit_parser_tag); defsubr (&Streesit_parser_root_node); - /* defsubr (&Streesit_parse_string); */ + defsubr (&Streesit_parse_string); defsubr (&Streesit_parser_set_included_ranges); defsubr (&Streesit_parser_included_ranges); diff --git a/src/treesit.h b/src/treesit.h index 3da4cc155ea..cd84fa358c5 100644 --- a/src/treesit.h +++ b/src/treesit.h @@ -82,6 +82,10 @@ struct Lisp_TS_Parser /* If this field is true, parser functions raises treesit-parser-deleted signal. */ bool deleted; + /* If this field is true, deleting the parser should also delete the + associated buffer. This is for parsers created by + treesit-parse-string, which uses a hidden temp buffer. */ + bool need_to_gc_buffer; }; /* A wrapper around a tree-sitter node. */