Support compiled queries in treesit-query-capture
Last commit added this new type, this commit adds functionalities. treesit.el only has documentation changes. * lisp/treesit.el (treesit-query-in, treesit-font-lock-settings, treesit-defun-query): Update docstring. * src/treesit.c (make_ts_query): New function. (Ftreesit_query_compile): New function. (Ftreesit_query_capture): Remove code that creates a query object and instead either use make_ts_query or use the give compiled query. Free the query object conditonally. (syms_of_treesit): New symbol.
This commit is contained in:
parent
a8428b917d
commit
e171ef933f
2 changed files with 134 additions and 30 deletions
|
@ -366,9 +366,12 @@ language symbol, use the root node of the first parser for that
|
|||
language; if a parser, use the root node of that parser; if a
|
||||
node, use that node.
|
||||
|
||||
QUERY is either a string query or a sexp query. See Info node
|
||||
`(elisp)Pattern Matching' for how to write a query pattern in either
|
||||
string or s-expression form.
|
||||
QUERY is either a string query, a sexp query, or a compiled
|
||||
query. See Info node `(elisp)Pattern Matching' for how to write
|
||||
a query in either string or s-expression form. When using
|
||||
repeatedly, a compiled query is much faster than a string or sexp
|
||||
one, so it is recommend to compile your queries if it will be
|
||||
used over and over.
|
||||
|
||||
BEG and END, if _both_ non-nil, specifies the range in which the query
|
||||
is executed.
|
||||
|
@ -442,8 +445,12 @@ Each SETTING controls one parser (often of different language).
|
|||
LANGUAGE is the language symbol. See Info node `(elisp)Language
|
||||
Definitions'.
|
||||
|
||||
QUERY is either a string query or a sexp query.
|
||||
See Info node `(elisp)Pattern Matching' for writing queries.
|
||||
QUERY is either a string query, a sexp query, or a compiled
|
||||
query. See Info node `(elisp)Pattern Matching' for how to write
|
||||
a query in either string or s-expression form. When using
|
||||
repeatedly, a compiled query is much faster than a string or sexp
|
||||
one, so it is recommend to compile your queries if it will be
|
||||
used over and over.
|
||||
|
||||
Capture names in QUERY should be face names like
|
||||
`font-lock-keyword-face'. The captured node will be fontified
|
||||
|
@ -923,7 +930,9 @@ search failed."
|
|||
(defvar-local treesit-defun-query nil
|
||||
"A tree-sitter query that matches function/class definitions.
|
||||
Capture names don't matter. This variable is used by navigation
|
||||
functions like `treesit-beginning-of-defun'.")
|
||||
functions like `treesit-beginning-of-defun'.
|
||||
|
||||
It is recommended to use compiled query for this variable.")
|
||||
|
||||
(defun treesit-beginning-of-defun (&optional arg)
|
||||
"Move backward to the beginning of a defun.
|
||||
|
|
143
src/treesit.c
143
src/treesit.c
|
@ -88,6 +88,28 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
|
|||
parser of buffer changes.
|
||||
- lisp/emacs-lisp/cl-preloaded.el & data.c & lisp.h for parser and
|
||||
node type.
|
||||
|
||||
Because it is pretty slow (comparing to other tree-sitter
|
||||
operations) for tree-sitter to parse the query and produce a query
|
||||
object, it is very wasteful to reparse the query every time
|
||||
treesit-query-capture is called, and it completely kills the
|
||||
performance of querying in a loop for a moderate amount of times
|
||||
(hundreds of queries takes seconds rather than milliseconds to
|
||||
complete). Therefore we want some caching. We can either use a
|
||||
search.c style transparent caching, or simply expose a new type,
|
||||
compiled-ts-query and let the user to manually compile AOT. I
|
||||
believe AOT compiling gives users more control, makes the
|
||||
performance stable and easy to understand (compiled -> fast,
|
||||
uncompiled -> slow), and avoids some edge cases transparent cache
|
||||
could have (see below). So I implemented the AOT compilation.
|
||||
|
||||
Problems a transparent cache could have: Suppose we store cache
|
||||
entries in a fixed-length linked-list, and compare with EQ. 1)
|
||||
One-off query could kick out useful cache. 2) if the user messed
|
||||
up and the query doesn't EQ to the cache anymore, the performance
|
||||
mysteriously drops. 3) what if a user uses so many stuff that the
|
||||
default cache size (20) is not enough and we end up thrashing?
|
||||
These are all imagined scenarios but they are not impossible :-)
|
||||
*/
|
||||
|
||||
/*** Initialization */
|
||||
|
@ -536,6 +558,31 @@ make_ts_node (Lisp_Object parser, TSNode node)
|
|||
return make_lisp_ptr (lisp_node, Lisp_Vectorlike);
|
||||
}
|
||||
|
||||
/* Make a compiled query struct. Return NULL if error occurs. QUERY
|
||||
has to be either a cons or a string. */
|
||||
static struct Lisp_TS_Query *
|
||||
make_ts_query (Lisp_Object query, const TSLanguage *language,
|
||||
uint32_t *error_offset, TSQueryError *error_type)
|
||||
{
|
||||
if (CONSP (query))
|
||||
query = Ftreesit_expand_query (query);
|
||||
char *source = SSDATA (query);
|
||||
|
||||
TSQuery *ts_query = ts_query_new (language, source, strlen (source),
|
||||
error_offset, error_type);
|
||||
TSQueryCursor *ts_cursor = ts_query_cursor_new ();
|
||||
|
||||
if (ts_query == NULL)
|
||||
return NULL;
|
||||
|
||||
struct Lisp_TS_Query *lisp_query
|
||||
= ALLOCATE_PLAIN_PSEUDOVECTOR (struct Lisp_TS_Query,
|
||||
PVEC_TS_COMPILED_QUERY);
|
||||
lisp_query->query = ts_query;
|
||||
lisp_query->cursor = ts_cursor;
|
||||
return lisp_query;
|
||||
}
|
||||
|
||||
DEFUN ("treesit-parser-p",
|
||||
Ftreesit_parser_p, Streesit_parser_p, 1, 1, 0,
|
||||
doc: /* Return t if OBJECT is a tree-sitter parser. */)
|
||||
|
@ -1467,6 +1514,39 @@ ts_eval_predicates
|
|||
return pass;
|
||||
}
|
||||
|
||||
DEFUN ("treesit-query-compile",
|
||||
Ftreesit_query_compile,
|
||||
Streesit_query_compile, 2, 2, 0,
|
||||
doc: /* Compile QUERY to a compiled query.
|
||||
|
||||
Querying a compiled query is much faster than an uncompiled one.
|
||||
LANGUAGE is the language this query is for.
|
||||
|
||||
Signals treesit-query-error if QUERY is malformed or something
|
||||
else goes wrong. */)
|
||||
(Lisp_Object language, Lisp_Object query)
|
||||
{
|
||||
if (!Ftreesit_query_p (query))
|
||||
wrong_type_argument (Qtreesit_query_p, query);
|
||||
CHECK_SYMBOL (language);
|
||||
if (TS_COMPILED_QUERY_P (query))
|
||||
return query;
|
||||
|
||||
TSLanguage *ts_lang = ts_load_language (language, true);
|
||||
uint32_t error_offset;
|
||||
TSQueryError error_type;
|
||||
|
||||
struct Lisp_TS_Query *lisp_query
|
||||
= make_ts_query (query, ts_lang, &error_offset, &error_type);
|
||||
|
||||
if (lisp_query == NULL)
|
||||
xsignal2 (Qtreesit_query_error,
|
||||
build_string (ts_query_error_to_string (error_type)),
|
||||
make_fixnum (error_offset + 1));
|
||||
|
||||
return make_lisp_ptr (lisp_query, Lisp_Vectorlike);
|
||||
}
|
||||
|
||||
DEFUN ("treesit-query-capture",
|
||||
Ftreesit_query_capture,
|
||||
Streesit_query_capture, 2, 4, 0,
|
||||
|
@ -1475,9 +1555,11 @@ DEFUN ("treesit-query-capture",
|
|||
Return a list of (CAPTURE_NAME . NODE). CAPTURE_NAME is the name
|
||||
assigned to the node in PATTERN. NODE is the captured node.
|
||||
|
||||
QUERY is either a string query or a sexp query. See Info node
|
||||
`(elisp)Pattern Matching' for how to write a query in either string or
|
||||
s-expression form.
|
||||
QUERY is either a string query, a sexp query, or a compiled query.
|
||||
See Info node `(elisp)Pattern Matching' for how to write a query in
|
||||
either string or s-expression form. When using repeatedly, a compiled
|
||||
query is much faster than a string or sexp one, so it is recommend to
|
||||
compile your queries if it will be used over and over.
|
||||
|
||||
BEG and END, if both non-nil, specifies the range in which the query
|
||||
is executed.
|
||||
|
@ -1493,10 +1575,9 @@ else goes wrong. */)
|
|||
if (!NILP (end))
|
||||
CHECK_INTEGER (end);
|
||||
|
||||
if (CONSP (query))
|
||||
query = Ftreesit_expand_query (query);
|
||||
else
|
||||
CHECK_STRING (query);
|
||||
if (!(TS_COMPILED_QUERY_P (query)
|
||||
|| CONSP (query) || STRINGP (query)))
|
||||
wrong_type_argument (Qtreesit_query_p, query);
|
||||
|
||||
/* Extract C values from Lisp objects. */
|
||||
TSNode ts_node = XTS_NODE (node)->node;
|
||||
|
@ -1505,25 +1586,34 @@ else goes wrong. */)
|
|||
XTS_PARSER (XTS_NODE (node)->parser)->visible_beg;
|
||||
const TSLanguage *lang = ts_parser_language
|
||||
(XTS_PARSER (lisp_parser)->parser);
|
||||
char *source = SSDATA (query);
|
||||
|
||||
/* Initialize query objects, and execute query. */
|
||||
uint32_t error_offset;
|
||||
TSQueryError error_type;
|
||||
/* TODO: We could cache the query object, so that repeatedly
|
||||
querying with the same query can reuse the query object. It also
|
||||
saves us from expanding the sexp query into a string. I don't
|
||||
know how much time that could save though. */
|
||||
TSQuery *ts_query = ts_query_new (lang, source, strlen (source),
|
||||
&error_offset, &error_type);
|
||||
TSQueryCursor *cursor = ts_query_cursor_new ();
|
||||
|
||||
if (ts_query == NULL)
|
||||
struct Lisp_TS_Query *lisp_query;
|
||||
/* If the lisp query is temporary, we need to free it after use. */
|
||||
bool lisp_query_temp_p;
|
||||
if (TS_COMPILED_QUERY_P (query))
|
||||
{
|
||||
xsignal2 (Qtreesit_query_error,
|
||||
build_string (ts_query_error_to_string (error_type)),
|
||||
make_fixnum (error_offset + 1));
|
||||
lisp_query_temp_p = false;
|
||||
lisp_query = XTS_COMPILED_QUERY (query);
|
||||
}
|
||||
else
|
||||
{
|
||||
lisp_query_temp_p = true;
|
||||
uint32_t error_offset;
|
||||
TSQueryError error_type;
|
||||
lisp_query = make_ts_query (query, lang,
|
||||
&error_offset, &error_type);
|
||||
if (lisp_query == NULL)
|
||||
{
|
||||
xsignal2 (Qtreesit_query_error,
|
||||
build_string
|
||||
(ts_query_error_to_string (error_type)),
|
||||
make_fixnum (error_offset + 1));
|
||||
}
|
||||
}
|
||||
TSQuery *ts_query = lisp_query->query;
|
||||
TSQueryCursor *cursor = lisp_query->cursor;
|
||||
|
||||
if (!NILP (beg) && !NILP (end))
|
||||
{
|
||||
EMACS_INT beg_byte = XFIXNUM (beg);
|
||||
|
@ -1578,8 +1668,11 @@ else goes wrong. */)
|
|||
result = prev_result;
|
||||
}
|
||||
}
|
||||
ts_query_delete (ts_query);
|
||||
ts_query_cursor_delete (cursor);
|
||||
if (lisp_query_temp_p)
|
||||
{
|
||||
ts_query_delete (ts_query);
|
||||
ts_query_cursor_delete (cursor);
|
||||
}
|
||||
return Fnreverse (result);
|
||||
}
|
||||
|
||||
|
@ -1592,6 +1685,7 @@ syms_of_treesit (void)
|
|||
DEFSYM (Qtreesit_parser_p, "treesit-parser-p");
|
||||
DEFSYM (Qtreesit_node_p, "treesit-node-p");
|
||||
DEFSYM (Qtreesit_compiled_query_p, "treesit-compiled-query-p");
|
||||
DEFSYM (Qtreesit_query_p, "treesit-query-p");
|
||||
DEFSYM (Qnamed, "named");
|
||||
DEFSYM (Qmissing, "missing");
|
||||
DEFSYM (Qextra, "extra");
|
||||
|
@ -1705,5 +1799,6 @@ dynamic libraries, in that order. */);
|
|||
|
||||
defsubr (&Streesit_expand_pattern);
|
||||
defsubr (&Streesit_expand_query);
|
||||
defsubr (&Streesit_query_compile);
|
||||
defsubr (&Streesit_query_capture);
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue