Add a new tree-sitter query predicate 'pred'

I realized that using an arbitrary function as the predicate in
queries is very helpful for some queries I'm writing for python and
javascript, and presumably most other languages[1].

Granted, we can already filter out unwanted nodes by using a function
instead of a face for the capture name, and (1) determine whether the
captured node is valid and (2) fontify that node if it's valid.

However, such an approach is a bit more cumbersome and, more
importantly, gets in the way of another potential use of the
fontification queries: context extraction.

For example, I could use the query for the 'variable' feature to get
all the variables in a certain region.  In this use case, we want the
filtering to happen before returning the captured nodes.

Besides, the change is relatively small and straightforward: most of
the code is already there; I just need to add some boilerplate.

[1] For code like aa.bb(cc), we want bb to be in function face,
because obviously it's a function.  But for aa.bb, we want bb to be in
property face, because it's a property.  In the AST, bb is always a
property; the difference between the two cases is the enclosing node:
in the first case, aa.bb is in a "call_expression" node, indicating
that bb is used as a function (a method).  So we want a predicate
function that checks whether bb is used as a function or a property,
and determines whether it should be in function or property face.

* doc/lispref/parsing.texi (Pattern Matching): Update manual.
* src/treesit.c (Ftreesit_pattern_expand): Handle :pred.
(treesit_predicate_capture_name_to_node): A new function extracted
from treesit_predicate_capture_name_to_text.
(treesit_predicate_capture_name_to_text): Use the newly extracted
function.
(treesit_predicate_pred): New predicate function.
(treesit_eval_predicates): Add new predicate.  Also fix a bug: we want
to AND the results of each predicate.
* test/src/treesit-tests.el (treesit--ert-pred-last-sibling): New
helper function.
(treesit-query-api): Test #pred predicate.
This commit is contained in:
Yuan Fu 2022-12-26 17:16:59 -08:00
parent 835a80dcc4
commit a6d961ae2f
No known key found for this signature in database
GPG key ID: 56E19BC57664A442
3 changed files with 65 additions and 19 deletions

View file

@ -1266,10 +1266,11 @@ example, with the following pattern:
@end example
@noindent
tree-sitter only matches arrays where the first element equals to
the last element. To attach a predicate to a pattern, we need to
group them together. A predicate always starts with a @samp{#}.
Currently there are two predicates, @code{#equal} and @code{#match}.
tree-sitter only matches arrays where the first element is equal to
the last element. To attach a predicate to a pattern, we need to group
them together. A predicate always starts with a @samp{#}. Currently
there are three predicates, @code{#equal}, @code{#match}, and
@code{#pred}.
@deffn Predicate equal arg1 arg2
Matches if @var{arg1} equals to @var{arg2}. Arguments can be either
@ -1282,6 +1283,11 @@ Matches if the text that @var{capture-name}'s node spans in the buffer
matches regular expression @var{regexp}. Matching is case-sensitive.
@end deffn
@deffn Predicate pred fn &rest nodes
Matches if function @var{fn} returns non-@code{nil} when called with
the nodes in @var{nodes} as its arguments.
@end deffn
Note that a predicate can only refer to capture names that appear in
the same pattern. Indeed, it makes little sense to refer to capture
names in other patterns.

View file

@ -2170,6 +2170,8 @@ See Info node `(elisp)Pattern Matching' for detailed explanation. */)
return build_pure_c_string ("#equal");
if (EQ (pattern, QCmatch))
return build_pure_c_string ("#match");
if (EQ (pattern, QCpred))
return build_pure_c_string ("#pred");
Lisp_Object opening_delimeter
= build_pure_c_string (VECTORP (pattern) ? "[" : "(");
Lisp_Object closing_delimiter
@ -2269,10 +2271,10 @@ treesit_predicates_for_pattern (TSQuery *query, uint32_t pattern_index)
return Fnreverse (result);
}
/* Translate a capture NAME (symbol) to the text of the captured node.
/* Translate a capture NAME (symbol) to a node.
Signals treesit-query-error if such node is not captured. */
static Lisp_Object
treesit_predicate_capture_name_to_text (Lisp_Object name,
treesit_predicate_capture_name_to_node (Lisp_Object name,
struct capture_range captures)
{
Lisp_Object node = Qnil;
@ -2292,6 +2294,16 @@ treesit_predicate_capture_name_to_text (Lisp_Object name,
name, build_pure_c_string ("A predicate can only refer"
" to captured nodes in the "
"same pattern"));
return node;
}
/* Translate a capture NAME (symbol) to the text of the captured node.
Signals treesit-query-error if such node is not captured. */
static Lisp_Object
treesit_predicate_capture_name_to_text (Lisp_Object name,
struct capture_range captures)
{
Lisp_Object node = treesit_predicate_capture_name_to_node (name, captures);
struct buffer *old_buffer = current_buffer;
set_buffer_internal (XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer));
@ -2365,13 +2377,30 @@ treesit_predicate_match (Lisp_Object args, struct capture_range captures)
return false;
}
/* About predicates: I decide to hard-code predicates in C instead of
implementing an extensible system where predicates are translated
to Lisp functions, and new predicates can be added by extending a
list of functions, because I really couldn't imagine any useful
predicates besides equal and match. If we later found out that
such system is indeed useful and necessary, it can be easily
added. */
/* Evaluate the query predicate (#pred FN ARG...).

   ARGS is the predicate's argument list: its first element names the
   function FN and every remaining element is a capture name.  Each
   capture name is translated to its node with
   treesit_predicate_capture_name_to_node (using CAPTURES), and FN is
   then applied once to all of those nodes, in the order given.  The
   arity of FN must therefore match the number of ARGs.

   Return true if FN returns non-nil, false otherwise.  Signal
   treesit-query-error if ARGS has fewer than two elements.  */
static bool
treesit_predicate_pred (Lisp_Object args, struct capture_range captures)
{
  Lisp_Object arg_count = Flength (args);
  if (XFIXNUM (arg_count) < 2)
    xsignal2 (Qtreesit_query_error,
	      build_pure_c_string ("Predicate `pred' requires "
				   "at least two arguments, "
				   "but was only given"),
	      arg_count);

  /* The function is given by name; turn that name into a symbol so
     it can be applied.  */
  Lisp_Object function = Fintern (XCAR (args), Qnil);

  /* Gather the captured nodes.  Consing builds the list back to
     front, so it is reversed afterwards to restore argument order.  */
  Lisp_Object captured_nodes = Qnil;
  Lisp_Object rest = XCDR (args);
  FOR_EACH_TAIL (rest)
    {
      Lisp_Object node
	= treesit_predicate_capture_name_to_node (XCAR (rest), captures);
      captured_nodes = Fcons (node, captured_nodes);
    }
  captured_nodes = Fnreverse (captured_nodes);

  Lisp_Object verdict = CALLN (Fapply, function, captured_nodes);
  if (NILP (verdict))
    return false;
  return true;
}
/* If all predicates in PREDICATES pass, return true; otherwise
return false. */
@ -2387,14 +2416,17 @@ treesit_eval_predicates (struct capture_range captures, Lisp_Object predicates)
Lisp_Object fn = XCAR (predicate);
Lisp_Object args = XCDR (predicate);
if (!NILP (Fstring_equal (fn, build_pure_c_string ("equal"))))
pass = treesit_predicate_equal (args, captures);
pass &= treesit_predicate_equal (args, captures);
else if (!NILP (Fstring_equal (fn, build_pure_c_string ("match"))))
pass = treesit_predicate_match (args, captures);
pass &= treesit_predicate_match (args, captures);
else if (!NILP (Fstring_equal (fn, build_pure_c_string ("pred"))))
pass &= treesit_predicate_pred (args, captures);
else
xsignal3 (Qtreesit_query_error,
build_pure_c_string ("Invalid predicate"),
fn, build_pure_c_string ("Currently Emacs only supports"
" equal and match predicate"));
" equal, match, and pred"
" predicate"));
}
/* If all predicates passed, add captures to result list. */
return pass;
@ -3217,6 +3249,7 @@ syms_of_treesit (void)
DEFSYM (QCanchor, ":anchor");
DEFSYM (QCequal, ":equal");
DEFSYM (QCmatch, ":match");
DEFSYM (QCpred, ":pred");
DEFSYM (Qnot_found, "not-found");
DEFSYM (Qsymbol_error, "symbol-error");

View file

@ -335,6 +335,9 @@ BODY is the test body."
;;; Query
(defun treesit--ert-pred-last-sibling (node)
  "Return non-nil if NODE has no next sibling.
The check calls `treesit-node-next-sibling' with a non-nil second
argument (understood to restrict the search to named siblings).
This is the helper function used by the `pred' query predicate in
the `treesit-query-api' test."
  (not (treesit-node-next-sibling node t)))
(ert-deftest treesit-query-api ()
"Tests for query API."
(skip-unless (treesit-language-available-p 'json))
@ -357,13 +360,16 @@ BODY is the test body."
(pair key: (_) @keyword)
((_) @bob (#match \"^B.b$\" @bob))
(number) @number
((number) @n3 (#equal \"3\" @n3)) "
((number) @n3 (#equal \"3\" @n3))
((number) @n3p (#pred treesit--ert-pred-last-sibling @n3p))"
;; Sexp query.
((string) @string
(pair key: (_) @keyword)
((_) @bob (:match "^B.b$" @bob))
(number) @number
((number) @n3 (:equal "3" @n3)))))
((number) @n3 (:equal "3" @n3))
((number) @n3p (:pred treesit--ert-pred-last-sibling
@n3p)))))
;; Test `treesit-query-compile'.
(dolist (query (list query1
(treesit-query-compile 'json query1)))
@ -375,7 +381,8 @@ BODY is the test body."
(string . "\"Bob\"")
(bob . "Bob")
(number . "3")
(n3 . "3"))
(n3 . "3")
(n3p . "3"))
(mapcar (lambda (entry)
(cons (car entry)
(treesit-node-text