Add a new tree-sitter query predicate 'pred'

I realized that using an arbitrary function as the predicate in
queries is very helpful for some queries I'm writing for Python and
JavaScript, and presumably for most other languages[1].

Granted, we can already filter out unwanted nodes by using a function
instead of a face for the capture name; that function can (1) determine
whether the captured node is valid and (2) fontify the node if it is.

However, such an approach is a bit more cumbersome and, more importantly,
gets in the way of another potential use of the fontification queries:
context extraction.

For example, I could use the query for the 'variable' feature to get
all the variables in a certain region.  In this use case, we want the
filtering to happen before the captured nodes are returned.

Besides, the change is relatively small and straightforward: most of
the code is already there; I just need to add some boilerplate.

[1] For code like aa.bb(cc), we want bb to be in function face,
because it's obviously a function.  But for aa.bb, we want bb to be in
property face, because it's a property.  In the AST, bb is always a
property; the difference between the two cases is the enclosing node:
in the first case, aa.bb is in a "call_expression" node, indicating
that bb is used as a function (a method).  So we want a predicate
function that checks whether bb is used as a function or a property,
and thereby determines whether it should be in function or property face.

* doc/lispref/parsing.texi (Pattern Matching): Update manual.
* src/treesit.c (Ftreesit_pattern_expand): Handle :pred.
(treesit_predicate_capture_name_to_node): A new function extracted
from treesit_predicate_capture_name_to_text.
(treesit_predicate_capture_name_to_text): Use the newly extracted
function.
(treesit_predicate_pred): New predicate function.
(treesit_eval_predicates): Add new predicate.  Also fix a bug: we want
to AND the results of each predicate.
* test/src/treesit-tests.el (treesit--ert-pred-last-sibling): New
helper function.
(treesit-query-api): Test #pred predicate.
This commit is contained in:
Yuan Fu 2022-12-26 17:16:59 -08:00
parent 835a80dcc4
commit a6d961ae2f
No known key found for this signature in database
GPG key ID: 56E19BC57664A442
3 changed files with 65 additions and 19 deletions

View file

@ -1266,10 +1266,11 @@ example, with the following pattern:
@end example @end example
@noindent @noindent
tree-sitter only matches arrays where the first element equals to tree-sitter only matches arrays where the first element equals to the
the last element. To attach a predicate to a pattern, we need to last element. To attach a predicate to a pattern, we need to group
group them together. A predicate always starts with a @samp{#}. them together. A predicate always starts with a @samp{#}. Currently
Currently there are two predicates, @code{#equal} and @code{#match}. there are three predicates, @code{#equal}, @code{#match}, and
@code{#pred}.
@deffn Predicate equal arg1 arg2 @deffn Predicate equal arg1 arg2
Matches if @var{arg1} equals to @var{arg2}. Arguments can be either Matches if @var{arg1} equals to @var{arg2}. Arguments can be either
@ -1282,6 +1283,11 @@ Matches if the text that @var{capture-name}'s node spans in the buffer
matches regular expression @var{regexp}. Matching is case-sensitive. matches regular expression @var{regexp}. Matching is case-sensitive.
@end deffn @end deffn
@deffn Predicate pred fn &rest nodes
Matches if function @var{fn} returns non-@code{nil} when called once
with all the nodes in @var{nodes} as its arguments.
@end deffn
Note that a predicate can only refer to capture names that appear in Note that a predicate can only refer to capture names that appear in
the same pattern. Indeed, it makes little sense to refer to capture the same pattern. Indeed, it makes little sense to refer to capture
names in other patterns. names in other patterns.

View file

@ -2170,6 +2170,8 @@ See Info node `(elisp)Pattern Matching' for detailed explanation. */)
return build_pure_c_string ("#equal"); return build_pure_c_string ("#equal");
if (EQ (pattern, QCmatch)) if (EQ (pattern, QCmatch))
return build_pure_c_string ("#match"); return build_pure_c_string ("#match");
if (EQ (pattern, QCpred))
return build_pure_c_string ("#pred");
Lisp_Object opening_delimeter Lisp_Object opening_delimeter
= build_pure_c_string (VECTORP (pattern) ? "[" : "("); = build_pure_c_string (VECTORP (pattern) ? "[" : "(");
Lisp_Object closing_delimiter Lisp_Object closing_delimiter
@ -2269,10 +2271,10 @@ treesit_predicates_for_pattern (TSQuery *query, uint32_t pattern_index)
return Fnreverse (result); return Fnreverse (result);
} }
/* Translate a capture NAME (symbol) to the text of the captured node. /* Translate a capture NAME (symbol) to a node.
Signals treesit-query-error if such node is not captured. */ Signals treesit-query-error if such node is not captured. */
static Lisp_Object static Lisp_Object
treesit_predicate_capture_name_to_text (Lisp_Object name, treesit_predicate_capture_name_to_node (Lisp_Object name,
struct capture_range captures) struct capture_range captures)
{ {
Lisp_Object node = Qnil; Lisp_Object node = Qnil;
@ -2292,6 +2294,16 @@ treesit_predicate_capture_name_to_text (Lisp_Object name,
name, build_pure_c_string ("A predicate can only refer" name, build_pure_c_string ("A predicate can only refer"
" to captured nodes in the " " to captured nodes in the "
"same pattern")); "same pattern"));
return node;
}
/* Translate a capture NAME (symbol) to the text of the captured node.
Signals treesit-query-error if such node is not captured. */
static Lisp_Object
treesit_predicate_capture_name_to_text (Lisp_Object name,
struct capture_range captures)
{
Lisp_Object node = treesit_predicate_capture_name_to_node (name, captures);
struct buffer *old_buffer = current_buffer; struct buffer *old_buffer = current_buffer;
set_buffer_internal (XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer)); set_buffer_internal (XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer));
@ -2365,13 +2377,30 @@ treesit_predicate_match (Lisp_Object args, struct capture_range captures)
return false; return false;
} }
/* About predicates: I decide to hard-code predicates in C instead of /* Handles predicate (#pred FN ARG...). Return true if FN returns
implementing an extensible system where predicates are translated non-nil; return false otherwise. The arity of FN must match the
to Lisp functions, and new predicates can be added by extending a number of ARGs */
list of functions, because I really couldn't imagine any useful static bool
predicates besides equal and match. If we later found out that treesit_predicate_pred (Lisp_Object args, struct capture_range captures)
such system is indeed useful and necessary, it can be easily {
added. */ if (XFIXNUM (Flength (args)) < 2)
xsignal2 (Qtreesit_query_error,
build_pure_c_string ("Predicate `pred' requires "
"at least two arguments, "
"but was only given"),
Flength (args));
Lisp_Object fn = Fintern (XCAR (args), Qnil);
Lisp_Object nodes = Qnil;
Lisp_Object tail = XCDR (args);
FOR_EACH_TAIL (tail)
nodes = Fcons (treesit_predicate_capture_name_to_node (XCAR (tail),
captures),
nodes);
nodes = Fnreverse (nodes);
return !NILP (CALLN (Fapply, fn, nodes));
}
/* If all predicates in PREDICATES passes, return true; otherwise /* If all predicates in PREDICATES passes, return true; otherwise
return false. */ return false. */
@ -2387,14 +2416,17 @@ treesit_eval_predicates (struct capture_range captures, Lisp_Object predicates)
Lisp_Object fn = XCAR (predicate); Lisp_Object fn = XCAR (predicate);
Lisp_Object args = XCDR (predicate); Lisp_Object args = XCDR (predicate);
if (!NILP (Fstring_equal (fn, build_pure_c_string ("equal")))) if (!NILP (Fstring_equal (fn, build_pure_c_string ("equal"))))
pass = treesit_predicate_equal (args, captures); pass &= treesit_predicate_equal (args, captures);
else if (!NILP (Fstring_equal (fn, build_pure_c_string ("match")))) else if (!NILP (Fstring_equal (fn, build_pure_c_string ("match"))))
pass = treesit_predicate_match (args, captures); pass &= treesit_predicate_match (args, captures);
else if (!NILP (Fstring_equal (fn, build_pure_c_string ("pred"))))
pass &= treesit_predicate_pred (args, captures);
else else
xsignal3 (Qtreesit_query_error, xsignal3 (Qtreesit_query_error,
build_pure_c_string ("Invalid predicate"), build_pure_c_string ("Invalid predicate"),
fn, build_pure_c_string ("Currently Emacs only supports" fn, build_pure_c_string ("Currently Emacs only supports"
" equal and match predicate")); " equal, match, and pred"
" predicate"));
} }
/* If all predicates passed, add captures to result list. */ /* If all predicates passed, add captures to result list. */
return pass; return pass;
@ -3217,6 +3249,7 @@ syms_of_treesit (void)
DEFSYM (QCanchor, ":anchor"); DEFSYM (QCanchor, ":anchor");
DEFSYM (QCequal, ":equal"); DEFSYM (QCequal, ":equal");
DEFSYM (QCmatch, ":match"); DEFSYM (QCmatch, ":match");
DEFSYM (QCpred, ":pred");
DEFSYM (Qnot_found, "not-found"); DEFSYM (Qnot_found, "not-found");
DEFSYM (Qsymbol_error, "symbol-error"); DEFSYM (Qsymbol_error, "symbol-error");

View file

@ -335,6 +335,9 @@ BODY is the test body."
;;; Query ;;; Query
(defun treesit--ert-pred-last-sibling (node)
(null (treesit-node-next-sibling node t)))
(ert-deftest treesit-query-api () (ert-deftest treesit-query-api ()
"Tests for query API." "Tests for query API."
(skip-unless (treesit-language-available-p 'json)) (skip-unless (treesit-language-available-p 'json))
@ -357,13 +360,16 @@ BODY is the test body."
(pair key: (_) @keyword) (pair key: (_) @keyword)
((_) @bob (#match \"^B.b$\" @bob)) ((_) @bob (#match \"^B.b$\" @bob))
(number) @number (number) @number
((number) @n3 (#equal \"3\" @n3)) " ((number) @n3 (#equal \"3\" @n3))
((number) @n3p (#pred treesit--ert-pred-last-sibling @n3p))"
;; Sexp query. ;; Sexp query.
((string) @string ((string) @string
(pair key: (_) @keyword) (pair key: (_) @keyword)
((_) @bob (:match "^B.b$" @bob)) ((_) @bob (:match "^B.b$" @bob))
(number) @number (number) @number
((number) @n3 (:equal "3" @n3))))) ((number) @n3 (:equal "3" @n3))
((number) @n3p (:pred treesit--ert-pred-last-sibling
@n3p)))))
;; Test `treesit-query-compile'. ;; Test `treesit-query-compile'.
(dolist (query (list query1 (dolist (query (list query1
(treesit-query-compile 'json query1))) (treesit-query-compile 'json query1)))
@ -375,7 +381,8 @@ BODY is the test body."
(string . "\"Bob\"") (string . "\"Bob\"")
(bob . "Bob") (bob . "Bob")
(number . "3") (number . "3")
(n3 . "3")) (n3 . "3")
(n3p . "3"))
(mapcar (lambda (entry) (mapcar (lambda (entry)
(cons (car entry) (cons (car entry)
(treesit-node-text (treesit-node-text