gccrs: nr2.0: Add ForeverStack data structure.

This data structure replaces our existing scopes and allows for multiple name resolution passes to insert definitions or look up names at different stages of the pipeline. The documentation goes into detail about how the data structure works, and how the source's hierarchy is preserved throughout the pipeline. The class is templated to enforce specific behavior based on the namespace the scope map targets. gcc/rust/ChangeLog: * resolve/rust-forever-stack.h: New file. * resolve/rust-forever-stack.hxx: New file.
2023-06-23 16:16:34 +02:00 · 2023-06-23 16:16:34 +02:00 · 2327631e4b
commit 2327631e4b
parent bcd76dbcaa
2 changed files with 782 additions and 0 deletions
--- a/gcc/rust/resolve/rust-forever-stack.h
+++ b/gcc/rust/resolve/rust-forever-stack.h
@ -0,0 +1,533 @@
+// Copyright (C) 2020-2023 Free Software Foundation, Inc.
+
+// This file is part of GCC.
+
+// GCC is free software; you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 3, or (at your option) any later
+// version.
+
+// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+// for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with GCC; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+#ifndef RUST_FOREVER_STACK_H
+#define RUST_FOREVER_STACK_H
+
+#include "rust-system.h"
+#include "rust-rib.h"
+#include "rust-ast.h"
+#include "rust-path.h"
+#include "optional.h"
+#include "expected.h"
+
+namespace Rust {
+namespace Resolver2_0 {
+
+/**
+
+Let's look at our stack for resolving and traversing the following Rust code:
+
+```rust
+mod foo {
+    mod bar {
+	fn outer() {
+	    fn inner() {}
+	}
+
+	fn another() {}
+    }
+}
+```
+
+We start by creating the stack, which contains only one rib - the crate's. We
+won't look in details on how different namespaces end up with different stacks,
+and will only consider the "value" namespace for this example. Modules do not
+get added to the value namespace, but functions do:
+
+```rust
+let _ = foo;   // foo is a module, invalid Rust code
+let _ = outer; // outer is a function, ok!
+```
+
+So passing each module will create a new Rib, but not add that module's node to
+the Rib.
+
+The current cursor of the stack will be denoted with `-->`: an arrow pointing to
+the current rib.
+
+When we start the `TopLevel` pass on the crate we are compiling, we only see the
+top rib, which is empty at first:
+
+      ┌───────────────┐
+      │               │
+  --> │               │
+      │               │
+      └───────────────┘
+
+We pass through our first module, and emplace another Rib: Another "scope" is
+created, and it impacts name resolution rules.
+
+      ┌───────────────┐
+      │               │
+      │               │
+      │               │
+      └───────┬───────┘
+	      │
+	  foo │
+	      │
+	      ▼
+      ┌───────────────┐
+      │               │
+  --> │               │
+      │               │
+      └───────────────┘
+
+Notice that we have moved the cursor to the newly-created Rib, and that we have
+added a path between the two ribs - this is a `Link`. A link contains
+information such as the scope's NodeId, as well as an optional path - present
+only when the scope is named. This allows us to easily fetch AST nodes based on
+their canonical path, or build a canonical path from a NodeId. It also makes it
+really easy to do complex path name resolution, such as `super::super::<item>`.
+As mentioned earlier, modules are not present in the value namespace, so our new
+rib is also empty. Let's pass through the second module:
+
+      ┌───────────────┐
+      │               │
+      │               │
+      │               │
+      └───────┬───────┘
+	      │
+	  foo │
+	      │
+	      ▼
+      ┌───────────────┐
+      │               │
+      │               │
+      │               │
+      └───────┬───────┘
+	      │
+	  bar │
+	      │
+	      ▼
+      ┌───────────────┐
+      │               │
+  --> │               │
+      │               │
+      └───────────────┘
+
+Once again, the new rib is empty, and we have a link with a path. We now go
+through each item in the `bar` module and visit them. The first item is a
+function, `outer` - upon being visited, it adds itself to the current rib.
+
+      ┌───────────────┐
+      │               │
+      │               │
+      │               │
+      └───────┬───────┘
+	      │
+	  foo │
+	      │
+	      ▼
+      ┌───────────────┐
+      │               │
+      │               │
+      │               │
+      └───────┬───────┘
+	      │
+	  bar │
+	      │
+	      ▼
+      ┌───────────────┐
+      │outer          │
+  --> │               │
+      │               │
+      └───────────────┘
+
+We now visit `outer`'s definition. This creates a new Rib, as functions can have
+arguments, whose declaration only lives for the function's scope.
+
+      ┌───────────────┐
+      │               │
+      │               │
+      │               │
+      └───────┬───────┘
+	      │
+	  foo │
+	      │
+	      ▼
+      ┌───────────────┐
+      │               │
+      │               │
+      │               │
+      └───────┬───────┘
+	      │
+	  bar │
+	      │
+	      ▼
+      ┌───────────────┐
+      │outer          │
+      │               │
+      │               │
+      └───────┬───────┘
+	      │
+       <anon> │
+	      │
+	      ▼
+      ┌───────────────┐
+      │               │
+  --> │               │
+      │               │
+      └───────────────┘
+
+This rib is anonymous (the link to it does not have a path), because we cannot
+refer to a function's inner items from the outside:
+
+```rust
+pub mod a {
+    pub fn foo() {}
+}
+
+pub fn b() {
+    pub fn foo() {}
+}
+
+fn main() {
+    a::foo(); // ok
+    b::foo(); // ko!
+}
+```
+
+We visit the function's block, which contain a single declaration, a function
+named `inner`. It adds itself to the current rib.
+
+      ┌───────────────┐
+      │               │
+      │               │
+      │               │
+      └───────┬───────┘
+	      │
+	  foo │
+	      │
+	      ▼
+      ┌───────────────┐
+      │               │
+      │               │
+      │               │
+      └───────┬───────┘
+	      │
+	  bar │
+	      │
+	      ▼
+      ┌───────────────┐
+      │outer          │
+      │               │
+      │               │
+      └───────┬───────┘
+	      │
+       <anon> │
+	      │
+	      ▼
+      ┌───────────────┐
+      │inner          │
+  --> │               │
+      │               │
+      └───────────────┘
+
+We visit `inner`, which yields a rib but no other declaration.
+
+      ┌───────────────┐
+      │               │
+      │               │
+      │               │
+      └───────┬───────┘
+	      │
+	  foo │
+	      │
+	      ▼
+      ┌───────────────┐
+      │               │
+      │               │
+      │               │
+      └───────┬───────┘
+	      │
+	  bar │
+	      │
+	      ▼
+      ┌───────────────┐
+      │outer          │
+      │               │
+      │               │
+      └───────┬───────┘
+	      │
+       <anon> │
+	      │
+	      ▼
+      ┌───────────────┐
+      │inner          │
+      │               │
+      │               │
+      └───────────────┘
+	      │
+       <anon> │
+	      │
+	      ▼
+      ┌───────────────┐
+      │               │
+  --> │               │
+      │               │
+      └───────────────┘
+
+We are now at the end of the `inner` function, and we want to pop the current
+scope. Instead of deleting the current rib, we simply move the cursor backwards.
+This allows us to keep track of the existing information and access it in later
+name resolution passes. We then finish visiting `outer`, then go back to our
+`bar` module. This is what our stack looks like after this. Note how the only
+difference is the cursor's location.
+
+      ┌───────────────┐
+      │               │
+      │               │
+      │               │
+      └───────┬───────┘
+	      │
+	  foo │
+	      │
+	      ▼
+      ┌───────────────┐
+      │               │
+      │               │
+      │               │
+      └───────┬───────┘
+	      │
+	  bar │
+	      │
+	      ▼
+      ┌───────────────┐
+      │outer          │
+  --> │               │
+      │               │
+      └───────┬───────┘
+	      │
+       <anon> │
+	      │
+	      ▼
+      ┌───────────────┐
+      │inner          │
+      │               │
+      │               │
+      └───────────────┘
+	      │
+       <anon> │
+	      │
+	      ▼
+      ┌───────────────┐
+      │               │
+      │               │
+      │               │
+      └───────────────┘
+
+We then visit the remaining `bar` items, which are composed of the `another`
+function. It adds itself to the current rib. This function contains no
+declarations, but it still creates a Rib upon being visited. We then finish our
+visit of `bar`, which marks the end of our visit of `foo`, which marks the end
+of our `TopLevel` name resolution pass.
+
+      ┌───────────────┐
+      │               │
+  --> │               │
+      │               │
+      └───────┬───────┘
+	      │
+	  foo │
+	      │
+	      ▼
+      ┌───────────────┐
+      │               │
+      │               │
+      │               │
+      └───────┬───────┘
+	      │
+	  bar │
+	      │
+	      ▼
+      ┌───────────────┐
+      │outer          │
+      │another        │
+      │               │
+      └───────┬──┬────┘
+	      │  │       <anon>
+       <anon> │  └────────────────────┐
+	      │                       │
+	      ▼                       ▼
+      ┌───────────────┐       ┌───────────────┐
+      │inner          │       │               │
+      │               │       │               │
+      │               │       │               │
+      └───────┬───────┘       └───────────────┘
+	      │
+       <anon> │
+	      │
+	      ▼
+      ┌───────────────┐
+      │               │
+      │               │
+      │               │
+      └───────────────┘
+
+We now have a stack with a lot of ribs, prime for the `Early` and `Late` name
+resolution passes. We will revisit the ribs we created in these passes, and we
+won't need to allocate or create new ones: because they will still be present in
+the stack, we will simply move our cursor to these ribs. In this case, there is
+nothing to do, since there are no uses of our definitions, as the Rust code we
+are name-resolving is not really interesting. You'll also note that our
+`TopLevel` pass did not resolve a whole lot: all it did was create new ribs, and
+empty ones at that. The `Early` pass will not go further, since our code does
+not contain any imports, macro definitions or macro invocations. You can look at
+this pass's documentation for more details on this resolution process.
+
+**/
+template <Namespace N> class ForeverStack
+{
+public:
+  ForeverStack ()
+    : root (Node (Rib (Rib::Kind::Normal))), cursor_reference (root)
+  {
+    rust_assert (root.is_root ());
+    rust_assert (root.is_leaf ());
+  }
+
+  /**
+   * Add a new Rib to the stack. If the Rib already exists, nothing is pushed
+   * and the stack's cursor is simply moved to this existing Rib.
+   *
+   * @param rib The Rib to push
+   * @param id The NodeId of the node for which the Rib was created. For
+   *        example, if a Rib is created because a lexical scope is entered,
+   *        then `id` is that `BlockExpr`'s NodeId.
+   * @param path An optional path if the Rib was created due to a "named"
+   *        lexical scope, like a module's.
+   */
+  void push (Rib rib, NodeId id, tl::optional<Identifier> path = {});
+
+  /**
+   * Pop the innermost Rib from the stack
+   */
+  void pop ();
+
+  /**
+   * Insert a new definition in the innermost `Rib` in this stack
+   *
+   * @param name The name of the definition
+   * @param id Its NodeId
+   *
+   * @return `DuplicateNameError` if that node was already present in the Rib,
+   * the node's `NodeId` otherwise.
+   *
+   * @aborts if there are no `Rib`s inserted in the current map, this function
+   *         aborts the program.
+   */
+  tl::expected<NodeId, DuplicateNameError> insert (Identifier name, NodeId id);
+
+  /* Access the innermost `Rib` in this map */
+  Rib &peek ();
+  const Rib &peek () const;
+
+  /**
+   * Reverse iter on all ribs from the innermost one to the outermost one,
+   * trying to find a name. This is the default algorithm.
+   * This function gets specialized based on the Rib::Kind
+   * this way, we ensure a proper resolution algorithm at the type level
+   *
+   * @param name Name of the identifier to locate in this scope or an outermore
+   *        scope
+   *
+   * @return a valid option with the NodeId if the identifier is present in the
+   *         current map, an empty one otherwise.
+   */
+  inline tl::optional<NodeId> get (const Identifier &name);
+
+  // TODO: Should we dump to a file instead, like the other dumps? Or just print
+  // the string to a file in the SessionManager?
+  std::string as_debug_string ();
+
+private:
+  /**
+   * A link between two Nodes in our trie data structure. This class represents
+   * the edges of the graph
+   */
+  class Link
+  {
+  public:
+    Link (NodeId id, tl::optional<Identifier> path) : id (id), path (path) {}
+
+    bool compare (const Link &other) const { return id < other.id; }
+
+    NodeId id;
+    tl::optional<Identifier> path;
+  };
+
+  /* Link comparison class, which we use in a Node's `children` map */
+  class LinkCmp
+  {
+  public:
+    bool operator() (const Link &lhs, const Link &rhs) const
+    {
+      return lhs.compare (rhs);
+    }
+  };
+
+  class Node
+  {
+  public:
+    Node (Rib rib) : rib (rib) {}
+    Node (Rib rib, Node &parent) : rib (rib), parent (parent) {}
+
+    bool is_root () const;
+    bool is_leaf () const;
+
+    void insert_child (Link link, Node child);
+
+    Rib rib; // this is the "value" of the node - the data it keeps.
+    std::map<Link, Node, LinkCmp> children; // all the other nodes it links to
+
+    tl::optional<Node &> parent; // `None` only if the node is a root
+  };
+
+  /* Should we keep going upon seeing a Rib? */
+  enum class KeepGoing
+  {
+    Yes,
+    No,
+  };
+
+  /* Add a new Rib to the stack. This is an internal method */
+  void push_inner (Rib rib, Link link);
+
+  /* Reverse iterate on all Ribs from the current one, in an outwards fashion */
+  void reverse_iter (std::function<KeepGoing (Rib &)> lambda);
+
+  Node &cursor ();
+  const Node &cursor () const;
+  void update_cursor (Node &new_cursor);
+
+  Node root;
+  std::reference_wrapper<Node> cursor_reference;
+
+  void stream_rib (std::stringstream &stream, const Rib &rib,
+		   const std::string &next, const std::string &next_next);
+  void stream_node (std::stringstream &stream, unsigned indentation,
+		    const Node &node);
+};
+
+} // namespace Resolver2_0
+} // namespace Rust
+
+#include "rust-forever-stack.hxx"
+
+#endif // !RUST_FOREVER_STACK_H
--- a/gcc/rust/resolve/rust-forever-stack.hxx
+++ b/gcc/rust/resolve/rust-forever-stack.hxx
@ -0,0 +1,249 @@
+// Copyright (C) 2020-2023 Free Software Foundation, Inc.
+
+// This file is part of GCC.
+
+// GCC is free software; you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 3, or (at your option) any later
+// version.
+
+// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+// for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with GCC; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+#include "rust-forever-stack.h"
+#include "rust-rib.h"
+#include "optional.h"
+
+namespace Rust {
+namespace Resolver2_0 {
+
+template <Namespace N>
+bool
+ForeverStack<N>::Node::is_root () const
+{
+  return !parent.has_value ();
+}
+
+template <Namespace N>
+bool
+ForeverStack<N>::Node::is_leaf () const
+{
+  return children.empty ();
+}
+
+template <Namespace N>
+void
+ForeverStack<N>::Node::insert_child (Link link, Node child)
+{
+  auto res = children.insert ({link, child});
+
+  // Do we want to error if the child already exists? Probably not, right?
+  // That's kinda the point, isn't it. So this method always succeeds, right?
+}
+
+// FIXME: Add correct implementation
+template <Namespace N>
+void
+ForeverStack<N>::push (Rib rib, NodeId id, tl::optional<Identifier> path)
+{
+  push_inner (rib, Link (id, path));
+}
+
+template <Namespace N>
+void
+ForeverStack<N>::push_inner (Rib rib, Link link)
+{
+  // If the link does not exist, we create it and emplace a new `Node` with the
+  // current node as its parent. `unordered_map::emplace` returns a pair with
+  // the iterator and a boolean. If the value already exists, the iterator
+  // points to it. Otherwise, it points to the newly emplaced value, so we can
+  // just update our cursor().
+  auto emplace
+    = cursor ().children.emplace (std::make_pair (link, Node (rib, cursor ())));
+
+  auto it = emplace.first;
+  auto existed = !emplace.second;
+
+  rust_debug ("inserting link: Link(%d [%s]): existed? %s", link.id,
+	      link.path.has_value () ? link.path.value ().as_string ().c_str ()
+				     : "<anon>",
+	      existed ? "yes" : "no");
+
+  // We update the cursor
+  update_cursor (it->second);
+}
+
+template <Namespace N>
+void
+ForeverStack<N>::pop ()
+{
+  assert (!cursor ().is_root ());
+
+  rust_debug ("popping link");
+
+  for (const auto &kv : cursor ().rib.get_values ())
+    rust_debug ("current_rib: k: %s, v: %d", kv.first.c_str (), kv.second);
+
+  if (cursor ().parent.has_value ())
+    for (const auto &kv : cursor ().parent.value ().rib.get_values ())
+      rust_debug ("new cursor: k: %s, v: %d", kv.first.c_str (), kv.second);
+
+  update_cursor (cursor ().parent.value ());
+}
+
+template <Namespace N>
+tl::expected<NodeId, DuplicateNameError>
+ForeverStack<N>::insert (Identifier name, NodeId node)
+{
+  auto &innermost_rib = peek ();
+
+  // So what do we do here - if the Rib has already been pushed in an earlier
+  // pass, we might end up in a situation where it is okay to re-add new names.
+  // Do we just ignore that here? Do we keep track of if the Rib is new or not?
+  // should our cursor have info on the current node like "is it newly pushed"?
+  return innermost_rib.insert (name.as_string (), node);
+}
+
+template <Namespace N>
+Rib &
+ForeverStack<N>::peek ()
+{
+  return cursor ().rib;
+}
+
+template <Namespace N>
+const Rib &
+ForeverStack<N>::peek () const
+{
+  return cursor ().rib;
+}
+
+template <Namespace N>
+void
+ForeverStack<N>::reverse_iter (std::function<KeepGoing (Rib &)> lambda)
+{
+  auto &tmp = cursor ();
+
+  while (true)
+    {
+      auto keep_going = lambda (tmp);
+      if (keep_going == KeepGoing::No)
+	return;
+
+      if (tmp.is_root ())
+	return;
+
+      tmp = tmp.parent.value ();
+    }
+}
+
+template <Namespace N>
+typename ForeverStack<N>::Node &
+ForeverStack<N>::cursor ()
+{
+  return cursor_reference;
+}
+
+template <Namespace N>
+const typename ForeverStack<N>::Node &
+ForeverStack<N>::cursor () const
+{
+  return cursor_reference;
+}
+
+template <Namespace N>
+void
+ForeverStack<N>::update_cursor (Node &new_cursor)
+{
+  cursor_reference = new_cursor;
+}
+
+template <Namespace N>
+tl::optional<NodeId>
+ForeverStack<N>::get (const Identifier &name)
+{
+  return {};
+}
+
+// TODO: Are there different fetching behavior for different namespaces?
+// inline template <>
+// tl::optional<NodeId>
+// ForeverStack<Namespace::Values>::get (const Identifier &name)
+// {
+//   return {};
+// }
+
+template <Namespace N>
+void
+ForeverStack<N>::stream_rib (std::stringstream &stream, const Rib &rib,
+			     const std::string &next,
+			     const std::string &next_next)
+{
+  if (rib.get_values ().empty ())
+    {
+      stream << next << "rib: {},\n";
+      return;
+    }
+
+  stream << next << "rib: {\n";
+
+  for (const auto &kv : rib.get_values ())
+    stream << next_next << kv.first << ": " << kv.second << "\n";
+
+  stream << next << "},\n";
+}
+
+template <Namespace N>
+void
+ForeverStack<N>::stream_node (std::stringstream &stream, unsigned indentation,
+			      const ForeverStack<N>::Node &node)
+{
+  auto indent = std::string (indentation, ' ');
+  auto next = std::string (indentation + 4, ' ');
+  auto next_next = std::string (indentation + 8, ' ');
+
+  stream << indent << "Node {\n"
+	 << next << "is_root: " << (node.is_root () ? "true" : "false") << ",\n"
+	 << next << "is_leaf: " << (node.is_leaf () ? "true" : "false")
+	 << ",\n";
+
+  stream_rib (stream, node.rib, next, next_next);
+
+  stream << indent << "}\n";
+
+  for (auto &kv : node.children)
+    {
+      auto link = kv.first;
+      auto child = kv.second;
+      stream << indent << "Link (" << link.id << ", "
+	     << (link.path.has_value () ? link.path.value ().as_string ()
+					: "<anon>")
+	     << "):\n";
+
+      stream_node (stream, indentation + 4, child);
+
+      stream << '\n';
+    }
+}
+
+template <Namespace N>
+std::string
+ForeverStack<N>::as_debug_string ()
+{
+  std::stringstream stream;
+
+  stream_node (stream, 0, root);
+
+  return stream.str ();
+}
+
+// FIXME: Can we add selftests?
+
+} // namespace Resolver2_0
+} // namespace Rust