diff --git a/gcc/rust/resolve/rust-forever-stack.h b/gcc/rust/resolve/rust-forever-stack.h new file mode 100644 index 00000000000..ff4221bd2ba --- /dev/null +++ b/gcc/rust/resolve/rust-forever-stack.h @@ -0,0 +1,533 @@ +// Copyright (C) 2020-2023 Free Software Foundation, Inc. + +// This file is part of GCC. + +// GCC is free software; you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3, or (at your option) any later +// version. + +// GCC is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. + +// You should have received a copy of the GNU General Public License +// along with GCC; see the file COPYING3. If not see +// . + +#ifndef RUST_FOREVER_STACK_H +#define RUST_FOREVER_STACK_H + +#include "rust-system.h" +#include "rust-rib.h" +#include "rust-ast.h" +#include "rust-path.h" +#include "optional.h" +#include "expected.h" + +namespace Rust { +namespace Resolver2_0 { + +/** + +Let's look at our stack for resolving and traversing the following Rust code: + +```rust +mod foo { + mod bar { + fn outer() { + fn inner() {} + } + + fn another() {} + } +} +``` + +We start by creating the stack, which contains only one rib - the crate's. We +won't look in details on how different namespaces end up with different stacks, +and will only consider the "value" namespace for this example. Modules do not +get added to the value namespace, but functions do: + +```rust +let _ = foo; // foo is a module, invalid Rust code +let _ = outer; // outer is a function, ok! +``` + +So passing each module will create a new Rib, but not add that module's node to +the Rib. + +The current cursor of the stack will be denoted with `-->`: an arrow pointing to +the current rib. + +When we start the `TopLevel` pass on the crate we are compiling, we only see the +top rib, which is empty at first: + + ┌───────────────┐ + │ │ + --> │ │ + │ │ + └───────────────┘ + +We pass through our first module, and emplace another Rib: Another "scope" is +created, and it impacts name resolution rules. + + ┌───────────────┐ + │ │ + │ │ + │ │ + └───────┬───────┘ + │ + foo │ + │ + ▼ + ┌───────────────┐ + │ │ + --> │ │ + │ │ + └───────────────┘ + +Notice that we have moved the cursor to the newly-created Rib, and that we have +added a path between the two ribs - this is a `Link`. A link contains +information such as the scope's NodeId, as well as an optional path - present +only when the scope is named. This allows us to easily fetch AST nodes based on +their canonical path, or build a canonical path from a NodeId. It also makes it +really easy to do complex path name resolution, such as `super::super::`. +As mentioned earlier, modules are not present in the value namespace, so our new +rib is also empty. Let's pass through the second module: + + ┌───────────────┐ + │ │ + │ │ + │ │ + └───────┬───────┘ + │ + foo │ + │ + ▼ + ┌───────────────┐ + │ │ + │ │ + │ │ + └───────┬───────┘ + │ + bar │ + │ + ▼ + ┌───────────────┐ + │ │ + --> │ │ + │ │ + └───────────────┘ + +Once again, the new rib is empty, and we have a link with a path. We now go +through each item in the `bar` module and visit them. The first item is a +function, `outer` - upon being visited, it adds itself to the current rib. + + ┌───────────────┐ + │ │ + │ │ + │ │ + └───────┬───────┘ + │ + foo │ + │ + ▼ + ┌───────────────┐ + │ │ + │ │ + │ │ + └───────┬───────┘ + │ + bar │ + │ + ▼ + ┌───────────────┐ + │outer │ + --> │ │ + │ │ + └───────────────┘ + +We now visit `outer`'s definition. This creates a new Rib, as functions can have +arguments, whose declaration only lives for the function's scope. + + ┌───────────────┐ + │ │ + │ │ + │ │ + └───────┬───────┘ + │ + foo │ + │ + ▼ + ┌───────────────┐ + │ │ + │ │ + │ │ + └───────┬───────┘ + │ + bar │ + │ + ▼ + ┌───────────────┐ + │outer │ + │ │ + │ │ + └───────┬───────┘ + │ + │ + │ + ▼ + ┌───────────────┐ + │ │ + --> │ │ + │ │ + └───────────────┘ + +This rib is anonymous (the link to it does not have a path), because we cannot +refer to a function's inner items from the outside: + +```rust +pub mod a { + pub fn foo() {} +} + +pub fn b() { + pub fn foo() {} +} + +fn main() { + a::foo(); // ok + b::foo(); // ko! +} +``` + +We visit the function's block, which contain a single declaration, a function +named `inner`. It adds itself to the current rib. + + ┌───────────────┐ + │ │ + │ │ + │ │ + └───────┬───────┘ + │ + foo │ + │ + ▼ + ┌───────────────┐ + │ │ + │ │ + │ │ + └───────┬───────┘ + │ + bar │ + │ + ▼ + ┌───────────────┐ + │outer │ + │ │ + │ │ + └───────┬───────┘ + │ + │ + │ + ▼ + ┌───────────────┐ + │inner │ + --> │ │ + │ │ + └───────────────┘ + +We visit `inner`, which yields a rib but no other declaration. + + ┌───────────────┐ + │ │ + │ │ + │ │ + └───────┬───────┘ + │ + foo │ + │ + ▼ + ┌───────────────┐ + │ │ + │ │ + │ │ + └───────┬───────┘ + │ + bar │ + │ + ▼ + ┌───────────────┐ + │outer │ + │ │ + │ │ + └───────┬───────┘ + │ + │ + │ + ▼ + ┌───────────────┐ + │inner │ + │ │ + │ │ + └───────────────┘ + │ + │ + │ + ▼ + ┌───────────────┐ + │ │ + --> │ │ + │ │ + └───────────────┘ + +We are now at the end of the `inner` function, and we want to pop the current +scope. Instead of deleting the current rib, we simply move the cursor backwards. +This allows us to keep track of the existing information and access it in later +name resolution passes. We then finish visiting `outer`, then go back to our +`bar` module. This is what our stack looks like after this. Note how the only +difference is the cursor's location. + + ┌───────────────┐ + │ │ + │ │ + │ │ + └───────┬───────┘ + │ + foo │ + │ + ▼ + ┌───────────────┐ + │ │ + │ │ + │ │ + └───────┬───────┘ + │ + bar │ + │ + ▼ + ┌───────────────┐ + │outer │ + --> │ │ + │ │ + └───────┬───────┘ + │ + │ + │ + ▼ + ┌───────────────┐ + │inner │ + │ │ + │ │ + └───────────────┘ + │ + │ + │ + ▼ + ┌───────────────┐ + │ │ + │ │ + │ │ + └───────────────┘ + +We then visit the remaining `bar` items, which are composed of the `another` +function. It adds itself to the current rib. This function contains no +declarations, but it still creates a Rib upon being visited. We then finish our +visit of `bar`, which marks the end of our visit of `foo`, which marks the end +of our `TopLevel` name resolution pass. + + ┌───────────────┐ + │ │ + --> │ │ + │ │ + └───────┬───────┘ + │ + foo │ + │ + ▼ + ┌───────────────┐ + │ │ + │ │ + │ │ + └───────┬───────┘ + │ + bar │ + │ + ▼ + ┌───────────────┐ + │outer │ + │another │ + │ │ + └───────┬──┬────┘ + │ │ + │ └────────────────────┐ + │ │ + ▼ ▼ + ┌───────────────┐ ┌───────────────┐ + │inner │ │ │ + │ │ │ │ + │ │ │ │ + └───────┬───────┘ └───────────────┘ + │ + │ + │ + ▼ + ┌───────────────┐ + │ │ + │ │ + │ │ + └───────────────┘ + +We now have a stack with a lot of ribs, prime for the `Early` and `Late` name +resolution passes. We will revisit the ribs we created in these passes, and we +won't need to allocate or create new ones: because they will still be present in +the stack, we will simply move our cursor to these ribs. In this case, there is +nothing to do, since there are no uses of our definitions, as the Rust code we +are name-resolving is not really interesting. You'll also note that our +`TopLevel` pass did not resolve a whole lot: all it did was create new ribs, and +empty ones at that. The `Early` pass will not go further, since our code does +not contain any imports, macro definitions or macro invocations. You can look at +this pass's documentation for more details on this resolution process. + +**/ +template class ForeverStack +{ +public: + ForeverStack () + : root (Node (Rib (Rib::Kind::Normal))), cursor_reference (root) + { + rust_assert (root.is_root ()); + rust_assert (root.is_leaf ()); + } + + /** + * Add a new Rib to the stack. If the Rib already exists, nothing is pushed + * and the stack's cursor is simply moved to this existing Rib. + * + * @param rib The Rib to push + * @param id The NodeId of the node for which the Rib was created. For + * example, if a Rib is created because a lexical scope is entered, + * then `id` is that `BlockExpr`'s NodeId. + * @param path An optional path if the Rib was created due to a "named" + * lexical scope, like a module's. + */ + void push (Rib rib, NodeId id, tl::optional path = {}); + + /** + * Pop the innermost Rib from the stack + */ + void pop (); + + /** + * Insert a new definition in the innermost `Rib` in this stack + * + * @param name The name of the definition + * @param id Its NodeId + * + * @return `DuplicateNameError` if that node was already present in the Rib, + * the node's `NodeId` otherwise. + * + * @aborts if there are no `Rib`s inserted in the current map, this function + * aborts the program. + */ + tl::expected insert (Identifier name, NodeId id); + + /* Access the innermost `Rib` in this map */ + Rib &peek (); + const Rib &peek () const; + + /** + * Reverse iter on all ribs from the innermost one to the outermost one, + * trying to find a name. This is the default algorithm. + * This function gets specialized based on the Rib::Kind + * this way, we ensure a proper resolution algorithm at the type level + * + * @param name Name of the identifier to locate in this scope or an outermore + * scope + * + * @return a valid option with the NodeId if the identifier is present in the + * current map, an empty one otherwise. + */ + inline tl::optional get (const Identifier &name); + + // TODO: Should we dump to a file instead, like the other dumps? Or just print + // the string to a file in the SessionManager? + std::string as_debug_string (); + +private: + /** + * A link between two Nodes in our trie data structure. This class represents + * the edges of the graph + */ + class Link + { + public: + Link (NodeId id, tl::optional path) : id (id), path (path) {} + + bool compare (const Link &other) const { return id < other.id; } + + NodeId id; + tl::optional path; + }; + + /* Link comparison class, which we use in a Node's `children` map */ + class LinkCmp + { + public: + bool operator() (const Link &lhs, const Link &rhs) const + { + return lhs.compare (rhs); + } + }; + + class Node + { + public: + Node (Rib rib) : rib (rib) {} + Node (Rib rib, Node &parent) : rib (rib), parent (parent) {} + + bool is_root () const; + bool is_leaf () const; + + void insert_child (Link link, Node child); + + Rib rib; // this is the "value" of the node - the data it keeps. + std::map children; // all the other nodes it links to + + tl::optional parent; // `None` only if the node is a root + }; + + /* Should we keep going upon seeing a Rib? */ + enum class KeepGoing + { + Yes, + No, + }; + + /* Add a new Rib to the stack. This is an internal method */ + void push_inner (Rib rib, Link link); + + /* Reverse iterate on all Ribs from the current one, in an outwards fashion */ + void reverse_iter (std::function lambda); + + Node &cursor (); + const Node &cursor () const; + void update_cursor (Node &new_cursor); + + Node root; + std::reference_wrapper cursor_reference; + + void stream_rib (std::stringstream &stream, const Rib &rib, + const std::string &next, const std::string &next_next); + void stream_node (std::stringstream &stream, unsigned indentation, + const Node &node); +}; + +} // namespace Resolver2_0 +} // namespace Rust + +#include "rust-forever-stack.hxx" + +#endif // !RUST_FOREVER_STACK_H diff --git a/gcc/rust/resolve/rust-forever-stack.hxx b/gcc/rust/resolve/rust-forever-stack.hxx new file mode 100644 index 00000000000..2685f1d108e --- /dev/null +++ b/gcc/rust/resolve/rust-forever-stack.hxx @@ -0,0 +1,249 @@ +// Copyright (C) 2020-2023 Free Software Foundation, Inc. + +// This file is part of GCC. + +// GCC is free software; you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3, or (at your option) any later +// version. + +// GCC is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. + +// You should have received a copy of the GNU General Public License +// along with GCC; see the file COPYING3. If not see +// . + +#include "rust-forever-stack.h" +#include "rust-rib.h" +#include "optional.h" + +namespace Rust { +namespace Resolver2_0 { + +template +bool +ForeverStack::Node::is_root () const +{ + return !parent.has_value (); +} + +template +bool +ForeverStack::Node::is_leaf () const +{ + return children.empty (); +} + +template +void +ForeverStack::Node::insert_child (Link link, Node child) +{ + auto res = children.insert ({link, child}); + + // Do we want to error if the child already exists? Probably not, right? + // That's kinda the point, isn't it. So this method always succeeds, right? +} + +// FIXME: Add correct implementation +template +void +ForeverStack::push (Rib rib, NodeId id, tl::optional path) +{ + push_inner (rib, Link (id, path)); +} + +template +void +ForeverStack::push_inner (Rib rib, Link link) +{ + // If the link does not exist, we create it and emplace a new `Node` with the + // current node as its parent. `unordered_map::emplace` returns a pair with + // the iterator and a boolean. If the value already exists, the iterator + // points to it. Otherwise, it points to the newly emplaced value, so we can + // just update our cursor(). + auto emplace + = cursor ().children.emplace (std::make_pair (link, Node (rib, cursor ()))); + + auto it = emplace.first; + auto existed = !emplace.second; + + rust_debug ("inserting link: Link(%d [%s]): existed? %s", link.id, + link.path.has_value () ? link.path.value ().as_string ().c_str () + : "", + existed ? "yes" : "no"); + + // We update the cursor + update_cursor (it->second); +} + +template +void +ForeverStack::pop () +{ + assert (!cursor ().is_root ()); + + rust_debug ("popping link"); + + for (const auto &kv : cursor ().rib.get_values ()) + rust_debug ("current_rib: k: %s, v: %d", kv.first.c_str (), kv.second); + + if (cursor ().parent.has_value ()) + for (const auto &kv : cursor ().parent.value ().rib.get_values ()) + rust_debug ("new cursor: k: %s, v: %d", kv.first.c_str (), kv.second); + + update_cursor (cursor ().parent.value ()); +} + +template +tl::expected +ForeverStack::insert (Identifier name, NodeId node) +{ + auto &innermost_rib = peek (); + + // So what do we do here - if the Rib has already been pushed in an earlier + // pass, we might end up in a situation where it is okay to re-add new names. + // Do we just ignore that here? Do we keep track of if the Rib is new or not? + // should our cursor have info on the current node like "is it newly pushed"? + return innermost_rib.insert (name.as_string (), node); +} + +template +Rib & +ForeverStack::peek () +{ + return cursor ().rib; +} + +template +const Rib & +ForeverStack::peek () const +{ + return cursor ().rib; +} + +template +void +ForeverStack::reverse_iter (std::function lambda) +{ + auto &tmp = cursor (); + + while (true) + { + auto keep_going = lambda (tmp); + if (keep_going == KeepGoing::No) + return; + + if (tmp.is_root ()) + return; + + tmp = tmp.parent.value (); + } +} + +template +typename ForeverStack::Node & +ForeverStack::cursor () +{ + return cursor_reference; +} + +template +const typename ForeverStack::Node & +ForeverStack::cursor () const +{ + return cursor_reference; +} + +template +void +ForeverStack::update_cursor (Node &new_cursor) +{ + cursor_reference = new_cursor; +} + +template +tl::optional +ForeverStack::get (const Identifier &name) +{ + return {}; +} + +// TODO: Are there different fetching behavior for different namespaces? +// inline template <> +// tl::optional +// ForeverStack::get (const Identifier &name) +// { +// return {}; +// } + +template +void +ForeverStack::stream_rib (std::stringstream &stream, const Rib &rib, + const std::string &next, + const std::string &next_next) +{ + if (rib.get_values ().empty ()) + { + stream << next << "rib: {},\n"; + return; + } + + stream << next << "rib: {\n"; + + for (const auto &kv : rib.get_values ()) + stream << next_next << kv.first << ": " << kv.second << "\n"; + + stream << next << "},\n"; +} + +template +void +ForeverStack::stream_node (std::stringstream &stream, unsigned indentation, + const ForeverStack::Node &node) +{ + auto indent = std::string (indentation, ' '); + auto next = std::string (indentation + 4, ' '); + auto next_next = std::string (indentation + 8, ' '); + + stream << indent << "Node {\n" + << next << "is_root: " << (node.is_root () ? "true" : "false") << ",\n" + << next << "is_leaf: " << (node.is_leaf () ? "true" : "false") + << ",\n"; + + stream_rib (stream, node.rib, next, next_next); + + stream << indent << "}\n"; + + for (auto &kv : node.children) + { + auto link = kv.first; + auto child = kv.second; + stream << indent << "Link (" << link.id << ", " + << (link.path.has_value () ? link.path.value ().as_string () + : "") + << "):\n"; + + stream_node (stream, indentation + 4, child); + + stream << '\n'; + } +} + +template +std::string +ForeverStack::as_debug_string () +{ + std::stringstream stream; + + stream_node (stream, 0, root); + + return stream.str (); +} + +// FIXME: Can we add selftests? + +} // namespace Resolver2_0 +} // namespace Rust