
Commit cef153c

TeamSPoon committed Nov 19, 2023
2 parents: 8ba025f + fc37163
Showing 7 changed files with 981 additions and 333 deletions.
c/src/metta.rs (6 changes: 2 additions & 4 deletions)
@@ -822,22 +822,20 @@ pub extern "C" fn metta_new_with_space_environment_and_stdlib(space: *mut space_
 /// @brief Creates a new core MeTTa Interpreter, with no loaded stdlib nor initialization
 /// @ingroup interpreter_group
 /// @param[in] space A pointer to a handle for the Space for use by the Interpreter
-/// @param[in] tokenizer A pointer to a handle for the Tokenizer for use by the Interpreter
 /// @param[in] environment An `env_builder_t` handle to configure the environment to use
 /// @return A `metta_t` handle to the newly created Interpreter
 /// @note The caller must take ownership responsibility for the returned `metta_t`, and free it with `metta_free()`
 /// @note This function does not load any stdlib, nor does it run the `init.metta` file from the environment
 ///
 #[no_mangle]
-pub extern "C" fn metta_new_core(space: *mut space_t, tokenizer: *mut tokenizer_t, env_builder: env_builder_t) -> metta_t {
+pub extern "C" fn metta_new_core(space: *mut space_t, env_builder: env_builder_t) -> metta_t {
     let dyn_space = unsafe{ &*space }.borrow();
-    let tokenizer = unsafe{ &*tokenizer }.clone_handle();
     let env_builder = if env_builder.is_default() {
         None
     } else {
         Some(env_builder.into_inner())
     };
-    let metta = Metta::new_core(dyn_space.clone(), tokenizer, env_builder);
+    let metta = Metta::new_core(dyn_space.clone(), env_builder);
     metta.into()
 }
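// A usage sketch from the Rust side (not part of this diff). With the Tokenizer
// parameter removed, a caller of `Metta::new_core` supplies only a Space and an
// optional environment builder; the runner now creates a Tokenizer for each
// module itself (see lib/src/metta/runner/modules/mod.rs below). The import
// paths and the `DynSpace::new`/`GroundingSpace::new` constructors are
// assumptions based on the rest of the crate, not taken from this commit.
use hyperon::metta::runner::Metta;
use hyperon::space::grounding::GroundingSpace;
use hyperon::space::DynSpace;

fn new_core_runner_sketch() -> Metta {
    // Wrap a fresh GroundingSpace in a DynSpace handle for the runner
    let space = DynSpace::new(GroundingSpace::new());
    // `None` keeps the default environment, mirroring the
    // `env_builder.is_default()` branch in the C wrapper above
    Metta::new_core(space, None)
}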

lib/src/metta/runner/mod.rs (745 changes: 539 additions & 206 deletions)

Large diffs are not rendered by default.

lib/src/metta/runner/modules/mod.rs (274 changes: 274 additions & 0 deletions)
@@ -0,0 +1,274 @@

use std::path::{Path, PathBuf};
use std::collections::HashMap;
use std::sync::Mutex;

use crate::*;
use crate::space::DynSpace;
use crate::metta::*;
use crate::metta::runner::{Metta, ModId, RunnerState};
use crate::metta::types::validate_atom;
use crate::metta::text::Tokenizer;
use crate::common::shared::Shared;

use regex::Regex;

#[cfg(not(feature = "minimal"))]
use super::interpreter::interpret;
#[cfg(not(feature = "minimal"))]
use super::stdlib::*;

#[cfg(feature = "minimal")]
use super::interpreter2::interpret;
#[cfg(feature = "minimal")]
use super::stdlib2::*;

/// A data structure that uniquely identifies an exact version of a module with a particular provenance
///
/// If two modules have the same ModuleDescriptor, they are considered to be the same module
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct ModuleDescriptor {
name: String,
path: Option<PathBuf>,
//TODO: version, checksum?
}

impl ModuleDescriptor {
/// Internal method to create a ModuleDescriptor for a runner's "top" module
pub(crate) fn top() -> Self {
Self::new("top".to_string(), None)
}
/// Create a new ModuleDescriptor
pub fn new(name: String, path: Option<&Path>) -> Self {
Self {
name,
path: path.map(|path| path.into())
}
}
}
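// A small illustrative test (not from this commit; the names and paths are made
// up). It exercises the identity rule documented above: descriptors compare by
// name plus provenance path, so equal inputs identify the same module.
#[test]
fn module_descriptor_identity_sketch() {
    let a = ModuleDescriptor::new("arith".to_string(), Some(Path::new("mods/arith.metta")));
    let b = ModuleDescriptor::new("arith".to_string(), Some(Path::new("mods/arith.metta")));
    let c = ModuleDescriptor::new("arith".to_string(), None);
    assert_eq!(a, b); // same name and provenance => the same module
    assert_ne!(a, c); // same name, different provenance => different modules
}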

/// Contains state associated with a loaded MeTTa module
#[derive(Debug)]
pub struct MettaMod {
descriptor: ModuleDescriptor,
working_dir: Option<PathBuf>,
space: DynSpace,
tokenizer: Shared<Tokenizer>,
deps: Mutex<HashMap<String, ModId>>,
}

impl Clone for MettaMod {
fn clone(&self) -> Self {
Self {
descriptor: self.descriptor.clone(),
space: self.space.clone(),
tokenizer: self.tokenizer.clone(),
working_dir: self.working_dir.clone(),
deps: Mutex::new(self.deps.lock().unwrap().clone())
}
}
}

impl MettaMod {

/// Internal method to initialize an empty MettaMod
// TODO: may be able to remove the metta argument when the dust settles
pub(crate) fn new_with_tokenizer(metta: &Metta, descriptor: ModuleDescriptor, space: DynSpace, tokenizer: Shared<Tokenizer>, working_dir: Option<PathBuf>) -> Self {
let new_mod = Self {
descriptor,
space,
tokenizer,
working_dir,
deps: Mutex::new(HashMap::new()),
};

// Load the base tokens for the module's new Tokenizer
register_runner_tokens(&mut *new_mod.tokenizer().borrow_mut(), new_mod.tokenizer().clone(), metta);
register_common_tokens(&mut *new_mod.tokenizer().borrow_mut(), new_mod.tokenizer().clone(), metta);

new_mod
}

/// Adds a loaded module as a dependency of the `&self` [MettaMod], and adds the dependent module's Space
/// as a grounded atom in the Space of &self, accessible via a new "&name" Tokenizer entry
pub fn import_dependency_as(&self, metta: &Metta, mod_id: ModId, name: Option<String>) -> Result<(), String> {

// Get the space and name associated with the dependent module
let mut runner_state = RunnerState::new_with_module(metta, mod_id);
let (dep_space, name) = runner_state.run_in_context(|context| {
let dep_space = context.module().space().clone();
let name = match name {
Some(name) => name,
None => context.module().descriptor().name.clone()
};
Ok((dep_space, name))
})?;

// Add a new atom to the &self space, so we can access the dependent module
let new_token = format!("&{name}");
let dep_space_atom = Atom::gnd(dep_space);
self.add_atom(dep_space_atom.clone(), false).map_err(|a| a.to_string())?;
self.tokenizer.borrow_mut().register_token_with_regex_str(&new_token, move |_| { dep_space_atom.clone() });

// Include the dependent module's mod_id in our deps
let mut deps = self.deps.lock().unwrap();
deps.insert(name, mod_id);

Ok(())
}
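    // A usage sketch (not from this commit; the "math" alias and `math_mod_id`
    // are hypothetical). Assuming `math_mod_id` was returned when the dependency
    // was loaded, this registers it under the alias "math": its Space is added
    // as a grounded atom and becomes reachable via the `&math` token.
    fn import_math_dependency_sketch(&self, metta: &Metta, math_mod_id: ModId) -> Result<(), String> {
        self.import_dependency_as(metta, math_mod_id, Some("math".to_string()))
    }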

/// Adds a specific atom and/or Tokenizer entry from a dependency module to the &self module
///
/// Behavior:
/// * If the `from_name` argument exactly matches a [Tokenizer] entry in the source module,
/// then that entry will be imported, and the `name` argument will be ignored
/// * If an exact [Tokenizer] entry was not found, this method will attempt to resolve `from_name`
/// into an atom, using the [Tokenizer] and [Space] associated with the dependent module, and
/// the resolved atom will be imported into the `&self` [Space]
/// * If `name` is provided, and the resolved atom is not a Symbol, or is a Symbol that
///   doesn't exactly match `name`, a new [Tokenizer] entry will be created to
///   access the atom
///
// QUESTION: This behavior of exactly matching a regex makes importing a Tokenizer pattern pretty
// unfriendly. Does it make sense to require Tokenizer entries to be associated with atoms, for
// example "Type Atoms"? For example, we could have a "Number" type that is tied to all the
// Tokenizer regex patterns used to parse different types of numbers? Then a user could
// "!(import! Number from Arithmetic)" or whatever, and get all the Tokenizer patterns that parse
// numbers?
//
// Another alternative behavior might be to allow any match for a Tokenizer entry to cause the
// import of that Tokenizer entry instead of a specific atom. For example, importing "3.14"
// would import the RegEx entry that first parses that string. However, that strikes me as not great
// for 2 reasons: 1.) a string matched by a Tokenizer entry is definitely not the same thing as
// a Tokenizer entry, so it's a bad way to refer to one, and 2.) multiple RegEx Tokenizer entries
// might be used to compose one conceptual type.
pub fn import_item_from_dependency_as(&self, metta: &Metta, from_name: &str, mod_id: ModId, name: Option<&str>) -> Result<(), String> {

// Get the space and tokenizer associated with the dependent module
let mut runner_state = RunnerState::new_with_module(metta, mod_id);
let (dep_space, dep_tokenizer, src_mod_name) = runner_state.run_in_context(|context| {
let dep_space = context.module().space().clone();
let dep_tokenizer = context.module().tokenizer().clone();
let src_mod_name = context.module().descriptor().name.clone();
Ok((dep_space, dep_tokenizer, src_mod_name))
})?;

//See if there is a match in the dependent module's Tokenizer
if let Some(found_constructor) = dep_tokenizer.borrow().find_exact(from_name) {

// If so, this method just transplants the Tokenizer entry
self.tokenizer.borrow_mut().register_token_with_func_ptr(Regex::new(from_name).unwrap(), found_constructor);
} else {
//Otherwise we will try and transplant an atom

// Get and reduce the atom we are importing, from the dependent module
let mut parser = SExprParser::new(from_name);
let import_sym = parser.parse(&dep_tokenizer.borrow())?.ok_or_else(|| format!("Import failed to resolve \"{from_name}\""))?;
if let Some(extra_atom) = parser.parse(&dep_tokenizer.borrow())? { return Err(format!("Extraneous token in import \"{extra_atom}\""));}
let src_atom_vec = interpret(dep_space, &import_sym)?;
match src_atom_vec.len() {
0 => return Err(format!("Failed to resolve import \"{from_name}\" in module \"{src_mod_name}\"")),
1 => {},
_ => return Err(format!("Ambiguous import \"{from_name}\" in module \"{src_mod_name}\"")),
}
let src_atom = src_atom_vec.into_iter().next().unwrap();

// Add the atom to our module's space
self.add_atom(src_atom.clone(), false).map_err(|a| a.to_string())?;

// Finally, Add a Tokenizer entry to access this atom, if one is needed
let name = match name {
Some(name) => name,
None => from_name
};
//We will add Tokenizer entries for all non-symbols, and symbol atoms that don't match name
let should_add_tok = match &src_atom {
Atom::Symbol(s) => s.name() != name,
_ => true,
};
if should_add_tok {
self.tokenizer.borrow_mut().register_token_with_regex_str(&name, move |_| { src_atom.clone() });
}
}

Ok(())
}
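    // A usage sketch (not from this commit; "pi", "circle-constant" and
    // `math_mod_id` are hypothetical). Per the rules above, "pi" is first looked
    // up as an exact Tokenizer entry in the dependency; failing that it is
    // parsed and reduced to a single atom, which is added to this module's Space
    // and exposed through a new Tokenizer entry named "circle-constant".
    fn import_single_item_sketch(&self, metta: &Metta, math_mod_id: ModId) -> Result<(), String> {
        self.import_item_from_dependency_as(metta, "pi", math_mod_id, Some("circle-constant"))
    }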

/// Adds all atoms and tokens in a dependency module to the &self module
///
/// Currently this function effectively merges the spaces & tokenizers.
//
//QUESTION: How do we prevent duplication of transitive imports? For example, consider:
// ModA imports ModOne and ModTwo. ModOne imports ModB. ModTwo also imports ModB. How do we
// make sure ModA doesn't end up with duplicate definitions for everything in ModB?
//
//This is usually solved in other languages using some combination of strategies.
// 1.) Keep loaded modules contained within their own name-spaces. And allow aliasing between a parent's
// space and a child module's space. This is not really sufficient for us because we also want to
// import Tokenizer entries and their associated functionality, such as the ability to parse integers
// 2.) Allow a conditional "import-unique" brute-force import which is tantamount to inlining the dependency
// at the point of the import statement. e.g. `#include` in C, but with the "ifdef" guards to make sure
// the same header isn't inlined multiple times. We may want to offer this functionality, but I assume
// we'll want to offer this in addition to a more hygienic module system structure, given that most languages
// (Python, JavaScript, even Perl!) that started off with brute-force imports have retrofitted more
// sophisticated dependency management as they have matured
// 3.) Force a module to be explicit about what can be exported from the module. e.g. the `pub` visibility
// qualifiers in Rust, etc.
//
//Personally, I feel like 3. (being explicit about exported items) is the cleanest solution and provides
// the most flexibility into the future (such as the ability to unload modules, etc.)
//
pub fn import_all_from_dependency(&self, _metta: &Metta, _mod_id: ModId) -> Result<(), String> {
//TODO: Unimplemented until we decide what "import *" actually means
unimplemented!();
}

/// Merges all Tokenizer entries in a dependency module into &self
pub(crate) fn import_all_tokens_from_dependency(&self, metta: &Metta, mod_id: ModId) -> Result<(), String> {

// Get the tokenizer associated with the dependent module
let mut runner_state = RunnerState::new_with_module(metta, mod_id);
let dep_tokenizer = runner_state.run_in_context(|context| Ok(context.module().tokenizer().clone()))?;

//Import all the Tokenizer entries from the dependency
let mut dep_tok_clone = dep_tokenizer.borrow().clone();
self.tokenizer.borrow_mut().move_front(&mut dep_tok_clone);

Ok(())
}

/// Implements the prior import behavior: imports all of the dependency's atoms into a sub-space
/// that is loaded into the &self Space, and merges all Tokenizer entries
//TODO: This method is a stop-gap, until we figure out all the details of the import behavior we want
pub(crate) fn import_dependency_legacy(&self, metta: &Metta, mod_id: ModId) -> Result<(), String> {
self.import_dependency_as(metta, mod_id, None)?;
self.import_all_tokens_from_dependency(metta, mod_id)
}

pub fn descriptor(&self) -> &ModuleDescriptor {
&self.descriptor
}

pub fn space(&self) -> &DynSpace {
&self.space
}

pub fn tokenizer(&self) -> &Shared<Tokenizer> {
&self.tokenizer
}

pub fn working_dir(&self) -> Option<&Path> {
self.working_dir.as_ref().map(|p| p.as_ref())
}

/// A convenience to add an atom to a module's Space, if it passes type-checking
pub(crate) fn add_atom(&self, atom: Atom, type_check: bool) -> Result<(), Atom> {
if type_check && !validate_atom(self.space.borrow().as_space(), &atom) {
return Err(Atom::expr([ERROR_SYMBOL, atom, BAD_TYPE_SYMBOL]));
}
self.space.borrow_mut().add(atom);
Ok(())
}
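    // A minimal sketch (not from this commit) showing both outcomes of `add_atom`
    // with type-checking enabled: a valid atom is added to the module's Space,
    // while one rejected by `validate_atom` comes back as an error expression.
    fn add_atom_checked_sketch(&self, atom: Atom) {
        if let Err(err_expr) = self.add_atom(atom, true) {
            // err_expr has the shape (Error <rejected-atom> BadType)
            eprintln!("atom rejected by type check: {err_expr}");
        }
    }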

}
