Skip to content

Commit

Permalink
Cleaned up old refactor, fixed a couple regressions, all tests pass!
Browse files (browse the repository at this point in the history)
  • Loading branch information
Evan Ovadia committed May 23, 2024
1 parent dd4f860 commit f40d736
Show file tree
Hide file tree
Showing 4 changed files with 438 additions and 276 deletions.
230 changes: 137 additions & 93 deletions ValeRuster/src/indexer.rs
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
use std::collections::{HashMap, HashSet};
use rustdoc_types::{Crate, Enum, Item, ItemEnum, Primitive, Struct};
use crate::{GenealogyKey, resolve_id, ResolveError, UId};
use rustdoc_types::{Crate, Enum, Id, Item, ItemEnum, Primitive, Struct};
use crate::{GenealogyKey, item_has_name, resolve_id, ResolveError, UId};
use crate::GenealogyKey::Normal;
use crate::resolve_id::{get_expanded_direct_child_uids, get_unexpanded_direct_child_uids, lookup_uid, resolve_uid};
use crate::resolve_id::{collapse_children, get_expanded_direct_child_uids, get_unexpanded_direct_child_uids_exclude_impl_children, include_impls_children, lookup_uid, resolve_uid};
use crate::ResolveError::ResolveFatal;

pub struct ItemIndex {
Expand Down Expand Up @@ -52,8 +52,11 @@ pub fn genealogize(
let child_uids =
match &item.inner {
ItemEnum::Module(_) => {
let direct_child_uids =
get_unexpanded_direct_child_uids(crates, &primitive_name_to_uid, &self_uid)?;
let direct_child_uids_without_methods =
get_unexpanded_direct_child_uids_exclude_impl_children(crates, &primitive_name_to_uid, &self_uid)?;
let direct_child_keys =
include_impls_children(crates, &primitive_name_to_uid, direct_child_uids_without_methods)?;
let direct_child_uids = collapse_children(&direct_child_keys);
direct_child_uids
.into_iter()
.map(|x| GenealogyKey::Normal(x.clone()))
Expand All @@ -63,19 +66,24 @@ pub fn genealogize(
ItemEnum::Struct(Struct { impls: impl_ids, .. }) |
ItemEnum::Enum(Enum { impls: impl_ids, .. }) => {
let mut result = Vec::new();
for impl_uid in get_unexpanded_direct_child_uids(crates, &primitive_name_to_uid, &self_uid)? {
result.push(GenealogyKey::ImplOrMethod { struct_uid: self_uid.clone(), child_uid: impl_uid.clone() });
}
// Now add all the impls' children.
for impl_id in impl_ids {
let impl_uid = UId { crate_name: crate_name.clone(), id: impl_id.clone() };
for method_uid in get_unexpanded_direct_child_uids(crates, &primitive_name_to_uid, &impl_uid)? {
result.push(GenealogyKey::ImplOrMethod { struct_uid: self_uid.clone(), child_uid: method_uid.clone() });
}
let direct_child_uids_without_methods =
get_unexpanded_direct_child_uids_exclude_impl_children(crates, &primitive_name_to_uid, &self_uid)?;
let direct_child_keys =
include_impls_children(crates, &primitive_name_to_uid, direct_child_uids_without_methods)?;
let direct_child_uids = collapse_children(&direct_child_keys);
for child_id in direct_child_uids {
result.push(GenealogyKey::ImplOrMethod { struct_uid: self_uid.clone(), child_uid: child_id.clone() });
}
// // Now add all the impls' children.
// for impl_id in impl_ids {
// let impl_uid = UId { crate_name: crate_name.clone(), id: impl_id.clone() };
// for method_uid in get_unexpanded_direct_child_uids(crates, &primitive_name_to_uid, &impl_uid, true)? {
// result.push(GenealogyKey::ImplOrMethod { struct_uid: self_uid.clone(), child_uid: method_uid.clone() });
// }
// }
result
},
ItemEnum::Import(import) => {
ItemEnum::Import(_import) => {
continue;
}
_ => continue
Expand Down Expand Up @@ -103,9 +111,11 @@ pub fn genealogize(
UId { crate_name: crate_name.to_string(), id: self_id.clone() };
match &item.inner {
ItemEnum::Module(_) => {
let direct_child_uids =
get_unexpanded_direct_child_uids(crates, &primitive_name_to_uid, &self_uid)?;

let direct_child_uids_without_methods =
get_unexpanded_direct_child_uids_exclude_impl_children(crates, &primitive_name_to_uid, &self_uid)?;
let direct_child_keys =
include_impls_children(crates, &primitive_name_to_uid, direct_child_uids_without_methods)?;
let direct_child_uids = collapse_children(&direct_child_keys);
for direct_child_uid in &direct_child_uids {
let direct_child_item = lookup_uid(crates, &direct_child_uid);
match &direct_child_item.inner {
Expand All @@ -131,7 +141,7 @@ pub fn genealogize(
Err(ResolveFatal(e)) => return Err(e)
};
match get_expanded_direct_child_uids(
crates, &primitive_name_to_uid, &target_module_uid) {
crates, &primitive_name_to_uid, &target_module_uid, true) {
Ok(x) => {
x
},
Expand All @@ -145,8 +155,8 @@ pub fn genealogize(
if !importee_uid_to_imports.contains_key(&importee_uid) {
importee_uid_to_imports.insert(importee_uid.clone(), HashSet::new());
}
eprintln!("Noting importee {:?} imported by import {:?}", importee_uid, self_uid.clone());
importee_uid_to_imports.get_mut(&importee_uid).unwrap().insert(self_uid.clone());
// println!("Noting importee {:?} imported by {:?}", importee_uid, direct_child_uid.clone());
importee_uid_to_imports.get_mut(&importee_uid).unwrap().insert(direct_child_uid.clone());
}
}
_ => {}
Expand Down Expand Up @@ -239,14 +249,15 @@ fn search_owner_paths(
return true;
}
}
if let Some(importer_uids) = importee_uid_to_imports.get(&this_uid) {
if let Some(import_uids) = importee_uid_to_imports.get(&this_uid) {
// Nothing, we've hit a dead end.
// This can happen for example in the regex crate, to the crate::string module which nobody
// imports or ever mentions.
// Instead, the root module imports crate::string's children directly.

let mut found = false;
for importer_uid in importer_uids {
for import_uid in import_uids {
let importer_uid = child_key_to_parent_uid.get(&Normal(import_uid.clone())).unwrap();
let mut new_path = path_so_far_from_child.clone();
new_path.push(importer_uid.clone());
search_owner_paths(results, crates, child_key_to_parent_uid, importee_uid_to_imports, new_path);
Expand Down Expand Up @@ -283,11 +294,11 @@ fn infer_missing_owners(

// Sanity check:
for (crate_name, crate_) in crates {
for (item_id, item) in &crate_.index {
for (item_id, _item) in &crate_.index {
let item_uid =
UId { crate_name: crate_name.to_string(), id: item_id.clone() };
if item_uid.id.0 == "0:462:2678" {
println!("lol");

}
let item = crate_.index.get(&item_uid.id).unwrap();
match &item.inner {
Expand All @@ -298,58 +309,87 @@ fn infer_missing_owners(
let parent_uid =
match determine_ultimate_owner(crates, child_key_to_parent_uid, importee_uid_to_imports, &item_uid) {
None => {
eprintln!("No owners or imports for {:?}", item_uid);
println!("No owners or imports for {:?}", item_uid);
continue
},
Some(value) => value,
};
// let import_key = Normal(parent_uid);
eprintln!("Noting new owner for {:?}, import {:?}", item_uid.clone(), parent_uid);
// println!("{:?} Noting new owner, module {:?} parent import {:?}", result.len(), item_uid.clone(), parent_uid);
assert!(!result.contains_key(&Normal(item_uid.clone())));
result.insert(Normal(item_uid), parent_uid.clone());
}
ItemEnum::Primitive(Primitive { impls: impl_ids, .. }) |
ItemEnum::Struct(Struct { impls: impl_ids, .. }) |
ItemEnum::Enum(Enum { impls: impl_ids, .. }) => {
if item_has_name(&item, "Chars") {

}
let parent_uid =
match determine_ultimate_owner(crates, child_key_to_parent_uid, importee_uid_to_imports, &item_uid) {
None => {
eprintln!("No owners or imports for {:?}", item_uid);
println!("No owners or imports for {:?}", item_uid);
continue
},
Some(value) => value,
};
eprintln!("Noting new owner for {:?}, import {:?}", item_uid.clone(), parent_uid);
// println!("{:?} Noting new owner for {:?}, parent {:?}", result.len(), item_uid.clone(), parent_uid);
assert!(!result.contains_key(&Normal(item_uid.clone())));
result.insert(Normal(item_uid.clone()), parent_uid.clone());

let direct_child_uids_without_methods =
get_unexpanded_direct_child_uids_exclude_impl_children(
crates, &primitive_name_to_uid, &item_uid)?;
let direct_child_keys =
include_impls_children(
crates, &primitive_name_to_uid, direct_child_uids_without_methods)?;
println!("Direct child keys: {:?}", &direct_child_keys);
let direct_child_uids = collapse_children(&direct_child_keys);
println!("Direct child uids: {:?}", &direct_child_uids);
// Now look for all their methods.
let mut method_keys = Vec::new();
for impl_uid in get_unexpanded_direct_child_uids(crates, &primitive_name_to_uid, &item_uid)? {
method_keys.push(GenealogyKey::ImplOrMethod { struct_uid: item_uid.clone(), child_uid: impl_uid.clone() });
}
// Now add all the impls' children.
for impl_id in impl_ids {
let impl_uid = UId { crate_name: crate_name.clone(), id: impl_id.clone() };
for method_uid in get_unexpanded_direct_child_uids(crates, &primitive_name_to_uid, &impl_uid)? {
method_keys.push(GenealogyKey::ImplOrMethod { struct_uid: item_uid.clone(), child_uid: method_uid.clone() });
}
for direct_child_uid in direct_child_uids {
// println!("Pushing A {:?}", GenealogyKey::ImplOrMethod { struct_uid: item_uid.clone(), child_uid: direct_child_uid.clone() });
method_keys.push(GenealogyKey::ImplOrMethod { struct_uid: item_uid.clone(), child_uid: direct_child_uid.clone() });
}
// // Now add all the impls' children.
// for impl_id in impl_ids {
// let impl_uid = UId { crate_name: crate_name.clone(), id: impl_id.clone() };
// for method_uid in get_unexpanded_direct_child_uids(crates, &primitive_name_to_uid, &impl_uid, true)? {
// println!("Pushing B {:?}", GenealogyKey::ImplOrMethod { struct_uid: item_uid.clone(), child_uid: method_uid.clone() });
// method_keys.push(GenealogyKey::ImplOrMethod { struct_uid: item_uid.clone(), child_uid: method_uid.clone() });
// }
// }
for method_key in method_keys {
match &method_key {
GenealogyKey::ImplOrMethod {
struct_uid: UId { crate_name: _, id: Id(x) },
child_uid: UId { crate_name: _, id: Id(y) },
} if x == "0:3816:3157" && y == "1:3721:1799" => {

}
_ => {}
}

// println!("{:?} Noting impl-method {:?} has owner {:?}", result.len(), &method_key, &item_uid);
assert!(!result.contains_key(&method_key));
result.insert(method_key, item_uid.clone());
}
}
ItemEnum::Function(func) => {
ItemEnum::Function(_func) => {
if child_key_to_parent_uid.contains_key(&Normal(item_uid.clone())) {
// Then it's a free function.
let parent_uid =
match determine_ultimate_owner(crates, child_key_to_parent_uid, importee_uid_to_imports, &item_uid) {
None => {
eprintln!("No owners or imports for {:?}", &item_uid);
println!("No owners or imports for {:?}", &item_uid);
continue
},
Some(value) => value,
};
// let import_key = Normal(parent_uid);
eprintln!("Noting new owner for {:?}, import {:?}", item_uid.clone(), parent_uid);
// println!("{:?} Noting new owner for free function {:?}, import {:?}", result.len(), item_uid.clone(), parent_uid);
assert!(!result.contains_key(&Normal(item_uid.clone())));
result.insert(Normal(item_uid), parent_uid.clone());
} else {
// It's a method, skip it. We'll get it in the struct|trait|enum case.
Expand All @@ -361,56 +401,60 @@ fn infer_missing_owners(
}
}

// Sanity check:
for (crate_name, crate_) in crates {
for (item_id, item) in &crate_.index {
let item_uid =
UId { crate_name: crate_name.to_string(), id: item_id.clone() };
let item = crate_.index.get(&item_uid.id).unwrap();
match item.inner {
ItemEnum::Module(_) | ItemEnum::Struct(_) | ItemEnum::Enum(_) | ItemEnum::Primitive(_) => {
let item_key = Normal(item_uid.clone());
if !child_key_to_parent_uid.contains_key(&item_key) {
if let Some(unnarrowed_imports) = importee_uid_to_imports.get(item_key.uid()) {
let imports: Vec<UId> =
if unnarrowed_imports.len() == 0 {
eprintln!("No owners or imports for {:?}", item_key);
continue;
} else if unnarrowed_imports.len() > 1 {
// Narrow it down by the smallest path. For example, LineWriter
// is imported in two places:
// "library/std/src/io/buffered/mod.rs",
// "library/std/src/io/mod.rs",
// so we'll go with the second one.
// TODO: use a better resolution approach
let mut bork: Vec<(UId, &Item)> =
unnarrowed_imports.iter()
.map(|id| (id.clone(), lookup_uid(crates, id)))
.collect::<Vec<_>>();
bork.sort_by_key(|(uid, item)| item.span.as_ref().unwrap().filename.to_str().as_ref().unwrap().len());
eprintln!("Heuristic, estimating owner for {:?} is {:?}", item_key, bork.iter().next().unwrap().clone());
bork.into_iter().map(|x| x.0).collect()
} else {
unnarrowed_imports.iter().map(|x| x.clone()).collect()
};
let import_key = Normal(imports.iter().next().unwrap().clone());
if let Some(import_parent_id) = child_key_to_parent_uid.get(&import_key) {
eprintln!("Noting new owner for {:?}, import {:?}", item_key, import_key.uid());
result.insert(item_key, import_parent_id.clone());
} else {
eprintln!("New owner for {:?}, import {:?} has no owner itself!", item_key, import_key.uid());
continue;
}
} else {
eprintln!("Orphan module: {:?}", item.name);
}
}
}
// ItemEnum::Function(_) => {}
_ => {}
}
}
}
// for (crate_name, crate_) in crates {
// for (item_id, _item) in &crate_.index {
// let item_uid =
// UId { crate_name: crate_name.to_string(), id: item_id.clone() };
// let item = crate_.index.get(&item_uid.id).unwrap();
// match item.inner {
// ItemEnum::Module(_) | ItemEnum::Struct(_) | ItemEnum::Enum(_) | ItemEnum::Primitive(_) => {
// let item_key = Normal(item_uid.clone());
// if !result.contains_key(&item_key) {
// if let Some(unnarrowed_imports) = importee_uid_to_imports.get(item_key.uid()) {
// let imports: Vec<UId> =
// if unnarrowed_imports.len() == 0 {
// println!("No owners or imports for {:?}", item_key);
// continue;
// } else if unnarrowed_imports.len() > 1 {
// // Narrow it down by the smallest path. For example, LineWriter
// // is imported in two places:
// // "library/std/src/io/buffered/mod.rs",
// // "library/std/src/io/mod.rs",
// // so we'll go with the second one.
// // TODO: use a better resolution approach
// let mut bork: Vec<(UId, &Item)> =
// unnarrowed_imports.iter()
// .map(|id| (id.clone(), lookup_uid(crates, id)))
// .collect::<Vec<_>>();
// bork.sort_by_key(|(_uid, item)| item.span.as_ref().unwrap().filename.to_str().as_ref().unwrap().len());
// println!("Heuristic, estimating owner for {:?} is {:?}", item_key, bork.iter().next().unwrap().clone());
// bork.into_iter().map(|x| x.0).collect()
// } else {
// unnarrowed_imports.iter().map(|x| x.clone()).collect()
// };
// let import_key = Normal(imports.iter().next().unwrap().clone());
// if let Some(import_parent_id) = child_key_to_parent_uid.get(&import_key) {
// println!("{:?} Noting new owner for {:?}, import {:?}'s parent {:?}", result.len(), item_key, import_key.uid(), &import_parent_id);
// match result.get(&item_key) {
// None => {}
// Some(existing_id) => panic!("Already existing owner: {:?}", existing_id)
// }
// assert!(!result.contains_key(&item_key));
// result.insert(item_key, import_parent_id.clone());
// } else {
// println!("New owner for {:?}, import {:?} has no owner itself!", item_key, import_key.uid());
// continue;
// }
// } else {
// println!("Orphan module: {:?}", item.name);
// }
// }
// }
// // ItemEnum::Function(_) => {}
// _ => {}
// }
// }
// }

Ok(result)
}
Expand Down Expand Up @@ -453,8 +497,8 @@ fn determine_ultimate_owner(
unnarrowed_parent_ids.iter()
.map(|id| (id.clone(), lookup_uid(crates, id)))
.collect::<Vec<(UId, &Item)>>();
parent_uids_and_items.sort_by_key(|(uid, item)| item.span.as_ref().unwrap().filename.to_str().as_ref().unwrap().len());
eprintln!("Heuristic, estimating owner for {:?} is {:?}", item_uid, parent_uids_and_items.iter().next().unwrap().clone());
parent_uids_and_items.sort_by_key(|(_uid, item)| item.span.as_ref().unwrap().filename.to_str().as_ref().unwrap().len());
println!("Heuristic, estimating owner for {:?} is {:?}", item_uid, parent_uids_and_items.iter().next().unwrap().clone());
parent_uids_and_items
.into_iter()
.map(|x| x.0)
Expand Down
Loading

0 comments on commit f40d736

Please sign in to comment.