Skip to content

Commit

Permalink
More unit tests, more fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
dkhofer committed Aug 22, 2024
1 parent 91eb838 commit d07edda
Show file tree
Hide file tree
Showing 2 changed files with 304 additions and 9 deletions.
306 changes: 298 additions & 8 deletions src/models.rs
Original file line number Diff line number Diff line change
Expand Up @@ -718,6 +718,7 @@ impl BlockGroup {

#[allow(clippy::ptr_arg)]
#[allow(clippy::too_many_arguments)]
#[allow(clippy::needless_late_init)]
pub fn new_insert_change(
conn: &mut Connection,
block_group_id: i32,
Expand All @@ -728,19 +729,26 @@ impl BlockGroup {
chromosome_index: i32,
phased: i32,
) {
// todo:
// cases to check:
// change that is the size of a block
// change that goes over multiple blocks
// change that hits just start/end boundary, e.g. block is 1,5 and change is 3,5 or 1,3.
// change that deletes block boundary

let tree = Path::intervaltree_for(conn, path);

let start_blocks: Vec<NewBlock> =
tree.query_point(start).map(|x| x.value.clone()).collect();
assert_eq!(start_blocks.len(), 1);
let start_block = &start_blocks[0];
// NOTE: This may not be used but needs to be initialized here instead of inside the if
// statement that uses it, so that the borrow checker is happy
let previous_start_blocks: Vec<NewBlock> = tree
.query_point(start - 1)
.map(|x| x.value.clone())
.collect();
assert_eq!(previous_start_blocks.len(), 1);
let start_block;
if start_blocks[0].path_start == start {
// First part of this block will be replaced/deleted, need to get previous block to add
// edge including it
start_block = &previous_start_blocks[0];
} else {
start_block = &start_blocks[0];
}

let end_blocks: Vec<NewBlock> = tree.query_point(end).map(|x| x.value.clone()).collect();
assert_eq!(end_blocks.len(), 1);
Expand Down Expand Up @@ -1279,4 +1287,286 @@ mod tests {
])
);
}

#[test]
fn insert_on_block_boundary_middle_new() {
    // Zero-width change at path position 15: four N bases are added and, per
    // the expected output below, no original base is replaced.
    // NOTE(review): the block layout comes from setup_multipath, which is not
    // visible here — confirm how position 15 relates to a block boundary.
    let mut conn = get_connection();
    let (block_group_id, path) = setup_multipath(&conn);

    // Store the inserted bases, then read the stored record back by its hash.
    let hash = Sequence::create(&conn, "DNA", "NNNN", true);
    let quoted_hash = format!("\"{}\"", hash);
    let lookup = Sequence::sequences_by_hash(&conn, vec![quoted_hash]);
    let stored = lookup.get(&hash).unwrap();

    let insert = NewBlock {
        id: 0,
        strand: String::from("+"),
        path_start: 15,
        path_end: 15,
        sequence_start: 0,
        sequence_end: 4,
        block_sequence: stored.sequence[..4].to_string(),
        sequence: stored.clone(),
    };
    BlockGroup::new_insert_change(&mut conn, block_group_id, &path, 15, 15, &insert, 1, 0);

    // The unmodified sequence must still be present next to the edited one.
    let expected = HashSet::from_iter([
        String::from("AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG"),
        String::from("AAAAAAAAAATTTTTNNNNTTTTTCCCCCCCCCCGGGGGGGGGG"),
    ]);
    let all_sequences = BlockGroup::new_get_all_sequences(&conn, block_group_id);
    assert_eq!(all_sequences, expected);
}

#[test]
fn insert_within_block_new() {
    // Replaces path positions 12 to 17 with four N bases; the expected output
    // shows five T bases giving way to "NNNN".
    let mut conn = get_connection();
    let (block_group_id, path) = setup_multipath(&conn);

    // Store the replacement bases, then read the stored record back by hash.
    let hash = Sequence::create(&conn, "DNA", "NNNN", true);
    let quoted_hash = format!("\"{}\"", hash);
    let lookup = Sequence::sequences_by_hash(&conn, vec![quoted_hash]);
    let stored = lookup.get(&hash).unwrap();

    let insert = NewBlock {
        id: 0,
        strand: String::from("+"),
        path_start: 12,
        path_end: 17,
        sequence_start: 0,
        sequence_end: 4,
        block_sequence: stored.sequence[..4].to_string(),
        sequence: stored.clone(),
    };
    BlockGroup::new_insert_change(&mut conn, block_group_id, &path, 12, 17, &insert, 1, 0);

    // The unmodified sequence must still be present next to the edited one.
    let expected = HashSet::from_iter([
        String::from("AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG"),
        String::from("AAAAAAAAAATTNNNNTTTCCCCCCCCCCGGGGGGGGGG"),
    ]);
    let all_sequences = BlockGroup::new_get_all_sequences(&conn, block_group_id);
    assert_eq!(all_sequences, expected);
}

#[test]
fn insert_on_block_boundary_start_new() {
    // Zero-width change at path position 10; the expected output places
    // "NNNN" between the run of A bases and the run of T bases.
    let mut conn = get_connection();
    let (block_group_id, path) = setup_multipath(&conn);

    // Store the inserted bases, then read the stored record back by its hash.
    let hash = Sequence::create(&conn, "DNA", "NNNN", true);
    let quoted_hash = format!("\"{}\"", hash);
    let lookup = Sequence::sequences_by_hash(&conn, vec![quoted_hash]);
    let stored = lookup.get(&hash).unwrap();

    let insert = NewBlock {
        id: 0,
        strand: String::from("+"),
        path_start: 10,
        path_end: 10,
        sequence_start: 0,
        sequence_end: 4,
        block_sequence: stored.sequence[..4].to_string(),
        sequence: stored.clone(),
    };
    BlockGroup::new_insert_change(&mut conn, block_group_id, &path, 10, 10, &insert, 1, 0);

    // The unmodified sequence must still be present next to the edited one.
    let expected = HashSet::from_iter([
        String::from("AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG"),
        String::from("AAAAAAAAAANNNNTTTTTTTTTTCCCCCCCCCCGGGGGGGGGG"),
    ]);
    let all_sequences = BlockGroup::new_get_all_sequences(&conn, block_group_id);
    assert_eq!(all_sequences, expected);
}

#[test]
fn insert_on_block_boundary_end_new() {
    // Zero-width change at path position 9; the expected output places "NNNN"
    // just before the last A of the leading run of A bases.
    let mut conn = get_connection();
    let (block_group_id, path) = setup_multipath(&conn);

    // Store the inserted bases, then read the stored record back by its hash.
    let hash = Sequence::create(&conn, "DNA", "NNNN", true);
    let quoted_hash = format!("\"{}\"", hash);
    let lookup = Sequence::sequences_by_hash(&conn, vec![quoted_hash]);
    let stored = lookup.get(&hash).unwrap();

    let insert = NewBlock {
        id: 0,
        strand: String::from("+"),
        path_start: 9,
        path_end: 9,
        sequence_start: 0,
        sequence_end: 4,
        block_sequence: stored.sequence[..4].to_string(),
        sequence: stored.clone(),
    };
    BlockGroup::new_insert_change(&mut conn, block_group_id, &path, 9, 9, &insert, 1, 0);

    // The unmodified sequence must still be present next to the edited one.
    let expected = HashSet::from_iter([
        String::from("AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG"),
        String::from("AAAAAAAAANNNNATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG"),
    ]);
    let all_sequences = BlockGroup::new_get_all_sequences(&conn, block_group_id);
    assert_eq!(all_sequences, expected);
}

#[test]
fn insert_across_entire_block_boundary_new() {
    // Replaces path positions 10 to 20 with four N bases; the expected output
    // has the whole run of T bases swapped for "NNNN".
    let mut conn = get_connection();
    let (block_group_id, path) = setup_multipath(&conn);

    // Store the replacement bases, then read the stored record back by hash.
    let hash = Sequence::create(&conn, "DNA", "NNNN", true);
    let quoted_hash = format!("\"{}\"", hash);
    let lookup = Sequence::sequences_by_hash(&conn, vec![quoted_hash]);
    let stored = lookup.get(&hash).unwrap();

    let insert = NewBlock {
        id: 0,
        strand: String::from("+"),
        path_start: 10,
        path_end: 20,
        sequence_start: 0,
        sequence_end: 4,
        block_sequence: stored.sequence[..4].to_string(),
        sequence: stored.clone(),
    };
    BlockGroup::new_insert_change(&mut conn, block_group_id, &path, 10, 20, &insert, 1, 0);

    // The unmodified sequence must still be present next to the edited one.
    let expected = HashSet::from_iter([
        String::from("AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG"),
        String::from("AAAAAAAAAANNNNCCCCCCCCCCGGGGGGGGGG"),
    ]);
    let all_sequences = BlockGroup::new_get_all_sequences(&conn, block_group_id);
    assert_eq!(all_sequences, expected);
}

#[test]
fn insert_across_two_blocks_new() {
    // Replaces path positions 15 to 25 with four N bases; the expected output
    // loses the last five T bases and the first five C bases.
    let mut conn = get_connection();
    let (block_group_id, path) = setup_multipath(&conn);

    // Store the replacement bases, then read the stored record back by hash.
    let hash = Sequence::create(&conn, "DNA", "NNNN", true);
    let quoted_hash = format!("\"{}\"", hash);
    let lookup = Sequence::sequences_by_hash(&conn, vec![quoted_hash]);
    let stored = lookup.get(&hash).unwrap();

    let insert = NewBlock {
        id: 0,
        strand: String::from("+"),
        path_start: 15,
        path_end: 25,
        sequence_start: 0,
        sequence_end: 4,
        block_sequence: stored.sequence[..4].to_string(),
        sequence: stored.clone(),
    };
    BlockGroup::new_insert_change(&mut conn, block_group_id, &path, 15, 25, &insert, 1, 0);

    // The unmodified sequence must still be present next to the edited one.
    let expected = HashSet::from_iter([
        String::from("AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG"),
        String::from("AAAAAAAAAATTTTTNNNNCCCCCGGGGGGGGGG"),
    ]);
    let all_sequences = BlockGroup::new_get_all_sequences(&conn, block_group_id);
    assert_eq!(all_sequences, expected);
}

#[test]
fn insert_spanning_blocks_new() {
    // Replaces path positions 5 to 35 with four N bases; the expected output
    // keeps only five A bases before "NNNN" and five G bases after it.
    let mut conn = get_connection();
    let (block_group_id, path) = setup_multipath(&conn);

    // Store the replacement bases, then read the stored record back by hash.
    let hash = Sequence::create(&conn, "DNA", "NNNN", true);
    let quoted_hash = format!("\"{}\"", hash);
    let lookup = Sequence::sequences_by_hash(&conn, vec![quoted_hash]);
    let stored = lookup.get(&hash).unwrap();

    let insert = NewBlock {
        id: 0,
        strand: String::from("+"),
        path_start: 5,
        path_end: 35,
        sequence_start: 0,
        sequence_end: 4,
        block_sequence: stored.sequence[..4].to_string(),
        sequence: stored.clone(),
    };
    BlockGroup::new_insert_change(&mut conn, block_group_id, &path, 5, 35, &insert, 1, 0);

    // The unmodified sequence must still be present next to the edited one.
    let expected = HashSet::from_iter([
        String::from("AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG"),
        String::from("AAAAANNNNGGGGG"),
    ]);
    let all_sequences = BlockGroup::new_get_all_sequences(&conn, block_group_id);
    assert_eq!(all_sequences, expected);
}

#[test]
fn simple_deletion_new() {
    // A deletion is expressed as a change whose replacement sequence is empty.
    // Path positions 19 to 31 are removed; the expected output drops the last
    // T, every C, and the first G.
    let mut conn = get_connection();
    let (block_group_id, path) = setup_multipath(&conn);

    // Store the empty sequence, then read the stored record back by its hash.
    let hash = Sequence::create(&conn, "DNA", "", true);
    let quoted_hash = format!("\"{}\"", hash);
    let lookup = Sequence::sequences_by_hash(&conn, vec![quoted_hash]);
    let stored = lookup.get(&hash).unwrap();

    let deletion = NewBlock {
        id: 0,
        strand: String::from("+"),
        path_start: 19,
        path_end: 31,
        sequence_start: 0,
        sequence_end: 0,
        block_sequence: stored.sequence.clone(),
        sequence: stored.clone(),
    };
    BlockGroup::new_insert_change(&mut conn, block_group_id, &path, 19, 31, &deletion, 1, 0);

    // The unmodified sequence must still be present next to the shortened one.
    let expected = HashSet::from_iter([
        String::from("AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG"),
        String::from("AAAAAAAAAATTTTTTTTTGGGGGGGGG"),
    ]);
    let all_sequences = BlockGroup::new_get_all_sequences(&conn, block_group_id);
    assert_eq!(all_sequences, expected);
}

#[test]
fn doesnt_apply_same_insert_twice_new() {
    // Applies the identical change (positions 7 to 15 replaced by "NNNN") two
    // times and checks that the set of sequences is the same after each call.
    let mut conn = get_connection();
    let (block_group_id, path) = setup_multipath(&conn);

    // Store the replacement bases, then read the stored record back by hash.
    let hash = Sequence::create(&conn, "DNA", "NNNN", true);
    let quoted_hash = format!("\"{}\"", hash);
    let lookup = Sequence::sequences_by_hash(&conn, vec![quoted_hash]);
    let stored = lookup.get(&hash).unwrap();

    let insert = NewBlock {
        id: 0,
        strand: String::from("+"),
        path_start: 7,
        path_end: 15,
        sequence_start: 0,
        sequence_end: 4,
        block_sequence: stored.sequence[..4].to_string(),
        sequence: stored.clone(),
    };

    // Both rounds are held to the same expectation, so it is built once.
    let expected = HashSet::from_iter([
        String::from("AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG"),
        String::from("AAAAAAANNNNTTTTTCCCCCCCCCCGGGGGGGGGG"),
    ]);

    // First application introduces the edited sequence.
    BlockGroup::new_insert_change(&mut conn, block_group_id, &path, 7, 15, &insert, 1, 0);
    let after_first = BlockGroup::new_get_all_sequences(&conn, block_group_id);
    assert_eq!(after_first, expected);

    // Second application of the very same change must not add anything new.
    BlockGroup::new_insert_change(&mut conn, block_group_id, &path, 7, 15, &insert, 1, 0);
    let after_second = BlockGroup::new_get_all_sequences(&conn, block_group_id);
    assert_eq!(after_second, expected);
}
}
7 changes: 6 additions & 1 deletion src/models/new_edge.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ impl NewEdge {
) -> NewEdge {
let query = "INSERT INTO new_edges (source_hash, source_coordinate, target_hash, target_coordinate, chromosome_index, phased) VALUES (?1, ?2, ?3, ?4, ?5, ?6) RETURNING *";
let id_query = "select id from new_edges where source_hash = ?1 and source_coordinate = ?2 and target_hash = ?3 and target_coordinate = ?4 and chromosome_index = ?5 and phased = ?6";
let mut placeholders: Vec<Value> = vec![
let placeholders: Vec<Value> = vec![
source_hash.clone().into(),
source_coordinate.into(),
target_hash.clone().into(),
Expand Down Expand Up @@ -170,6 +170,11 @@ impl NewEdge {
);
edge_rows_to_insert.push(edge_row);
}

if edge_rows_to_insert.is_empty() {
return existing_edge_ids;
}

let formatted_edge_rows_to_insert = edge_rows_to_insert.join(", ");

let insert_statement = format!("INSERT INTO new_edges (source_hash, source_coordinate, target_hash, target_coordinate, chromosome_index, phased) VALUES {0} RETURNING (id);", formatted_edge_rows_to_insert);
Expand Down

0 comments on commit d07edda

Please sign in to comment.