Skip to content

Commit

Permalink
Merge pull request #360 from neuralaudio/embedding-fix
Browse files: browse the repository at this point in the history
Bug fix: Race condition
  • Loading branch information
turian authored Mar 23, 2022
2 parents ad69298 + 513acb6 commit f2868ab
Showing 1 changed file with 4 additions and 2 deletions.
6 changes: 4 additions & 2 deletions heareval/embeddings/task_embeddings.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -269,20 +269,22 @@ def memmap_embeddings(
metadata: Dict,
split_name: str,
embed_task_dir: Path,
split_data: Dict,
):
"""
Memmap all the embeddings to one file, and pickle all the labels.
(We assume labels can fit in memory.)
TODO: This writes things to disk double, we could clean that up after.
We might also be able to get away with writing to disk only once.
"""
embedding_files = list(outdir.glob("*.embedding.npy"))
embedding_files = [outdir.joinpath(f"{f}.embedding.npy") for f in split_data.keys()]
prng.shuffle(embedding_files)

# First count the number of embeddings total
nembeddings = 0
ndim: int
for embedding_file in tqdm(embedding_files):
assert embedding_file.exists()
emb = np.load(embedding_file).astype(np.float32)
if metadata["embedding_type"] == "scene":
assert emb.ndim == 1
Expand Down Expand Up @@ -460,4 +462,4 @@ def task_embeddings(
f"Unknown embedding type: {metadata['embedding_type']}"
)

memmap_embeddings(outdir, prng, metadata, split, embed_task_dir)
memmap_embeddings(outdir, prng, metadata, split, embed_task_dir, split_data)

0 comments on commit f2868ab

Please sign in to comment.