Skip to content

Commit

Permalink
Ported eval_abx.sh script to CPC
Browse files Browse the repository at this point in the history
  • Loading branch information
jdzikowski committed Apr 1, 2021
1 parent 1f66873 commit d840cc0
Show file tree
Hide file tree
Showing 3 changed files with 284 additions and 1 deletion.
1 change: 0 additions & 1 deletion cpc/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,6 @@ def __init__(self,
# Forward pass of the nullspace wrapper: run the wrapped CPC model, then pass
# its outputs through self.nullspace before returning them.
# NOTE(review): this is a diff hunk and the commit summary reports one deleted
# line here; presumably the encodedData projection is the removed line, so only
# cFeature would be projected after the commit -- confirm against the repo.
def forward(self, batchData, label):
# self.cpc returns a (cFeature, encodedData, label) triple.
cFeature, encodedData, label = self.cpc(batchData, label)
cFeature = self.nullspace(cFeature)
encodedData = self.nullspace(encodedData)
return cFeature, encodedData, label


Expand Down
138 changes: 138 additions & 0 deletions scripts/embeddings_abx.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
#!/usr/bin/env python3 -u
# !/usr/bin/env python3 -u
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import logging
import os
import sys
import argparse
from itertools import chain
from pathlib import Path
import time
import copy
import numpy as np
import soundfile as sf

from cpc.feature_loader import loadModel, FeatureModule

import torch
import torch.nn as nn
import torch.nn.functional as F

# Configure logging once at import time: timestamped records are written to
# stdout, and the verbosity comes from the LOGLEVEL environment variable
# (INFO when it is not set).
logging.basicConfig(
    stream=sys.stdout,
    level=os.getenv("LOGLEVEL", "INFO").upper(),
    datefmt="%Y-%m-%d %H:%M:%S",
    format="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
)
# Logger used by this script.
logger = logging.getLogger("zerospeech2021 abx")

def parse_args():
    """Parse the command-line arguments of the embedding-extraction script.

    Positional arguments:
        path_checkpoint: checkpoint file handed to ``loadModel``.
        path_data: dataset root; audio is read from its ``phonetic`` subfolder.
        path_output_dir: directory under which the embeddings are written.

    Optional flags: ``--debug``, ``--cpu``, ``--file_extension``,
    ``--no_test``, ``--gru_level`` and ``--nullspace``.

    Returns:
        argparse.Namespace holding the parsed values.
    """
    parser = argparse.ArgumentParser()

    # Required positional arguments.
    parser.add_argument("path_checkpoint", type=str,
                        help="Path to the trained CPC model checkpoint.")
    parser.add_argument("path_data", type=str,
                        help="Path to the dataset that we want to compute ABX for.")
    parser.add_argument("path_output_dir", type=str,
                        help="Path to the output directory.")

    # Run parameters.
    parser.add_argument("--debug", action="store_true",
                        help="Load only a very small amount of files for "
                             "debugging purposes.")
    parser.add_argument("--cpu", action="store_true",
                        help="Run on a cpu machine.")
    parser.add_argument("--file_extension", type=str, default="wav",
                        help="Extension of the audio files in the dataset (default: wav).")
    parser.add_argument("--no_test", action="store_true",
                        help="Don't compute embeddings for test-* parts of dataset")
    # The two help fragments are concatenated, so the first one must end with
    # a space; otherwise the text renders as "taken(default: ...".
    parser.add_argument("--gru_level", type=int, default=-1,
                        help="Hidden level of the LSTM autoregressive model to be taken "
                             "(default: -1, last layer).")
    parser.add_argument("--nullspace", action="store_true",
                        help="Additionally load nullspace")
    return parser.parse_args()

def main():
    """Extract per-file embeddings from a checkpoint and save them as text.

    Steps:
      1. Parse the command-line arguments and load the model with ``loadModel``
         (optionally with a nullspace and an overridden ``nLevelsGRU``).
      2. Register a forward hook on ``model.nullspace`` (with ``--nullspace``)
         or on ``model.gAR`` (otherwise) that captures the module output as a
         NumPy array.
      3. For every audio file in ``<path_data>/phonetic/<dataset>/`` run the
         model and write the captured output with ``np.savetxt`` to
         ``<path_output_dir>/<layer_name>/phonetic/<dataset>/<stem>.txt``,
         where ``<layer_name>`` is the name of the directory that contains the
         checkpoint.

    With ``--no_test`` datasets whose name starts with ``test`` are skipped.
    With ``--debug`` only the first few files of each dataset are processed.
    """
    # Number of files processed per dataset when --debug is given.
    debug_max_files = 10

    # Parse and print args
    args = parse_args()
    logger.info(args)

    # Load the model
    print("")
    print(f"Loading model from {args.path_checkpoint}")

    use_gru_level = args.gru_level is not None and args.gru_level > 0
    updateConfig = argparse.Namespace(nLevelsGRU=args.gru_level) if use_gru_level else None

    model = loadModel([args.path_checkpoint], load_nullspace=args.nullspace,
                      updateConfig=updateConfig)[0]

    if use_gru_level:
        # Keep hidden units at LSTM layers on sequential batches.
        # With a nullspace the autoregressive network sits under model.cpc.
        autoregressive = model.cpc.gAR if args.nullspace else model.gAR
        autoregressive.keepHidden = True

    device = "cuda" if torch.cuda.is_available() and not args.cpu else "cpu"

    # Register the hook that captures the output of the chosen module.
    layer_outputs = {}

    def get_layer_output(name):
        def hook(module, inputs, output):
            if isinstance(output, tuple):
                captured = output[0].detach().squeeze(1)
            elif isinstance(output, dict):
                captured = output["x"].detach().squeeze(0)
            else:
                captured = output.detach().squeeze(0)
            layer_outputs[name] = captured.cpu().numpy()
        return hook

    layer_name = os.path.basename(os.path.dirname(args.path_checkpoint))
    hooked_module = model.nullspace if args.nullspace else model.gAR
    hooked_module.register_forward_hook(get_layer_output(layer_name))

    model = model.eval().to(device)
    print("Model loaded!")
    print(model)

    # Extract values from the hooked layer and save them to files
    phonetic = "phonetic"
    datasets_path = os.path.join(args.path_data, phonetic)
    datasets = [dataset for dataset in os.listdir(datasets_path)
                if not args.no_test or not dataset.startswith("test")]
    print(datasets)

    with torch.no_grad():
        for dataset in datasets:
            print("> {}".format(dataset))
            dataset_path = os.path.join(datasets_path, dataset)
            # Sorted so that the processing order (and the --debug subset) is
            # reproducible; os.listdir gives no ordering guarantee.
            files = sorted(f for f in os.listdir(dataset_path)
                           if f.endswith(args.file_extension))
            if args.debug:
                files = files[:debug_max_files]

            for i, f in enumerate(files):
                print("Progress {:2.1%}".format(i / len(files)), end="\r")
                input_f = os.path.join(dataset_path, f)
                x, _ = sf.read(input_f)
                x = torch.tensor(x).float().reshape(1, 1, -1).to(device)

                # Drop the previous file's activation so that it can never be
                # written out again if the hook does not fire for this file.
                layer_outputs.clear()
                # Only the hook's side effect is needed; the return value of
                # the forward pass itself is not used.
                model(x, None)

                for name, value in layer_outputs.items():
                    output_dir = os.path.join(args.path_output_dir, name, phonetic, dataset)
                    Path(output_dir).mkdir(parents=True, exist_ok=True)
                    out_f = os.path.join(output_dir, os.path.splitext(f)[0] + ".txt")
                    np.savetxt(out_f, value)

# Script entry point: main() runs only when the file is executed directly,
# not when it is imported.
if __name__ == "__main__":
# Disabled remote-debugging hook: uncommenting the four lines below makes the
# script wait for a ptvsd debugger to attach on port 7310 before main() runs.
#import ptvsd
#ptvsd.enable_attach(('0.0.0.0', 7310))
#print("Attach debugger now")
#ptvsd.wait_for_attach()
main()

146 changes: 146 additions & 0 deletions scripts/eval_abx.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
#!/usr/bin/env bash
#
# eval_abx.sh - compute embeddings with embeddings_abx.py and score them with
# zerospeech2021-evaluate (phonetic/ABX task only).
#
# The script relies on bash features ([[ ]], arrays), hence the explicit bash
# shebang: without it, "./eval_abx.sh" may be interpreted by a plain POSIX sh.
#
# Usage: ./eval_abx.sh -d DATASET_PATH -r ORIGINAL_DATASET_PATH \
#                      -c CHECKPOINT_PATH -o OUTPUT_DIR [-n] [-t] \
#                      [-a CONDA_PATH] [-e CPC_ENV] [-z ZEROSPEECH_EVAL_ENV]

########## CHANGE THIS ##################
ZEROSPEECH_EVAL_ENV=zerospeech2021 # Where the zerospeech2021-evaluate is installed
CPC_ENV=202010-fairseq-c11
CONDA_PATH=/pio/scratch/2/i273233/miniconda3
#########################################

# Required options (-d, -r, -c, -o). The literal string "false" is the
# sentinel meaning "not provided on the command line".
DATASET_PATH=false
ORIGINAL_DATASET_PATH=false
CHECKPOINT_PATH=false
OUTPUT_DIR=false

# Boolean switches, set to "true" by -n and -t respectively.
NULLSPACE=false
NO_TEST=false

#######################################
# Print the command-line help.
# Arguments: none
# Outputs:   usage text on stdout; each option line starts with a tab
#######################################
print_usage() {
  printf '%s\n' "Usage: ./eval_abx.sh"
  printf '\t%s\n' \
    "-d DATASET_PATH" \
    "-r ORIGINAL_DATASET_PATH" \
    "-c CHECKPOINT_PATH" \
    "-o OUTPUT_DIR"
  printf '%s\n' "OPTIONAL FLAGS:"
  printf '\t%s\n' \
    "-n (Load a model with nullspace)" \
    "-a CONDA_PATH" \
    "-e CPC_ENV" \
    "-z ZEROSPEECH_EVAL_ENV (The conda environment where the zerospeech2021-evaluate is installed)" \
    "-t (Do not compute embeddings for test set)"
}

# Parse the command-line options. Options followed by ':' in the getopts
# spec take an argument; -n and -t are plain switches. Any unknown option
# prints the usage text and aborts with status 1.
while getopts 'd:r:c:o:na:e:z:t' opt; do
  case "$opt" in
    # Required paths
    d) DATASET_PATH=$OPTARG ;;
    r) ORIGINAL_DATASET_PATH=$OPTARG ;;
    c) CHECKPOINT_PATH=$OPTARG ;;
    o) OUTPUT_DIR=$OPTARG ;;
    # Switches
    n) NULLSPACE=true ;;
    t) NO_TEST=true ;;
    # Overrides of the defaults configured at the top of the script
    a) CONDA_PATH=$OPTARG ;;
    e) CPC_ENV=$OPTARG ;;
    z) ZEROSPEECH_EVAL_ENV=$OPTARG ;;
    *)
      print_usage
      exit 1
      ;;
  esac
done

# Show the effective settings. The expansions are quoted so that values
# containing whitespace or glob characters are printed verbatim.
echo "$DATASET_PATH" "$ORIGINAL_DATASET_PATH" "$CHECKPOINT_PATH" "$OUTPUT_DIR" \
  "$NULLSPACE" "$CONDA_PATH" "$CPC_ENV" "$ZEROSPEECH_EVAL_ENV" "$NO_TEST"

# All four required options must have been given; "false" is the sentinel
# for "not provided". Diagnostics go to stderr.
if [[ "$DATASET_PATH" == false || "$ORIGINAL_DATASET_PATH" == false \
  || "$CHECKPOINT_PATH" == false || "$OUTPUT_DIR" == false ]]; then
  echo "Either DATASET_PATH or ORIGINAL_DATASET_PATH or CHECKPOINT_PATH or OUTPUT_DIR is not set." >&2
  print_usage >&2
  exit 1
fi

# Absolute directory of this script, used to locate embeddings_abx.py.
# '&&' (instead of ';') leaves SCRIPT_PATH empty when the cd fails rather
# than silently falling back to the current working directory.
SCRIPT_PATH="$(cd -- "$(dirname -- "$0")" >/dev/null 2>&1 && pwd -P)"

# Output layout: embeddings and evaluation results both live under OUTPUT_DIR.
results=$OUTPUT_DIR/results
embeddings=$OUTPUT_DIR/embeddings
# Create the embeddings directory under OUTPUT_DIR (previously a literal
# "embeddings" directory was created in the current working directory).
mkdir -p -- "$embeddings"

# Make the "conda" shell function available in this non-interactive shell.
conda_setup="$CONDA_PATH/etc/profile.d/conda.sh"
if [[ ! -f "$conda_setup" ]]; then
  echo "Cannot find $conda_setup; check CONDA_PATH (option -a)." >&2
  exit 1
fi
# shellcheck source=/dev/null
source "$conda_setup"

# Remember the environment that is active right now so that it can be
# restored at the end of the script.
SAVED_ENV=$(conda info | sed -n 's/\( \)*active environment : //p')
echo "SAVED_ENV: $SAVED_ENV"

# Switch to the environment used for computing the embeddings.
ENV_TO_ACTIVATE=$CPC_ENV
if ! conda activate "$ENV_TO_ACTIVATE"; then
  echo "Could not activate conda environment '$ENV_TO_ACTIVATE'." >&2
  exit 1
fi

# Optional flags forwarded to embeddings_abx.py. They are kept in an array so
# that every flag is passed as exactly one argument.
params=()
if [[ "$NULLSPACE" == true ]]; then
  params+=(--nullspace)
fi
if [[ "$NO_TEST" == true ]]; then
  params+=(--no_test)
fi
echo "Params: ${params[*]}"

# Compute the embeddings; the GRU level is fixed to 2 here.
echo "$SCRIPT_PATH/embeddings_abx.py"
if ! python "$SCRIPT_PATH/embeddings_abx.py" \
  "$CHECKPOINT_PATH" "$DATASET_PATH" "$embeddings" \
  --gru_level 2 "${params[@]}"; then
  # Do not evaluate missing or partial embeddings.
  echo "embeddings_abx.py failed; aborting." >&2
  exit 1
fi

# Dataset parts to clean up; the test parts are included unless -t was given.
directories=("dev-clean" "dev-other")
if [[ "$NO_TEST" == false ]]; then
  directories+=("test-clean" "test-other")
fi
echo "Directories: ${directories[*]}"

# Remove every embedding file that has no matching .wav file in
# ORIGINAL_DATASET_PATH. Globs are used instead of parsing the output of ls,
# so names containing whitespace are handled correctly.
for layer_dir in "$embeddings"/*/; do
  [[ -d "$layer_dir" ]] || continue # the glob matched nothing
  for directory in "${directories[@]}"; do
    for file in "${layer_dir}phonetic/$directory"/*; do
      [[ -e "$file" ]] || continue # missing or empty directory
      filename=${file##*/}
      filename_no_ext=${filename%.*}
      if [[ ! -f "$ORIGINAL_DATASET_PATH/phonetic/$directory/${filename_no_ext}.wav" ]]; then
        rm -- "$file"
      fi
    done
  done
done

# Switch to the environment that provides zerospeech2021-evaluate.
if ! conda activate "$ZEROSPEECH_EVAL_ENV"; then
  echo "Could not activate conda environment '$ZEROSPEECH_EVAL_ENV'." >&2
  exit 1
fi

frame_shift="0.01"
echo "Frame shift is ${frame_shift}s"

# For every distance metric: write the submission metadata once, copy it into
# each embedding directory as meta.yaml, then run the phonetic evaluation.
metrics=("cosine" "euclidean")
for metric in "${metrics[@]}"; do
  # NOTE(review): the nesting below (folded description, parameters ->
  # phonetic -> metric/frame_shift) is presumably what the original file had
  # before indentation was lost; confirm against the repository.
  cat > "$embeddings/$metric.yaml" << EOF
author: LSTM Baseline
affiliation: EHESS, ENS, PSL Research Univerity, CNRS and Inria
description: >
  CPC-big (trained on librispeech 960), kmeans (trained on librispeech 100),
  LSTM. See https://zerospeech.com/2021 for more details.
open_source: true
train_set: librispeech 100 and 960
gpu_budget: 60
parameters:
  phonetic:
    metric: ${metric}
    frame_shift: ${frame_shift}
EOF

  # Only directories match the trailing-slash glob, so the *.yaml files
  # written above are never treated as embedding directories.
  for layer_dir in "$embeddings"/*/; do
    [[ -d "$layer_dir" ]] || continue # the glob matched nothing
    layer=${layer_dir%/}
    layer=${layer##*/}
    cp -- "$embeddings/$metric.yaml" "$embeddings/$layer/meta.yaml"
    # Variants used previously: "-j 12", adding "--force-cpu", or evaluating
    # against $DATASET_PATH instead of $ORIGINAL_DATASET_PATH.
    zerospeech2021-evaluate -j 20 -o "$results/$metric/$layer" \
      --no-lexical --no-syntactic --no-semantic \
      "$ORIGINAL_DATASET_PATH" "$embeddings/$layer"
  done
done

# Concatenate all phonetic scores into one file: for each metric and each
# embedding directory, a "<name> <metric>" header line, the content of
# score_phonetic.csv, and a blank separator line.
for metric in "${metrics[@]}"; do
  for layer_dir in "$embeddings"/*/; do
    [[ -d "$layer_dir" ]] || continue # the glob matched nothing
    layer=${layer_dir%/}
    layer=${layer##*/}
    echo "$layer" "$metric"
    cat -- "$results/$metric/$layer/score_phonetic.csv"
    echo
  done
done > "$OUTPUT_DIR/combined_results.txt"

# Restore the conda environment that was active when the script started.
# Nothing is restored when none was recorded ("conda info" is expected to
# report "None" when no environment is active).
if [[ -n "$SAVED_ENV" && "$SAVED_ENV" != "None" ]]; then
  conda activate "$SAVED_ENV"
fi

0 comments on commit d840cc0

Please sign in to comment.