-
Notifications
You must be signed in to change notification settings - Fork 194
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Add speech to text example * revert commits to example scripts * revert commits to example scripts
- Loading branch information
1 parent
e6707a1
commit 7ae27d3
Showing
12 changed files
with
487 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -154,7 +154,7 @@ We are constantly working to make UpTrain better. Want a new feature or need any | |
|
||
# License 💻 | ||
|
||
This repo is published under Apache 2.0 license. We're currently focused on developing non-enterprise offerings that should cover most use cases by adding more features and extending to more models. We also working towards adding a hosted offering - [contact us](mailto:[email protected]) if you are interested. | ||
This repo is published under Apache 2.0 license, with the exception of the ee directory which will contain premium features requiring an enterprise license in the future. We're currently focused on developing non-enterprise offerings that should cover most use cases by adding more features and extending to more models. We are also working towards adding a hosted offering - [contact us](mailto:[email protected]) if you are interested. | ||
|
||
# Stay Updated ☎️ | ||
We are continuously adding tons of features and use cases. Please support us by giving the project a star ⭐! | ||
|
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
import numpy as np | ||
from uptrain.core.classes.monitors import AbstractMonitor | ||
from uptrain.core.classes.measurables import MeasurableResolver | ||
from uptrain.constants import Monitor, ComparisonModel, ComparisonMetric | ||
|
||
|
||
class OutputComparison(AbstractMonitor):
    """Monitor that scores production values against a comparison model.

    On every batch the resolved comparison model is run on its own resolved
    inputs, the resolved metric scores the monitor's measurable values against
    the model outputs, and the batch mean of that metric is logged. Rows whose
    metric falls below ``threshold`` are reported as interesting.
    """

    dashboard_name = "output_comparison"
    monitor_type = Monitor.OUTPUT_COMPARISON

    def base_init(self, fw, check):
        """Read the comparison configuration from ``check``.

        Args:
            fw: Object forwarded to ``MeasurableResolver(...).resolve``.
            check: Mapping that must contain ``comparison_model``,
                ``comparison_metric`` and ``threshold``; the key
                ``comparison_model_input_args`` is optional.

        Raises:
            KeyError: if a required key is missing from ``check``.
            Exception: if the model or the metric cannot be resolved.
        """
        self.comparison_model_base = check["comparison_model"]
        self.comparison_model_resolved = ComparisonModelResolver().resolve(
            self.comparison_model_base
        )
        self.comparison_model_inputs = MeasurableResolver(
            check.get("comparison_model_input_args", None)
        ).resolve(fw)
        self.comparison_metric_base = check["comparison_metric"]
        self.comparison_metric_resolved = ComparisonMetricResolver().resolve(
            self.comparison_metric_base
        )
        self.threshold = check["threshold"]
        # Running number of rows seen; used as the x-value when logging.
        self.count = 0

    def base_check(self, inputs, outputs, gts=None, extra_args=None):
        """Score one batch against the comparison model and log the mean.

        ``extra_args`` is updated in place with the comparison model outputs
        and the per-row metric, and the metric is kept on ``self.batch_metrics``
        for ``base_is_data_interesting``.

        Raises:
            KeyError: if ``extra_args`` has no ``"id"`` entry.
        """
        # A fresh dict per call: a shared ``{}`` default would be mutated by
        # the ``update`` below and leak state from one call into the next.
        if extra_args is None:
            extra_args = {}

        # NOTE(review): ``self.measurable`` is not set in this class;
        # presumably provided by AbstractMonitor - confirm.
        vals = self.measurable.compute_and_log(
            inputs, outputs, gts=gts, extra=extra_args
        )
        comparison_model_inputs = self.comparison_model_inputs.compute_and_log(
            inputs, outputs, gts=gts, extra=extra_args
        )

        comparison_model_outputs = self.comparison_model_resolved(
            comparison_model_inputs
        )
        batch_metrics = self.comparison_metric_resolved(
            vals, comparison_model_outputs
        )
        self.batch_metrics = batch_metrics

        extra_args.update(
            {
                self.comparison_model_base + " outputs": comparison_model_outputs,
                self.comparison_metric_base: batch_metrics,
            }
        )

        feat_name = self.comparison_metric_base
        plot_name = (
            f"{feat_name} Comparison - Production vs {self.comparison_model_base}"
        )
        self.count += len(extra_args["id"])

        self.log_handler.add_scalars(
            plot_name,
            {"y_" + feat_name: np.mean(batch_metrics)},
            self.count,
            self.dashboard_name,
            file_name=plot_name,
        )

    def need_ground_truth(self):
        """Return False: this monitor works without ground-truth labels."""
        return False

    def base_is_data_interesting(self, inputs, outputs, gts=None, extra_args=None):
        """Flag rows whose metric from the last ``base_check`` is too low.

        Returns:
            A pair ``(is_interesting, reasons)``: the element-wise result of
            ``self.batch_metrics < self.threshold`` and, per row, either the
            string ``"None"`` or a message naming the comparison model.

        Raises:
            KeyError: if ``extra_args`` has no ``"id"`` entry.
        """
        if extra_args is None:
            extra_args = {}

        num_rows = len(extra_args["id"])
        is_interesting = self.batch_metrics < self.threshold
        different = f"Different output compared to {self.comparison_model_base}"
        reasons = [
            "None" if is_interesting[idx] == 0 else different
            for idx in range(num_rows)
        ]
        return is_interesting, reasons
|
||
|
||
class ComparisonModelResolver:
    """Turns a comparison-model identifier into the callable that runs it."""

    def resolve(self, model):
        """Return the callable registered for ``model``.

        Raises:
            Exception: if ``model`` is not ``ComparisonModel.FASTER_WHISPER``.
        """
        is_faster_whisper = model == ComparisonModel.FASTER_WHISPER
        if not is_faster_whisper:
            raise Exception(f"{model} can't be resolved")

        # Imported only on the path that needs it.
        from uptrain.ee.lib.algorithms import faster_whisper_speech_to_text

        return faster_whisper_speech_to_text
|
||
|
||
class ComparisonMetricResolver:
    """Turns a comparison-metric identifier into the callable that scores it."""

    def resolve(self, metric):
        """Return the callable registered for ``metric``.

        Raises:
            Exception: if ``metric`` is not ``ComparisonMetric.ROGUE_L_F1``.
        """
        is_rogue_l_f1 = metric == ComparisonMetric.ROGUE_L_F1
        if not is_rogue_l_f1:
            raise Exception(f"{metric} can't be resolved")

        # Imported only on the path that needs it.
        from uptrain.ee.lib.algorithms import rogue_l_similarity

        return rogue_l_similarity
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
import numpy as np | ||
from uptrain.core.lib.helper_funcs import fn_dependency_required | ||
|
||
# Optional dependencies: each name is bound to None when its import fails, and
# that value is what gets handed to ``fn_dependency_required`` below.
# ``except Exception`` (instead of a bare ``except``) keeps the fall-back for
# any ordinary import-time failure while letting KeyboardInterrupt and
# SystemExit propagate.
try:
    import faster_whisper
except Exception:
    faster_whisper = None

try:
    import rouge
except Exception:
    rouge = None
|
||
@fn_dependency_required(faster_whisper, "faster_whisper")
def faster_whisper_speech_to_text(
    audio_files,
    model_size="large-v2",
    device="cpu",
    compute_type="int8",
    beam_size=5,
):
    """Transcribe each audio input with a faster-whisper model.

    Args:
        audio_files: Iterable of audio inputs; each item is passed unchanged
            to ``WhisperModel.transcribe``.
        model_size: First argument given to ``faster_whisper.WhisperModel``.
        device: ``device`` argument given to ``WhisperModel``.
        compute_type: ``compute_type`` argument given to ``WhisperModel``.
        beam_size: ``beam_size`` argument given to ``transcribe``.

    Returns:
        A list with one string per input, built by concatenating the ``text``
        of every segment returned for that input, in order.
    """
    # The model is created once per call and reused for every input.
    model = faster_whisper.WhisperModel(
        model_size, device=device, compute_type=compute_type
    )
    transcribed_texts = []
    for audio_file in audio_files:
        segments, _ = model.transcribe(audio_file, beam_size=beam_size)
        transcribed_texts.append("".join(segment.text for segment in segments))
    return transcribed_texts
|
||
@fn_dependency_required(rouge, "rouge")
def rogue_l_similarity(text1_list, text2_list):
    """Return the per-pair ``rouge-l`` ``f`` score of two text lists.

    Both lists are lower-cased element by element before being handed to
    ``rouge.Rouge().get_scores``; the result is a NumPy array with one score
    per entry returned by the scorer.
    """
    scorer = rouge.Rouge()
    lowered_first = [text.lower() for text in text1_list]
    lowered_second = [text.lower() for text in text2_list]
    pair_scores = scorer.get_scores(lowered_first, lowered_second)
    f_scores = [entry['rouge-l']['f'] for entry in pair_scores]
    return np.array(f_scores)