From 166b441bd8c474f32d15e1672f989bcf02c5adf0 Mon Sep 17 00:00:00 2001 From: takenori-y Date: Tue, 7 Nov 2023 16:37:27 +0900 Subject: [PATCH 1/6] add pnorm and ipnorm --- CMakeLists.txt | 4 + doc/main/ipnorm.rst | 11 + doc/main/pnorm.rst | 11 + ...mel_cepstrum_inverse_power_normalization.h | 94 +++++++++ .../mel_cepstrum_power_normalization.h | 120 +++++++++++ ...el_cepstrum_inverse_power_normalization.cc | 58 +++++ .../mel_cepstrum_power_normalization.cc | 77 +++++++ src/main/ipnorm.cc | 160 ++++++++++++++ src/main/mcpf.cc | 2 +- src/main/pnorm.cc | 198 ++++++++++++++++++ test/test_ipnorm.bats | 34 +++ test/test_pnorm.bats | 54 +++++ 12 files changed, 822 insertions(+), 1 deletion(-) create mode 100644 doc/main/ipnorm.rst create mode 100644 doc/main/pnorm.rst create mode 100644 include/SPTK/conversion/mel_cepstrum_inverse_power_normalization.h create mode 100644 include/SPTK/conversion/mel_cepstrum_power_normalization.h create mode 100644 src/conversion/mel_cepstrum_inverse_power_normalization.cc create mode 100644 src/conversion/mel_cepstrum_power_normalization.cc create mode 100644 src/main/ipnorm.cc create mode 100644 src/main/pnorm.cc create mode 100755 test/test_ipnorm.bats create mode 100755 test/test_pnorm.bats diff --git a/CMakeLists.txt b/CMakeLists.txt index d19492f3..f8c1c65b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -122,6 +122,8 @@ set(CC_SOURCES ${SOURCE_DIR}/conversion/linear_predictive_coefficients_to_line_spectral_pairs.cc ${SOURCE_DIR}/conversion/linear_predictive_coefficients_to_parcor_coefficients.cc ${SOURCE_DIR}/conversion/log_area_ratio_to_parcor_coefficients.cc + ${SOURCE_DIR}/conversion/mel_cepstrum_inverse_power_normalization.cc + ${SOURCE_DIR}/conversion/mel_cepstrum_power_normalization.cc ${SOURCE_DIR}/conversion/mel_cepstrum_to_mlsa_digital_filter_coefficients.cc ${SOURCE_DIR}/conversion/mel_generalized_cepstrum_to_mel_generalized_cepstrum.cc ${SOURCE_DIR}/conversion/mel_generalized_cepstrum_to_spectrum.cc @@ -310,6 +312,7 @@ 
set(MAIN_SOURCES ${SOURCE_DIR}/main/impulse.cc ${SOURCE_DIR}/main/imsvq.cc ${SOURCE_DIR}/main/interpolate.cc + ${SOURCE_DIR}/main/ipnorm.cc ${SOURCE_DIR}/main/ipqmf.cc ${SOURCE_DIR}/main/iulaw.cc ${SOURCE_DIR}/main/lar2par.cc @@ -352,6 +355,7 @@ set(MAIN_SOURCES ${SOURCE_DIR}/main/pca.cc ${SOURCE_DIR}/main/pcas.cc ${SOURCE_DIR}/main/phase.cc + ${SOURCE_DIR}/main/pnorm.cc ${SOURCE_DIR}/main/pitch.cc ${SOURCE_DIR}/main/pitch2sin.cc ${SOURCE_DIR}/main/pitch_mark.cc diff --git a/doc/main/ipnorm.rst b/doc/main/ipnorm.rst new file mode 100644 index 00000000..029879b8 --- /dev/null +++ b/doc/main/ipnorm.rst @@ -0,0 +1,11 @@ +.. _ipnorm: + +ipnorm +====== + +.. doxygenfile:: ipnorm.cc + +.. seealso:: :ref:`pnorm` + +.. doxygenclass:: sptk::MelCepstrumInversePowerNormalization + :members: diff --git a/doc/main/pnorm.rst b/doc/main/pnorm.rst new file mode 100644 index 00000000..acd8eb35 --- /dev/null +++ b/doc/main/pnorm.rst @@ -0,0 +1,11 @@ +.. _pnorm: + +pnorm +===== + +.. doxygenfile:: pnorm.cc + +.. seealso:: :ref:`ipnorm` + +.. doxygenclass:: sptk::MelCepstrumPowerNormalization + :members: diff --git a/include/SPTK/conversion/mel_cepstrum_inverse_power_normalization.h b/include/SPTK/conversion/mel_cepstrum_inverse_power_normalization.h new file mode 100644 index 00000000..dcc6ca28 --- /dev/null +++ b/include/SPTK/conversion/mel_cepstrum_inverse_power_normalization.h @@ -0,0 +1,94 @@ +// ------------------------------------------------------------------------ // +// Copyright 2021 SPTK Working Group // +// // +// Licensed under the Apache License, Version 2.0 (the "License"); // +// you may not use this file except in compliance with the License. 
// +// You may obtain a copy of the License at // +// // +// http://www.apache.org/licenses/LICENSE-2.0 // +// // +// Unless required by applicable law or agreed to in writing, software // +// distributed under the License is distributed on an "AS IS" BASIS, // +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // +// See the License for the specific language governing permissions and // +// limitations under the License. // +// ------------------------------------------------------------------------ // + +#ifndef SPTK_CONVERSION_MEL_CEPSTRUM_INVERSE_POWER_NORMALIZATION_H_ +#define SPTK_CONVERSION_MEL_CEPSTRUM_INVERSE_POWER_NORMALIZATION_H_ + +#include // std::vector + +#include "SPTK/utils/sptk_utils.h" + +namespace sptk { + +/** + * Convert power-normalized mel-cepstral coefficients to mel-cepstral ones. + * + * The input is the @f$(M+1)@f$-th order power-normalized mel-cepstral + * coefficients: + * @f[ + * \begin{array}{ccccc} + * P, & \tilde{c}'(0), & \tilde{c}'(1), & \ldots, & \tilde{c}'(M), + * \end{array} + * @f] + * where @f$P@f$ is the power and the output is the @f$M@f$-th order + * mel-cepstral coefficients: + * @f[ + * \begin{array}{cccc} + * \tilde{c}(0), & \tilde{c}(1), & \ldots, & \tilde{c}(M), + * \end{array} + * @f] + * where + * @f[ + * \tilde{c}(m) = \left\{ \begin{array}{ll} + * \tilde{c}'(0) + K, & m = 0 \\ + * \tilde{c}'(m). & 1 \le m \le M + * \end{array} \right. + * @f] + */ +class MelCepstrumInversePowerNormalization { + public: + /** + * @param[in] num_order Order of coefficients, @f$M@f$. + */ + explicit MelCepstrumInversePowerNormalization(int num_order); + + virtual ~MelCepstrumInversePowerNormalization() { + } + + /** + * @return Order of coefficients. + */ + int GetNumOrder() const { + return num_order_; + } + + /** + * @return True if this object is valid. 
+ */ + bool IsValid() const { + return is_valid_; + } + + /** + * @param[in] power_normalized_mel_cepstrum @f$(M+1)@f$-th order + * power-normalized mel-cepstral coefficients. + * @param[out] mel_cepstrum @f$M@f$-th order mel-cepstral coefficients. + * @return True on success, false on failure. + */ + bool Run(const std::vector& power_normalized_mel_cepstrum, + std::vector* mel_cepstrum) const; + + private: + const int num_order_; + + bool is_valid_; + + DISALLOW_COPY_AND_ASSIGN(MelCepstrumInversePowerNormalization); +}; + +} // namespace sptk + +#endif // SPTK_CONVERSION_MEL_CEPSTRUM_INVERSE_POWER_NORMALIZATION_H_ diff --git a/include/SPTK/conversion/mel_cepstrum_power_normalization.h b/include/SPTK/conversion/mel_cepstrum_power_normalization.h new file mode 100644 index 00000000..0302d99b --- /dev/null +++ b/include/SPTK/conversion/mel_cepstrum_power_normalization.h @@ -0,0 +1,120 @@ +// ------------------------------------------------------------------------ // +// Copyright 2021 SPTK Working Group // +// // +// Licensed under the Apache License, Version 2.0 (the "License"); // +// you may not use this file except in compliance with the License. // +// You may obtain a copy of the License at // +// // +// http://www.apache.org/licenses/LICENSE-2.0 // +// // +// Unless required by applicable law or agreed to in writing, software // +// distributed under the License is distributed on an "AS IS" BASIS, // +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // +// See the License for the specific language governing permissions and // +// limitations under the License. 
// +// ------------------------------------------------------------------------ // + +#ifndef SPTK_CONVERSION_MEL_CEPSTRUM_POWER_NORMALIZATION_H_ +#define SPTK_CONVERSION_MEL_CEPSTRUM_POWER_NORMALIZATION_H_ + +#include // std::vector + +#include "SPTK/conversion/cepstrum_to_autocorrelation.h" +#include "SPTK/math/frequency_transform.h" +#include "SPTK/utils/sptk_utils.h" + +namespace sptk { + +/** + * Convert mel-cepstral coefficients to power-normalized ones. + * + * The input is the @f$M@f$-th order mel-cepstral coefficients: + * @f[ + * \begin{array}{cccc} + * \tilde{c}(0), & \tilde{c}(1), & \ldots, & \tilde{c}(M), + * \end{array} + * @f] + * and the output is the @f$(M+1)@f$-th order power-normalized ones: + * @f[ + * \begin{array}{ccccc} + * P, & \tilde{c}'(0), & \tilde{c}'(1), & \ldots, & \tilde{c}'(M), + * \end{array} + * @f] + * where @f$P@f$ is the power and + * @f[ + * \tilde{c}'(m) = \left\{ \begin{array}{ll} + * \tilde{c}(0) - K, & m = 0 \\ + * \tilde{c}(m). & 1 \le m \le M + * \end{array} \right. + * @f] + */ +class MelCepstrumPowerNormalization { + public: + /** + * Buffer for MelCepstrumPowerNormalization class. + */ + class Buffer { + public: + Buffer() { + } + + virtual ~Buffer() { + } + + private: + std::vector cepstrum_; + std::vector autocorrelation_; + FrequencyTransform::Buffer buffer_for_frequency_transform_; + CepstrumToAutocorrelation::Buffer buffer_for_cepstrum_to_autocorrelation_; + + friend class MelCepstrumPowerNormalization; + DISALLOW_COPY_AND_ASSIGN(Buffer); + }; + + /** + * @param[in] num_order Order of coefficients, @f$M@f$. + * @param[in] impulse_response_length Length of impulse response. + * @param[in] alpha All-pass constant, @f$\alpha@f$. + */ + MelCepstrumPowerNormalization(int num_order, int impulse_response_length, + double alpha); + + virtual ~MelCepstrumPowerNormalization() { + } + + /** + * @return Order of coefficients.
+ */ + int GetNumOrder() const { + return frequency_transform_.GetNumInputOrder(); + } + + /** + * @return True if this object is valid. + */ + bool IsValid() const { + return is_valid_; + } + + /** + * @param[in] mel_cepstrum @f$M@f$-th order mel-cepstral coefficients. + * @param[out] power_normalized_mel_cepstrum @f$(M+1)@f$-th order + * power-normalized mel-cepstral coefficients. + * @return True on success, false on failure. + */ + bool Run(const std::vector& mel_cepstrum, + std::vector* power_normalized_mel_cepstrum, + MelCepstrumPowerNormalization::Buffer* buffer) const; + + private: + const FrequencyTransform frequency_transform_; + const CepstrumToAutocorrelation cepstrum_to_autocorrelation_; + + bool is_valid_; + + DISALLOW_COPY_AND_ASSIGN(MelCepstrumPowerNormalization); +}; + +} // namespace sptk + +#endif // SPTK_CONVERSION_MEL_CEPSTRUM_POWER_NORMALIZATION_H_ diff --git a/src/conversion/mel_cepstrum_inverse_power_normalization.cc b/src/conversion/mel_cepstrum_inverse_power_normalization.cc new file mode 100644 index 00000000..49a7c00e --- /dev/null +++ b/src/conversion/mel_cepstrum_inverse_power_normalization.cc @@ -0,0 +1,58 @@ +// ------------------------------------------------------------------------ // +// Copyright 2021 SPTK Working Group // +// // +// Licensed under the Apache License, Version 2.0 (the "License"); // +// you may not use this file except in compliance with the License. // +// You may obtain a copy of the License at // +// // +// http://www.apache.org/licenses/LICENSE-2.0 // +// // +// Unless required by applicable law or agreed to in writing, software // +// distributed under the License is distributed on an "AS IS" BASIS, // +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // +// See the License for the specific language governing permissions and // +// limitations under the License. 
// +// ------------------------------------------------------------------------ // + +#include "SPTK/conversion/mel_cepstrum_inverse_power_normalization.h" + +#include // std::copy +#include // std::size_t + +namespace sptk { + +MelCepstrumInversePowerNormalization::MelCepstrumInversePowerNormalization( + int num_order) + : num_order_(num_order), is_valid_(true) { + if (num_order_ < 0) { + is_valid_ = false; + return; + } +} + +bool MelCepstrumInversePowerNormalization::Run( + const std::vector& power_normalized_mel_cepstrum, + std::vector* mel_cepstrum) const { + // Check inputs. + if (!is_valid_ || + power_normalized_mel_cepstrum.size() != + static_cast(num_order_ + 2) || + NULL == mel_cepstrum) { + return false; + } + + // Prepare memories. + if (mel_cepstrum->size() != static_cast(num_order_ + 1)) { + mel_cepstrum->resize(num_order_ + 1); + } + + // Convert. + (*mel_cepstrum)[0] = + power_normalized_mel_cepstrum[0] + power_normalized_mel_cepstrum[1]; + std::copy(power_normalized_mel_cepstrum.begin() + 2, + power_normalized_mel_cepstrum.end(), mel_cepstrum->begin() + 1); + + return true; +} + +} // namespace sptk diff --git a/src/conversion/mel_cepstrum_power_normalization.cc b/src/conversion/mel_cepstrum_power_normalization.cc new file mode 100644 index 00000000..309891db --- /dev/null +++ b/src/conversion/mel_cepstrum_power_normalization.cc @@ -0,0 +1,77 @@ +// ------------------------------------------------------------------------ // +// Copyright 2021 SPTK Working Group // +// // +// Licensed under the Apache License, Version 2.0 (the "License"); // +// you may not use this file except in compliance with the License. // +// You may obtain a copy of the License at // +// // +// http://www.apache.org/licenses/LICENSE-2.0 // +// // +// Unless required by applicable law or agreed to in writing, software // +// distributed under the License is distributed on an "AS IS" BASIS, // +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// +// See the License for the specific language governing permissions and // +// limitations under the License. // +// ------------------------------------------------------------------------ // + +#include "SPTK/conversion/mel_cepstrum_power_normalization.h" + +#include // std::copy +#include // std::log +#include // std::size_t + +namespace sptk { + +MelCepstrumPowerNormalization::MelCepstrumPowerNormalization( + int num_order, int impulse_response_length, double alpha) + : frequency_transform_(num_order, impulse_response_length - 1, -alpha), + cepstrum_to_autocorrelation_(impulse_response_length - 1, 0, + impulse_response_length), + is_valid_(true) { + if (!sptk::IsInRange(num_order, 0, impulse_response_length - 1) || + !frequency_transform_.IsValid() || + !cepstrum_to_autocorrelation_.IsValid()) { + is_valid_ = false; + return; + } +} + +bool MelCepstrumPowerNormalization::Run( + const std::vector& mel_cepstrum, + std::vector* power_normalized_mel_cepstrum, + MelCepstrumPowerNormalization::Buffer* buffer) const { + // Check inputs. + const int length(GetNumOrder() + 1); + if (!is_valid_ || mel_cepstrum.size() != static_cast(length) || + NULL == power_normalized_mel_cepstrum || NULL == buffer) { + return false; + } + + // Prepare memories. + if (power_normalized_mel_cepstrum->size() != + static_cast(length + 1)) { + power_normalized_mel_cepstrum->resize(length + 1); + } + + // Calculate power of input mel-cepstrum. + if (!frequency_transform_.Run(mel_cepstrum, &buffer->cepstrum_, + &buffer->buffer_for_frequency_transform_)) { + return false; + } + if (!cepstrum_to_autocorrelation_.Run( + buffer->cepstrum_, &buffer->autocorrelation_, + &buffer->buffer_for_cepstrum_to_autocorrelation_)) { + return false; + } + + // Convert. 
+ const double power(std::log(buffer->autocorrelation_[0])); + (*power_normalized_mel_cepstrum)[0] = power; + (*power_normalized_mel_cepstrum)[1] = mel_cepstrum[0] - power; + std::copy(mel_cepstrum.begin() + 1, mel_cepstrum.end(), + power_normalized_mel_cepstrum->begin() + 2); + + return true; +} + +} // namespace sptk diff --git a/src/main/ipnorm.cc b/src/main/ipnorm.cc new file mode 100644 index 00000000..788d361f --- /dev/null +++ b/src/main/ipnorm.cc @@ -0,0 +1,160 @@ +// ------------------------------------------------------------------------ // +// Copyright 2021 SPTK Working Group // +// // +// Licensed under the Apache License, Version 2.0 (the "License"); // +// you may not use this file except in compliance with the License. // +// You may obtain a copy of the License at // +// // +// http://www.apache.org/licenses/LICENSE-2.0 // +// // +// Unless required by applicable law or agreed to in writing, software // +// distributed under the License is distributed on an "AS IS" BASIS, // +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // +// See the License for the specific language governing permissions and // +// limitations under the License. // +// ------------------------------------------------------------------------ // + +#include // std::ifstream +#include // std::setw +#include // std::cerr, std::cin, std::cout, std::endl, etc. 
+#include // std::ostringstream +#include // std::vector + +#include "GETOPT/ya_getopt.h" +#include "SPTK/conversion/mel_cepstrum_inverse_power_normalization.h" +#include "SPTK/utils/sptk_utils.h" + +namespace { + +const int kDefaultNumOrder(25); + +void PrintUsage(std::ostream* stream) { + // clang-format off + *stream << std::endl; + *stream << " ipnorm - inverse power normalization of mel-cepstrum" << std::endl; // NOLINT + *stream << std::endl; + *stream << " usage:" << std::endl; + *stream << " ipnorm [ options ] [ infile ] > stdout" << std::endl; + *stream << " options:" << std::endl; + *stream << " -m m : order of mel-cepstrum ( int)[" << std::setw(5) << std::right << kDefaultNumOrder << "][ 0 <= m <= ]" << std::endl; // NOLINT + *stream << " -h : print this message" << std::endl; + *stream << " infile:" << std::endl; + *stream << " power-normalized mel-cepstrum (double)[stdin]" << std::endl; // NOLINT + *stream << " stdout:" << std::endl; + *stream << " mel-cepstrum (double)" << std::endl; + *stream << std::endl; + *stream << " SPTK: version " << sptk::kVersion << std::endl; + *stream << std::endl; + // clang-format on +} + +} // namespace + +/** + * @a ipnorm [ @e option ] [ @e infile ] + * + * - @b -m @e int + * - order of mel-cepstral coefficients @f$(0 \le M)@f$ + * - @b infile @e str + * - double-type power-normalized mel-cepstral coefficients + * - @b stdout + * - double-type mel-cepstral coefficients + * + * @param[in] argc Number of arguments. + * @param[in] argv Argument vector. + * @return 0 on success, 1 on failure. 
+ */ +int main(int argc, char* argv[]) { + int num_order(kDefaultNumOrder); + + for (;;) { + const int option_char(getopt_long(argc, argv, "m:h", NULL, NULL)); + if (-1 == option_char) break; + + switch (option_char) { + case 'm': { + if (!sptk::ConvertStringToInteger(optarg, &num_order) || + num_order < 0) { + std::ostringstream error_message; + error_message << "The argument for the -m option must be a " + << "non-negative integer"; + sptk::PrintErrorMessage("ipnorm", error_message); + return 1; + } + break; + } + case 'h': { + PrintUsage(&std::cout); + return 0; + } + default: { + PrintUsage(&std::cerr); + return 1; + } + } + } + + const int num_input_files(argc - optind); + if (1 < num_input_files) { + std::ostringstream error_message; + error_message << "Too many input files"; + sptk::PrintErrorMessage("ipnorm", error_message); + return 1; + } + const char* input_file(0 == num_input_files ? NULL : argv[optind]); + + if (!sptk::SetBinaryMode()) { + std::ostringstream error_message; + error_message << "Cannot set translation mode"; + sptk::PrintErrorMessage("ipnorm", error_message); + return 1; + } + + std::ifstream ifs; + if (NULL != input_file) { + ifs.open(input_file, std::ios::in | std::ios::binary); + if (ifs.fail()) { + std::ostringstream error_message; + error_message << "Cannot open file " << input_file; + sptk::PrintErrorMessage("ipnorm", error_message); + return 1; + } + } + std::istream& input_stream(ifs.is_open() ? 
ifs : std::cin); + + sptk::MelCepstrumInversePowerNormalization + mel_cepstrum_inverse_power_normalization(num_order); + if (!mel_cepstrum_inverse_power_normalization.IsValid()) { + std::ostringstream error_message; + error_message + << "Failed to initialize MelCepstrumInversePowerNormalization"; + sptk::PrintErrorMessage("ipnorm", error_message); + return 1; + } + + const int input_length(num_order + 2); + const int output_length(num_order + 1); + std::vector power_normalized_mel_cepstrum(input_length); + std::vector mel_cepstrum(output_length); + + while (sptk::ReadStream(false, 0, 0, input_length, + &power_normalized_mel_cepstrum, &input_stream, + NULL)) { + if (!mel_cepstrum_inverse_power_normalization.Run( + power_normalized_mel_cepstrum, &mel_cepstrum)) { + std::ostringstream error_message; + error_message << "Failed to denormalize mel-cepstrum"; + sptk::PrintErrorMessage("ipnorm", error_message); + return 1; + } + + if (!sptk::WriteStream(0, output_length, mel_cepstrum, &std::cout, NULL)) { + std::ostringstream error_message; + error_message << "Failed to write mel-cepstrum"; + sptk::PrintErrorMessage("ipnorm", error_message); + return 1; + } + } + + return 0; +} diff --git a/src/main/mcpf.cc b/src/main/mcpf.cc index e146db36..8a0a3876 100644 --- a/src/main/mcpf.cc +++ b/src/main/mcpf.cc @@ -27,7 +27,7 @@ namespace { const int kDefaultNumOrder(25); -const int kDefaultImpulseResponseLength(1024); +const int kDefaultImpulseResponseLength(128); const int kDefaultOnsetIndex(2); const double kDefaultAlpha(0.35); const double kDefaultBeta(0.1); diff --git a/src/main/pnorm.cc b/src/main/pnorm.cc new file mode 100644 index 00000000..bae635c8 --- /dev/null +++ b/src/main/pnorm.cc @@ -0,0 +1,198 @@ +// ------------------------------------------------------------------------ // +// Copyright 2021 SPTK Working Group // +// // +// Licensed under the Apache License, Version 2.0 (the "License"); // +// you may not use this file except in compliance with the License. 
// +// You may obtain a copy of the License at // +// // +// http://www.apache.org/licenses/LICENSE-2.0 // +// // +// Unless required by applicable law or agreed to in writing, software // +// distributed under the License is distributed on an "AS IS" BASIS, // +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // +// See the License for the specific language governing permissions and // +// limitations under the License. // +// ------------------------------------------------------------------------ // + +#include // std::ifstream +#include // std::setw +#include // std::cerr, std::cin, std::cout, std::endl, etc. +#include // std::ostringstream +#include // std::vector + +#include "GETOPT/ya_getopt.h" +#include "SPTK/conversion/mel_cepstrum_power_normalization.h" +#include "SPTK/utils/sptk_utils.h" + +namespace { + +const int kDefaultNumOrder(25); +const int kDefaultImpulseResponseLength(128); +const double kDefaultAlpha(0.35); + +void PrintUsage(std::ostream* stream) { + // clang-format off + *stream << std::endl; + *stream << " pnorm - power normalization of mel-cepstrum" << std::endl; + *stream << std::endl; + *stream << " usage:" << std::endl; + *stream << " pnorm [ options ] [ infile ] > stdout" << std::endl; + *stream << " options:" << std::endl; + *stream << " -m m : order of mel-cepstrum ( int)[" << std::setw(5) << std::right << kDefaultNumOrder << "][ 0 <= m < l ]" << std::endl; // NOLINT + *stream << " -l l : length of impulse response ( int)[" << std::setw(5) << std::right << kDefaultImpulseResponseLength << "][ 2 <= l <= ]" << std::endl; // NOLINT + *stream << " -a a : all-pass constant (double)[" << std::setw(5) << std::right << kDefaultAlpha << "][ -1.0 < a < 1.0 ]" << std::endl; // NOLINT + *stream << " -h : print this message" << std::endl; + *stream << " infile:" << std::endl; + *stream << " mel-cepstrum (double)[stdin]" << std::endl; // NOLINT + *stream << " stdout:" << std::endl; + *stream << " power-normalized 
mel-cepstrum (double)" << std::endl; + *stream << std::endl; + *stream << " SPTK: version " << sptk::kVersion << std::endl; + *stream << std::endl; + // clang-format on +} + +} // namespace + +/** + * @a pnorm [ @e option ] [ @e infile ] + * + * - @b -m @e int + * - order of mel-cepstral coefficients @f$(0 \le M < L)@f$ + * - @b -l @e int + * - length of impulse response @f$(M < L)@f$ + * - @b -a @e double + * - alpha @f$(|\alpha|<1)@f$ + * - @b infile @e str + * - double-type mel-cepstral coefficients + * - @b stdout + * - double-type power-normalized mel-cepstral coefficients + * + * @param[in] argc Number of arguments. + * @param[in] argv Argument vector. + * @return 0 on success, 1 on failure. + */ +int main(int argc, char* argv[]) { + int num_order(kDefaultNumOrder); + int impulse_response_length(kDefaultImpulseResponseLength); + double alpha(kDefaultAlpha); + + for (;;) { + const int option_char(getopt_long(argc, argv, "m:l:a:h", NULL, NULL)); + if (-1 == option_char) break; + + switch (option_char) { + case 'm': { + if (!sptk::ConvertStringToInteger(optarg, &num_order) || + num_order < 0) { + std::ostringstream error_message; + error_message << "The argument for the -m option must be a " + << "non-negative integer"; + sptk::PrintErrorMessage("pnorm", error_message); + return 1; + } + break; + } + case 'l': { + if (!sptk::ConvertStringToInteger(optarg, &impulse_response_length)) { + std::ostringstream error_message; + error_message << "The argument for the -l option must be an integer"; + sptk::PrintErrorMessage("mcpf", error_message); + return 1; + } + break; + } + case 'a': { + if (!sptk::ConvertStringToDouble(optarg, &alpha) || + !sptk::IsValidAlpha(alpha)) { + std::ostringstream error_message; + error_message + << "The argument for the -a option must be in (-1.0, 1.0)"; + sptk::PrintErrorMessage("pnorm", error_message); + return 1; + } + break; + } + case 'h': { + PrintUsage(&std::cout); + return 0; + } + default: { + PrintUsage(&std::cerr); + return 1; + 
} + } + } + + if (impulse_response_length <= num_order) { + std::ostringstream error_message; + error_message + << "Order of mel-cepstrum must be less than length of impulse response"; + sptk::PrintErrorMessage("pnorm", error_message); + return 1; + } + + const int num_input_files(argc - optind); + if (1 < num_input_files) { + std::ostringstream error_message; + error_message << "Too many input files"; + sptk::PrintErrorMessage("pnorm", error_message); + return 1; + } + const char* input_file(0 == num_input_files ? NULL : argv[optind]); + + if (!sptk::SetBinaryMode()) { + std::ostringstream error_message; + error_message << "Cannot set translation mode"; + sptk::PrintErrorMessage("pnorm", error_message); + return 1; + } + + std::ifstream ifs; + if (NULL != input_file) { + ifs.open(input_file, std::ios::in | std::ios::binary); + if (ifs.fail()) { + std::ostringstream error_message; + error_message << "Cannot open file " << input_file; + sptk::PrintErrorMessage("pnorm", error_message); + return 1; + } + } + std::istream& input_stream(ifs.is_open() ? 
ifs : std::cin); + + sptk::MelCepstrumPowerNormalization mel_cepstrum_power_normalization( + num_order, impulse_response_length, alpha); + sptk::MelCepstrumPowerNormalization::Buffer buffer; + if (!mel_cepstrum_power_normalization.IsValid()) { + std::ostringstream error_message; + error_message << "Failed to initialize MelCepstrumPowerNormalization"; + sptk::PrintErrorMessage("pnorm", error_message); + return 1; + } + + const int input_length(num_order + 1); + const int output_length(num_order + 2); + std::vector mel_cepstrum(input_length); + std::vector power_normalized_mel_cepstrum(output_length); + + while (sptk::ReadStream(false, 0, 0, input_length, &mel_cepstrum, + &input_stream, NULL)) { + if (!mel_cepstrum_power_normalization.Run( + mel_cepstrum, &power_normalized_mel_cepstrum, &buffer)) { + std::ostringstream error_message; + error_message << "Failed to normalize mel-cepstrum"; + sptk::PrintErrorMessage("pnorm", error_message); + return 1; + } + + if (!sptk::WriteStream(0, output_length, power_normalized_mel_cepstrum, + &std::cout, NULL)) { + std::ostringstream error_message; + error_message << "Failed to write power-normalized mel-cepstrum"; + sptk::PrintErrorMessage("pnorm", error_message); + return 1; + } + } + + return 0; +} diff --git a/test/test_ipnorm.bats b/test/test_ipnorm.bats new file mode 100755 index 00000000..73d56261 --- /dev/null +++ b/test/test_ipnorm.bats @@ -0,0 +1,34 @@ +#!/usr/bin/env bats +# ------------------------------------------------------------------------ # +# Copyright 2021 SPTK Working Group # +# # +# Licensed under the Apache License, Version 2.0 (the "License"); # +# you may not use this file except in compliance with the License. 
# +# You may obtain a copy of the License at # +# # +# http://www.apache.org/licenses/LICENSE-2.0 # +# # +# Unless required by applicable law or agreed to in writing, software # +# distributed under the License is distributed on an "AS IS" BASIS, # +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # +# See the License for the specific language governing permissions and # +# limitations under the License. # +# ------------------------------------------------------------------------ # + +sptk3=tools/sptk/bin +sptk4=bin +tmp=test_ipnorm + +setup() { + mkdir -p $tmp +} + +teardown() { + rm -rf $tmp +} + +@test "ipnorm: valgrind" { + $sptk3/nrand -l 20 > $tmp/1 + run valgrind $sptk4/ipnorm -m 9 $tmp/1 + [ "$(echo "${lines[-1]}" | sed -r 's/.*SUMMARY: ([0-9]*) .*/\1/')" -eq 0 ] +} diff --git a/test/test_pnorm.bats b/test/test_pnorm.bats new file mode 100755 index 00000000..f831df0b --- /dev/null +++ b/test/test_pnorm.bats @@ -0,0 +1,54 @@ +#!/usr/bin/env bats +# ------------------------------------------------------------------------ # +# Copyright 2021 SPTK Working Group # +# # +# Licensed under the Apache License, Version 2.0 (the "License"); # +# you may not use this file except in compliance with the License. # +# You may obtain a copy of the License at # +# # +# http://www.apache.org/licenses/LICENSE-2.0 # +# # +# Unless required by applicable law or agreed to in writing, software # +# distributed under the License is distributed on an "AS IS" BASIS, # +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # +# See the License for the specific language governing permissions and # +# limitations under the License. 
# +# ------------------------------------------------------------------------ # + +sptk3=tools/sptk/bin +sptk4=bin +tmp=test_pnorm + +setup() { + mkdir -p $tmp +} + +teardown() { + rm -rf $tmp +} + +@test "pnorm: compatibility" { + $sptk3/nrand -l 20 | + $sptk3/mgc2mgc -m 9 -M 127 -a 0.1 -G 1 -U | + $sptk3/sopr -P | + $sptk3/vsum -t 128 | + $sptk3/sopr -LN > $tmp/1 + $sptk3/nrand -l 20 | + $sptk4/pnorm -m 9 -l 128 -a 0.1 | + $sptk3/bcp +d -n 10 -e 0 > $tmp/2 + run $sptk4/aeq $tmp/1 $tmp/2 + [ "$status" -eq 0 ] +} + +@test "pnorm: reversibility" { + $sptk3/nrand -l 20 > $tmp/1 + $sptk4/pnorm -m 9 $tmp/1 | $sptk4/ipnorm -m 9 > $tmp/2 + run $sptk4/aeq $tmp/1 $tmp/2 + [ "$status" -eq 0 ] +} + +@test "pnorm: valgrind" { + $sptk3/nrand -l 20 > $tmp/1 + run valgrind $sptk4/pnorm -m 9 $tmp/1 + [ "$(echo "${lines[-1]}" | sed -r 's/.*SUMMARY: ([0-9]*) .*/\1/')" -eq 0 ] +} From 5d809e15063c68e28f9ec9a3be09b298565721da Mon Sep 17 00:00:00 2001 From: takenori-y Date: Wed, 8 Nov 2023 17:01:08 +0900 Subject: [PATCH 2/6] multiply 0.5 --- .../conversion/mel_cepstrum_inverse_power_normalization.h | 8 ++++---- .../SPTK/conversion/mel_cepstrum_power_normalization.h | 6 +++--- .../mel_cepstrum_inverse_power_normalization.cc | 4 ++-- src/conversion/mel_cepstrum_power_normalization.cc | 6 +++--- test/test_pnorm.bats | 2 +- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/include/SPTK/conversion/mel_cepstrum_inverse_power_normalization.h b/include/SPTK/conversion/mel_cepstrum_inverse_power_normalization.h index dcc6ca28..340e2da9 100644 --- a/include/SPTK/conversion/mel_cepstrum_inverse_power_normalization.h +++ b/include/SPTK/conversion/mel_cepstrum_inverse_power_normalization.h @@ -30,11 +30,11 @@ namespace sptk { * coefficients: * @f[ * \begin{array}{ccccc} - * P, & \tilde{c}'(0), & \tilde{c}'(1), & \ldots, & \tilde{c}'(M), + * \log K, & \tilde{c}'(0), & \tilde{c}'(1), & \ldots, & \tilde{c}'(M), * \end{array} * @f] - * where @f$P@f$ is the power and the output is the 
@f$M@f$-th order - * mel-cepstral coefficients: + * where @f$K@f$ is the square root of power and the output is the @f$M@f$-th + * order mel-cepstral coefficients: * @f[ * \begin{array}{cccc} * \tilde{c}(0), & \tilde{c}(1), & \ldots, & \tilde{c}(M), @@ -43,7 +43,7 @@ namespace sptk { * where * @f[ * \tilde{c}(m) = \left\{ \begin{array}{ll} - * \tilde{c}'(0) + K, & m = 0 \\ + * \tilde{c}'(0) + \log K, & m = 0 \\ * \tilde{c}'(m). & 1 \le m \le M * \end{array} \right. * @f] diff --git a/include/SPTK/conversion/mel_cepstrum_power_normalization.h b/include/SPTK/conversion/mel_cepstrum_power_normalization.h index 0302d99b..95544951 100644 --- a/include/SPTK/conversion/mel_cepstrum_power_normalization.h +++ b/include/SPTK/conversion/mel_cepstrum_power_normalization.h @@ -37,13 +37,13 @@ namespace sptk { * and the output is the @f$(M+1)@f$-th order power-normalized ones: * @f[ * \begin{array}{ccccc} - * P, & \tilde{c}'(0), & \tilde{c}'(1), & \ldots, & \tilde{c}'(M), + * \log K, & \tilde{c}'(0), & \tilde{c}'(1), & \ldots, & \tilde{c}'(M), * \end{array} * @f] - * where @f$P@f$ is the power and + * where @f$K@f$ is the square root of power and * @f[ * \tilde{c}'(m) = \left\{ \begin{array}{ll} - * \tilde{c}(0) - K, & m = 0 \\ + * \tilde{c}(0) - \log K, & m = 0 \\ * \tilde{c}(m). & 1 \le m \le M * \end{array} \right. * @f] diff --git a/src/conversion/mel_cepstrum_inverse_power_normalization.cc b/src/conversion/mel_cepstrum_inverse_power_normalization.cc index 49a7c00e..dfc70f2f 100644 --- a/src/conversion/mel_cepstrum_inverse_power_normalization.cc +++ b/src/conversion/mel_cepstrum_inverse_power_normalization.cc @@ -47,8 +47,8 @@ bool MelCepstrumInversePowerNormalization::Run( } // Convert. 
- (*mel_cepstrum)[0] = - power_normalized_mel_cepstrum[0] + power_normalized_mel_cepstrum[1]; + const double log_k(power_normalized_mel_cepstrum[0]); + (*mel_cepstrum)[0] = power_normalized_mel_cepstrum[1] + log_k; std::copy(power_normalized_mel_cepstrum.begin() + 2, power_normalized_mel_cepstrum.end(), mel_cepstrum->begin() + 1); diff --git a/src/conversion/mel_cepstrum_power_normalization.cc b/src/conversion/mel_cepstrum_power_normalization.cc index 309891db..38f45806 100644 --- a/src/conversion/mel_cepstrum_power_normalization.cc +++ b/src/conversion/mel_cepstrum_power_normalization.cc @@ -65,9 +65,9 @@ bool MelCepstrumPowerNormalization::Run( } // Convert. - const double power(std::log(buffer->autocorrelation_[0])); - (*power_normalized_mel_cepstrum)[0] = power; - (*power_normalized_mel_cepstrum)[1] = mel_cepstrum[0] - power; + const double log_k(0.5 * std::log(buffer->autocorrelation_[0])); + (*power_normalized_mel_cepstrum)[0] = log_k; + (*power_normalized_mel_cepstrum)[1] = mel_cepstrum[0] - log_k; std::copy(mel_cepstrum.begin() + 1, mel_cepstrum.end(), power_normalized_mel_cepstrum->begin() + 2); diff --git a/test/test_pnorm.bats b/test/test_pnorm.bats index f831df0b..15928065 100755 --- a/test/test_pnorm.bats +++ b/test/test_pnorm.bats @@ -32,7 +32,7 @@ teardown() { $sptk3/mgc2mgc -m 9 -M 127 -a 0.1 -G 1 -U | $sptk3/sopr -P | $sptk3/vsum -t 128 | - $sptk3/sopr -LN > $tmp/1 + $sptk3/sopr -LN -m 0.5 > $tmp/1 $sptk3/nrand -l 20 | $sptk4/pnorm -m 9 -l 128 -a 0.1 | $sptk3/bcp +d -n 10 -e 0 > $tmp/2 From d681e91a4c34b22fa1747ed5f1d12759101e4756 Mon Sep 17 00:00:00 2001 From: takenori-y Date: Thu, 25 Jul 2024 20:33:20 +0900 Subject: [PATCH 3/6] bug fix --- ...el_cepstrum_inverse_power_normalization.cc | 6 ++++ src/main/ipnorm.cc | 4 +-- src/main/mgcep.cc | 34 ++++++++----------- src/main/pnorm.cc | 5 +-- test/test_pnorm.bats | 23 ++++++++++--- 5 files changed, 45 insertions(+), 27 deletions(-) diff --git 
a/src/conversion/mel_cepstrum_inverse_power_normalization.cc b/src/conversion/mel_cepstrum_inverse_power_normalization.cc index 8168aac0..d657bc91 100644 --- a/src/conversion/mel_cepstrum_inverse_power_normalization.cc +++ b/src/conversion/mel_cepstrum_inverse_power_normalization.cc @@ -55,4 +55,10 @@ bool MelCepstrumInversePowerNormalization::Run( return true; } +bool MelCepstrumInversePowerNormalization::Run( + std::vector<double>* input_and_output, double power) const { + if (NULL == input_and_output) return false; + return Run(*input_and_output, power, input_and_output); +} + } // namespace sptk diff --git a/src/main/ipnorm.cc b/src/main/ipnorm.cc index 162d6682..11b3bfd8 100644 --- a/src/main/ipnorm.cc +++ b/src/main/ipnorm.cc @@ -137,8 +137,8 @@ int main(int argc, char* argv[]) { double power; while (sptk::ReadStream(&power, &input_stream) && - sptk::ReadStream(false, 0, 0, input_length, &mel_cepstrum, - &input_stream, NULL)) { + sptk::ReadStream(false, 0, 0, length, &mel_cepstrum, &input_stream, + NULL)) { if (!mel_cepstrum_inverse_power_normalization.Run(&mel_cepstrum, power)) { std::ostringstream error_message; error_message << "Failed to denormalize mel-cepstrum"; diff --git a/src/main/mgcep.cc b/src/main/mgcep.cc index 7a435191..bbade3d2 100644 --- a/src/main/mgcep.cc +++ b/src/main/mgcep.cc @@ -46,7 +46,6 @@ enum OutputFormats { kGainNormalizedCepstrum, kGainNormalizedMlsaFilterCoefficients, kPowerNormalizedCepstrum, - kPowerNormalizedMlsaFilterCoefficients, kNumOutputFormats }; @@ -84,8 +83,7 @@ void PrintUsage(std::ostream* stream) { *stream << " 1 (mlsa filter coefficients)" << std::endl; *stream << " 2 (gain normalized mel-cepstrum)" << std::endl; *stream << " 3 (gain normalized mlsa filter coefficients)" << std::endl; // NOLINT - *stream << " 4 (power normalized mel-cepstrum)" << std::endl; - *stream << " 5 (power normalized mlsa filter coefficients)" << std::endl; // NOLINT + *stream << " 4 (power + power normalized mel-cepstrum)" << std::endl; // NOLINT 
*stream << " (level 2)" << std::endl; *stream << " -i i : maximum number of iterations ( int)[" << std::setw(5) << std::right << kDefaultNumIteration << "][ 0 <= i <= ]" << std::endl; // NOLINT *stream << " -d d : convergence threshold (double)[" << std::setw(5) << std::right << kDefaultConvergenceThreshold << "][ 0.0 <= d <= ]" << std::endl; // NOLINT @@ -101,6 +99,7 @@ void PrintUsage(std::ostream* stream) { *stream << " value of l must be a power of 2" << std::endl; *stream << " if c = 0 or g = 0, standard mel-cepstral analyzer is used" << std::endl; // NOLINT *stream << " if c > 0 or g != 0, mel-generalized cepstral analyzer is used" << std::endl; // NOLINT + *stream << " if o = 4, output order is m+1 instead of m" << std::endl; *stream << std::endl; *stream << " SPTK: version " << sptk::kVersion << std::endl; *stream << std::endl; @@ -136,7 +135,6 @@ void PrintUsage(std::ostream* stream) { * \arg @c 2 gain normalized mel-cepstrum * \arg @c 3 gain normalized MLSA filter coefficients * \arg @c 4 power normalized mel-cepstrum - * \arg @c 5 power normalized MLSA filter coefficients * - @b -i @e int * - number of iterations @f$(0 \le J)@f$ * - @b -d @e double @@ -460,6 +458,18 @@ int main(int argc, char* argv[]) { return 1; } + // mc -> b + if (0.0 != alpha && + (kMlsaFilterCoefficients == output_format || + kGainNormalizedMlsaFilterCoefficients == output_format)) { + if (!mel_cepstrum_to_mlsa_digital_filter_coefficients.Run(&output)) { + std::ostringstream error_message; + error_message << "Failed to convert to MLSA filter coefficients"; + sptk::PrintErrorMessage("mgcep", error_message); + return 1; + } + } + // gnorm if (kGainNormalizedCepstrum == output_format || kGainNormalizedMlsaFilterCoefficients == output_format) { @@ -472,8 +482,7 @@ int main(int argc, char* argv[]) { } // pnorm - if (kPowerNormalizedCepstrum == output_format || - kPowerNormalizedMlsaFilterCoefficients == output_format) { + if (kPowerNormalizedCepstrum == output_format) { if 
(!mel_cepstrum_power_normalization.Run( &output, &power, &buffer_for_power_normalization)) { std::ostringstream error_message; @@ -489,19 +498,6 @@ int main(int argc, char* argv[]) { } } - // mc -> b - if (0.0 != alpha && - (kMlsaFilterCoefficients == output_format || - kGainNormalizedMlsaFilterCoefficients == output_format || - kPowerNormalizedMlsaFilterCoefficients == output_format)) { - if (!mel_cepstrum_to_mlsa_digital_filter_coefficients.Run(&output)) { - std::ostringstream error_message; - error_message << "Failed to convert to MLSA filter coefficients"; - sptk::PrintErrorMessage("mgcep", error_message); - return 1; - } - } - if (!sptk::WriteStream(0, output_length, output, &std::cout, NULL)) { std::ostringstream error_message; error_message << "Failed to write mel-generalized cepstrum"; diff --git a/src/main/pnorm.cc b/src/main/pnorm.cc index e4ff7b06..12338844 100644 --- a/src/main/pnorm.cc +++ b/src/main/pnorm.cc @@ -174,8 +174,8 @@ int main(int argc, char* argv[]) { std::vector<double> mel_cepstrum(length); double power; - while (sptk::ReadStream(false, 0, 0, input_length, &mel_cepstrum, - &input_stream, NULL)) { + while (sptk::ReadStream(false, 0, 0, length, &mel_cepstrum, &input_stream, + NULL)) { if (!mel_cepstrum_power_normalization.Run(&mel_cepstrum, &power, &buffer)) { std::ostringstream error_message; error_message << "Failed to normalize mel-cepstrum"; @@ -189,6 +189,7 @@ int main(int argc, char* argv[]) { sptk::PrintErrorMessage("pnorm", error_message); return 1; } + if (!sptk::WriteStream(0, length, mel_cepstrum, &std::cout, NULL)) { std::ostringstream error_message; error_message << "Failed to write power-normalized mel-cepstrum"; diff --git a/test/test_pnorm.bats b/test/test_pnorm.bats index 15928065..af8a7dc6 100755 --- a/test/test_pnorm.bats +++ b/test/test_pnorm.bats @@ -28,15 +28,30 @@ teardown() { } @test "pnorm: compatibility" { + # log P $sptk3/nrand -l 20 | $sptk3/mgc2mgc -m 9 -M 127 -a 0.1 -G 1 -U | $sptk3/sopr -P | $sptk3/vsum -t 128 | - 
$sptk3/sopr -LN -m 0.5 > $tmp/1 + $sptk3/sopr -LN > $tmp/1 + # log sqrt(P) + $sptk3/sopr $tmp/1 -m 0.5 > $tmp/2 + # b0 $sptk3/nrand -l 20 | - $sptk4/pnorm -m 9 -l 128 -a 0.1 | - $sptk3/bcp +d -n 10 -e 0 > $tmp/2 - run $sptk4/aeq $tmp/1 $tmp/2 + $sptk3/mc2b -m 9 -a 0.1 | + $sptk3/bcp +d -n 9 -s 0 -e 0 | + $sptk3/vopr -s $tmp/2 > $tmp/3 + # mc + $sptk3/nrand -l 20 | + $sptk3/mc2b -m 9 -a 0.1 | + $sptk3/bcp +d -n 9 -s 1 | + $sptk4/merge $tmp/3 -m 8 -M 0 -s 0 | + $sptk3/b2mc -m 9 -a 0.1 | + $sptk4/merge $tmp/1 -m 9 -M 0 -s 0 > $tmp/4 + # mc + $sptk3/nrand -l 20 | + $sptk4/pnorm -m 9 -l 128 -a 0.1 > $tmp/5 + run $sptk4/aeq $tmp/4 $tmp/5 [ "$status" -eq 0 ] } From b28e64a6b84b5b7ad611abfb1025487f9857e10b Mon Sep 17 00:00:00 2001 From: takenori-y Date: Thu, 25 Jul 2024 20:41:47 +0900 Subject: [PATCH 4/6] fix doc --- README.md | 12 ++---------- src/main/mgcep.cc | 2 +- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 2961e07b..f838ad77 100644 --- a/README.md +++ b/README.md @@ -122,25 +122,17 @@ deactivate - Some option names - No memory leaks - Thread-safe -- New features: +- New main features: - Aperiodicity extraction (`ap`) - - Conversion from/to log area ratio (`lar2par` and `par2lar`) - Dynamic range compression (`drc`) - - Entropy calculation (`entropy`) - - Huffman coding (`huffman`, `huffman_encode`, and `huffman_decode`) - Magic number interpolation (`magic_intpl`) - Median filter (`medfilt`) - - Mel-cepstrum postfilter (`mcpf`) - Mel-filter-bank extraction (`fbank`) - Nonrecursive MLPG (`mlpg -R 1`) - Pitch adaptive spectrum estimation (`pitch_spec`) - - Pitch extraction by DIO used in WORLD (`pitch -a 3`) + - Pitch extraction used in WORLD (`pitch -a 3` and `pitch -a 4`) - PLP extraction (`plp`) - - Pole-zero plot (`gpolezero`) - - Scalar quantization (`quantize` and `dequantize`) - Sinusoidal generation from pitch (`pitch2sin`) - - Spectrogram plot (`gspecgram`) - - Stability check of LPC coefficients (`lpccheck`) - 
Subband decomposition (`pqmf` and `ipqmf`) - WORLD synthesis (`world_synth`) - Windows build support diff --git a/src/main/mgcep.cc b/src/main/mgcep.cc index bbade3d2..49c5d634 100644 --- a/src/main/mgcep.cc +++ b/src/main/mgcep.cc @@ -83,7 +83,7 @@ void PrintUsage(std::ostream* stream) { *stream << " 1 (mlsa filter coefficients)" << std::endl; *stream << " 2 (gain normalized mel-cepstrum)" << std::endl; *stream << " 3 (gain normalized mlsa filter coefficients)" << std::endl; // NOLINT - *stream << " 4 (power + power normalized mel-cepstrum)" << std::endl; // NOLINT + *stream << " 4 (log power + power normalized mel-cepstrum)" << std::endl; // NOLINT *stream << " (level 2)" << std::endl; *stream << " -i i : maximum number of iterations ( int)[" << std::setw(5) << std::right << kDefaultNumIteration << "][ 0 <= i <= ]" << std::endl; // NOLINT *stream << " -d d : convergence threshold (double)[" << std::setw(5) << std::right << kDefaultConvergenceThreshold << "][ 0.0 <= d <= ]" << std::endl; // NOLINT From ba1764d00383eadcbcef514e959d3ad863fb8bf0 Mon Sep 17 00:00:00 2001 From: takenori-y Date: Thu, 25 Jul 2024 21:06:33 +0900 Subject: [PATCH 5/6] mitigate arg range --- src/main/mcpf.cc | 20 +++++++------------- src/main/mgcep.cc | 8 +++++--- src/main/pnorm.cc | 20 +++++++------------- src/postfilter/mel_cepstrum_postfilter.cc | 3 +-- 4 files changed, 20 insertions(+), 31 deletions(-) diff --git a/src/main/mcpf.cc b/src/main/mcpf.cc index 8a0a3876..4ea280e9 100644 --- a/src/main/mcpf.cc +++ b/src/main/mcpf.cc @@ -40,7 +40,7 @@ void PrintUsage(std::ostream* stream) { *stream << " usage:" << std::endl; *stream << " mcpf [ options ] [ infile ] > stdout" << std::endl; *stream << " options:" << std::endl; - *stream << " -m m : order of mel-cepstrum ( int)[" << std::setw(5) << std::right << kDefaultNumOrder << "][ 0 <= m < l ]" << std::endl; // NOLINT + *stream << " -m m : order of mel-cepstrum ( int)[" << std::setw(5) << std::right << kDefaultNumOrder << "][ 0 <= m < ]" << 
std::endl; // NOLINT *stream << " -l l : length of impulse response ( int)[" << std::setw(5) << std::right << kDefaultImpulseResponseLength << "][ 2 <= l <= ]" << std::endl; // NOLINT *stream << " -s s : onset index ( int)[" << std::setw(5) << std::right << kDefaultOnsetIndex << "][ 0 <= s <= m ]" << std::endl; // NOLINT *stream << " -a a : all-pass constant (double)[" << std::setw(5) << std::right << kDefaultAlpha << "][ -1.0 < a < 1.0 ]" << std::endl; // NOLINT @@ -64,9 +64,9 @@ void PrintUsage(std::ostream* stream) { * @a mcpf [ @e option ] [ @e infile ] * * - @b -m @e int - * - order of mel-cepstral coefficients @f$(0 \le M < L)@f$ + * - order of mel-cepstral coefficients @f$(0 \le M)@f$ * - @b -l @e int - * - length of impulse response @f$(M < L)@f$ + * - length of impulse response * - @b -s @e int * - onset index @f$(0 \le S \le M)@f$ * - @b -a @e double @@ -106,9 +106,11 @@ int main(int argc, char* argv[]) { break; } case 'l': { - if (!sptk::ConvertStringToInteger(optarg, &impulse_response_length)) { + if (!sptk::ConvertStringToInteger(optarg, &impulse_response_length) || + impulse_response_length <= 0) { std::ostringstream error_message; - error_message << "The argument for the -l option must be an integer"; + error_message + << "The argument for the -l option must be a positive integer"; sptk::PrintErrorMessage("mcpf", error_message); return 1; } @@ -156,14 +158,6 @@ int main(int argc, char* argv[]) { } } - if (impulse_response_length <= num_order) { - std::ostringstream error_message; - error_message - << "Order of mel-cepstrum must be less than length of impulse response"; - sptk::PrintErrorMessage("mcpf", error_message); - return 1; - } - if (num_order < onset_index) { std::ostringstream error_message; error_message << "Order of mel-cepstrum must be greater than onset index"; diff --git a/src/main/mgcep.cc b/src/main/mgcep.cc index 49c5d634..ea60cea8 100644 --- a/src/main/mgcep.cc +++ b/src/main/mgcep.cc @@ -144,7 +144,7 @@ void PrintUsage(std::ostream* 
stream) { * - @b -E @e double * - relative floor in decibels * - @b -n @e int - * - length of impulse response (valid only for @c -o 4 and @c -o 5) + * - length of impulse response (valid only for @c -o 4) * - @b infile @e str * - double-type windowed sequence or spectrum * - @b stdout @@ -313,9 +313,11 @@ int main(int argc, char* argv[]) { break; } case 'n': { - if (!sptk::ConvertStringToInteger(optarg, &impulse_response_length)) { + if (!sptk::ConvertStringToInteger(optarg, &impulse_response_length) || + impulse_response_length <= 0) { std::ostringstream error_message; - error_message << "The argument for the -n option must be an integer"; + error_message + << "The argument for the -n option must be a positive integer"; sptk::PrintErrorMessage("mgcep", error_message); return 1; } diff --git a/src/main/pnorm.cc b/src/main/pnorm.cc index 12338844..f0f4b88a 100644 --- a/src/main/pnorm.cc +++ b/src/main/pnorm.cc @@ -38,7 +38,7 @@ void PrintUsage(std::ostream* stream) { *stream << " usage:" << std::endl; *stream << " pnorm [ options ] [ infile ] > stdout" << std::endl; *stream << " options:" << std::endl; - *stream << " -m m : order of mel-cepstrum ( int)[" << std::setw(5) << std::right << kDefaultNumOrder << "][ 0 <= m < l ]" << std::endl; // NOLINT + *stream << " -m m : order of mel-cepstrum ( int)[" << std::setw(5) << std::right << kDefaultNumOrder << "][ 0 <= m < ]" << std::endl; // NOLINT *stream << " -l l : length of impulse response ( int)[" << std::setw(5) << std::right << kDefaultImpulseResponseLength << "][ 2 <= l <= ]" << std::endl; // NOLINT *stream << " -a a : all-pass constant (double)[" << std::setw(5) << std::right << kDefaultAlpha << "][ -1.0 < a < 1.0 ]" << std::endl; // NOLINT *stream << " -h : print this message" << std::endl; @@ -58,9 +58,9 @@ void PrintUsage(std::ostream* stream) { * @a pnorm [ @e option ] [ @e infile ] * * - @b -m @e int - * - order of mel-cepstral coefficients @f$(0 \le M < L)@f$ + * - order of mel-cepstral coefficients @f$(0 
\le M)@f$ * - @b -l @e int - * - length of impulse response @f$(M < L)@f$ + * - length of impulse response * - @b -a @e double * - alpha @f$(|\alpha|<1)@f$ * - @b infile @e str @@ -94,9 +94,11 @@ int main(int argc, char* argv[]) { break; } case 'l': { - if (!sptk::ConvertStringToInteger(optarg, &impulse_response_length)) { + if (!sptk::ConvertStringToInteger(optarg, &impulse_response_length) || + impulse_response_length <= 0) { std::ostringstream error_message; - error_message << "The argument for the -l option must be an integer"; + error_message + << "The argument for the -l option must be a positive integer"; sptk::PrintErrorMessage("mcpf", error_message); return 1; } @@ -124,14 +126,6 @@ int main(int argc, char* argv[]) { } } - if (impulse_response_length <= num_order) { - std::ostringstream error_message; - error_message - << "Order of mel-cepstrum must be less than length of impulse response"; - sptk::PrintErrorMessage("pnorm", error_message); - return 1; - } - const int num_input_files(argc - optind); if (1 < num_input_files) { std::ostringstream error_message; diff --git a/src/postfilter/mel_cepstrum_postfilter.cc b/src/postfilter/mel_cepstrum_postfilter.cc index 21033033..a7e086db 100644 --- a/src/postfilter/mel_cepstrum_postfilter.cc +++ b/src/postfilter/mel_cepstrum_postfilter.cc @@ -34,8 +34,7 @@ MelCepstrumPostfilter::MelCepstrumPostfilter(int num_order, mel_cepstrum_to_mlsa_digital_filter_coefficients_(num_order, alpha), mlsa_digital_filter_coefficients_to_mel_cepstrum_(num_order, alpha), is_valid_(true) { - if (!sptk::IsInRange(num_order, 0, impulse_response_length - 1) || - !sptk::IsInRange(onset_index_, 0, num_order) || + if (!sptk::IsInRange(onset_index_, 0, num_order) || !frequency_transform_.IsValid() || !cepstrum_to_autocorrelation_.IsValid() || !mel_cepstrum_to_mlsa_digital_filter_coefficients_.IsValid() || From bb09af401561e15533feb72fabd925e2f598a9fa Mon Sep 17 00:00:00 2001 From: takenori-y Date: Thu, 25 Jul 2024 21:29:03 +0900 Subject: 
[PATCH 6/6] fix doc [skip ci] --- src/main/mcpf.cc | 2 +- src/main/pnorm.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/mcpf.cc b/src/main/mcpf.cc index 4ea280e9..f62cda01 100644 --- a/src/main/mcpf.cc +++ b/src/main/mcpf.cc @@ -66,7 +66,7 @@ void PrintUsage(std::ostream* stream) { * - @b -m @e int * - order of mel-cepstral coefficients @f$(0 \le M)@f$ * - @b -l @e int - * - length of impulse response + * - length of impulse response @f$(2 \le L)@f$ * - @b -s @e int * - onset index @f$(0 \le S \le M)@f$ * - @b -a @e double diff --git a/src/main/pnorm.cc b/src/main/pnorm.cc index f0f4b88a..878d6069 100644 --- a/src/main/pnorm.cc +++ b/src/main/pnorm.cc @@ -60,7 +60,7 @@ void PrintUsage(std::ostream* stream) { * - @b -m @e int * - order of mel-cepstral coefficients @f$(0 \le M)@f$ * - @b -l @e int - * - length of impulse response + * - length of impulse response @f$(2 \le L)@f$ * - @b -a @e double * - alpha @f$(|\alpha|<1)@f$ * - @b infile @e str