Merge pull request #65 from sp-nitech/pnorm

Add pnorm
sp-nitech · Jul 25, 2024 · 9c6cb74 · 9c6cb74
2 parents 36cf677 + bb09af4
commit 9c6cb74
Show file tree

Hide file tree

Showing 16 changed files with 944 additions and 44 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -109,6 +109,8 @@ set(CC_SOURCES
     ${SOURCE_DIR}/conversion/linear_predictive_coefficients_to_line_spectral_pairs.cc
     ${SOURCE_DIR}/conversion/linear_predictive_coefficients_to_parcor_coefficients.cc
     ${SOURCE_DIR}/conversion/log_area_ratio_to_parcor_coefficients.cc
+    ${SOURCE_DIR}/conversion/mel_cepstrum_inverse_power_normalization.cc
+    ${SOURCE_DIR}/conversion/mel_cepstrum_power_normalization.cc
     ${SOURCE_DIR}/conversion/mel_cepstrum_to_mlsa_digital_filter_coefficients.cc
     ${SOURCE_DIR}/conversion/mel_generalized_cepstrum_to_mel_generalized_cepstrum.cc
     ${SOURCE_DIR}/conversion/mel_generalized_cepstrum_to_spectrum.cc
@@ -298,6 +300,7 @@ set(MAIN_SOURCES
     ${SOURCE_DIR}/main/impulse.cc
     ${SOURCE_DIR}/main/imsvq.cc
     ${SOURCE_DIR}/main/interpolate.cc
+    ${SOURCE_DIR}/main/ipnorm.cc
     ${SOURCE_DIR}/main/ipqmf.cc
     ${SOURCE_DIR}/main/iulaw.cc
     ${SOURCE_DIR}/main/lar2par.cc
@@ -346,6 +349,7 @@ set(MAIN_SOURCES
     ${SOURCE_DIR}/main/pitch2sin.cc
     ${SOURCE_DIR}/main/pitch_mark.cc
     ${SOURCE_DIR}/main/pitch_spec.cc
+    ${SOURCE_DIR}/main/pnorm.cc
     ${SOURCE_DIR}/main/poledf.cc
     ${SOURCE_DIR}/main/pqmf.cc
     ${SOURCE_DIR}/main/quantize.cc

diff --git a/README.md b/README.md
@@ -122,25 +122,17 @@ deactivate
 - Some option names
 - No memory leaks
 - Thread-safe
-- New features:
+- New main features:
   - Aperiodicity extraction (`ap`)
-  - Conversion from/to log area ratio (`lar2par` and `par2lar`)
   - Dynamic range compression (`drc`)
-  - Entropy calculation (`entropy`)
-  - Huffman coding (`huffman`, `huffman_encode`, and `huffman_decode`)
   - Magic number interpolation (`magic_intpl`)
   - Median filter (`medfilt`)
-  - Mel-cepstrum postfilter (`mcpf`)
   - Mel-filter-bank extraction (`fbank`)
   - Nonrecursive MLPG (`mlpg -R 1`)
   - Pitch adaptive spectrum estimation (`pitch_spec`)
-  - Pitch extraction by DIO used in WORLD (`pitch -a 3`)
+  - Pitch extraction used in WORLD (`pitch -a 3` and `pitch -a 4`)
   - PLP extraction (`plp`)
-  - Pole-zero plot (`gpolezero`)
-  - Scalar quantization (`quantize` and `dequantize`)
   - Sinusoidal generation from pitch (`pitch2sin`)
-  - Spectrogram plot (`gspecgram`)
-  - Stability check of LPC coefficients (`lpccheck`)
   - Subband decomposition (`pqmf` and `ipqmf`)
   - WORLD synthesis (`world_synth`)
   - Windows build support

diff --git a/doc/main/ipnorm.rst b/doc/main/ipnorm.rst
@@ -0,0 +1,11 @@
+.. _ipnorm:
+
+ipnorm
+======
+
+.. doxygenfile:: ipnorm.cc
+
+.. seealso:: :ref:`pnorm`
+
+.. doxygenclass:: sptk::MelCepstrumInversePowerNormalization
+   :members:
diff --git a/doc/main/pnorm.rst b/doc/main/pnorm.rst
@@ -0,0 +1,11 @@
+.. _pnorm:
+
+pnorm
+=====
+
+.. doxygenfile:: pnorm.cc
+
+.. seealso:: :ref:`ipnorm`
+
+.. doxygenclass:: sptk::MelCepstrumPowerNormalization
+   :members:
diff --git a/include/SPTK/conversion/mel_cepstrum_inverse_power_normalization.h b/include/SPTK/conversion/mel_cepstrum_inverse_power_normalization.h
@@ -0,0 +1,102 @@
+// ------------------------------------------------------------------------ //
+// Copyright 2021 SPTK Working Group                                        //
+//                                                                          //
+// Licensed under the Apache License, Version 2.0 (the "License");          //
+// you may not use this file except in compliance with the License.         //
+// You may obtain a copy of the License at                                  //
+//                                                                          //
+//     http://www.apache.org/licenses/LICENSE-2.0                           //
+//                                                                          //
+// Unless required by applicable law or agreed to in writing, software      //
+// distributed under the License is distributed on an "AS IS" BASIS,        //
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
+// See the License for the specific language governing permissions and      //
+// limitations under the License.                                           //
+// ------------------------------------------------------------------------ //
+
+#ifndef SPTK_CONVERSION_MEL_CEPSTRUM_INVERSE_POWER_NORMALIZATION_H_
+#define SPTK_CONVERSION_MEL_CEPSTRUM_INVERSE_POWER_NORMALIZATION_H_
+
+#include <vector>  // std::vector
+
+#include "SPTK/utils/sptk_utils.h"
+
+namespace sptk {
+
+/**
+ * Convert power-normalized mel-cepstral coefficients to mel-cepstral ones.
+ *
+ * The inputs are the @f$M@f$-th order power-normalized mel-cepstral
+ * coefficients:
+ * @f[
+ *   \begin{array}{cccc}
+ *     \tilde{c}'(0), & \tilde{c}'(1), & \ldots, & \tilde{c}'(M),
+ *   \end{array}
+ * @f]
+ * and @f$\log P@f$ where @f$P@f$ is the power. The outputs are the
+ * @f$M@f$-th order mel-cepstral coefficients:
+ * @f[
+ *   \begin{array}{cccc}
+ *     \tilde{c}(0), & \tilde{c}(1), & \ldots, & \tilde{c}(M),
+ *   \end{array}
+ * @f]
+ * where
+ * @f[
+ *   \tilde{c}(m) = \left\{ \begin{array}{ll}
+ *     \tilde{c}'(0) + \log \sqrt{P}, & m = 0 \\
+ *     \tilde{c}'(m). & 1 \le m \le M
+ *   \end{array} \right.
+ * @f]
+ */
+class MelCepstrumInversePowerNormalization {
+ public:
+  /**
+   * @param[in] num_order Order of coefficients, @f$M@f$. A negative value
+   *            leaves the object invalid (see IsValid()).
+   */
+  explicit MelCepstrumInversePowerNormalization(int num_order);
+
+  virtual ~MelCepstrumInversePowerNormalization() {
+  }
+
+  /**
+   * @return Order of coefficients.
+   */
+  int GetNumOrder() const {
+    return num_order_;
+  }
+
+  /**
+   * @return True if this object is valid.
+   */
+  bool IsValid() const {
+    return is_valid_;
+  }
+
+  /**
+   * Add @f$\log \sqrt{P}@f$ back to the 0th coefficient and copy the others.
+   *
+   * @param[in] power_normalized_mel_cepstrum @f$M@f$-th order
+   *            power-normalized mel-cepstral coefficients.
+   * @param[in] power Logarithm of power, @f$\log P@f$.
+   * @param[out] mel_cepstrum @f$M@f$-th order mel-cepstral coefficients.
+   *             Resized to @f$M+1@f$ elements if its size differs.
+   * @return True on success, false on failure. Fails if this object is
+   *         invalid, the input does not hold @f$M+1@f$ elements, or
+   *         mel_cepstrum is null.
+   */
+  bool Run(const std::vector<double>& power_normalized_mel_cepstrum,
+           double power, std::vector<double>* mel_cepstrum) const;
+
+  /**
+   * In-place version: the given vector is used as both input and output.
+   *
+   * @param[in,out] input_and_output @f$M@f$-th order coefficients.
+   * @param[in] power Logarithm of power, @f$\log P@f$.
+   * @return True on success, false on failure. Fails if input_and_output is
+   *         null or if the three-argument Run() fails.
+   */
+  bool Run(std::vector<double>* input_and_output, double power) const;
+
+ private:
+  const int num_order_;  // Order of coefficients, M.
+
+  bool is_valid_;  // Set by the constructor; false for a negative order.
+
+  DISALLOW_COPY_AND_ASSIGN(MelCepstrumInversePowerNormalization);
+};
+
+}  // namespace sptk
+
+#endif  // SPTK_CONVERSION_MEL_CEPSTRUM_INVERSE_POWER_NORMALIZATION_H_
diff --git a/include/SPTK/conversion/mel_cepstrum_power_normalization.h b/include/SPTK/conversion/mel_cepstrum_power_normalization.h
@@ -0,0 +1,129 @@
+// ------------------------------------------------------------------------ //
+// Copyright 2021 SPTK Working Group                                        //
+//                                                                          //
+// Licensed under the Apache License, Version 2.0 (the "License");          //
+// you may not use this file except in compliance with the License.         //
+// You may obtain a copy of the License at                                  //
+//                                                                          //
+//     http://www.apache.org/licenses/LICENSE-2.0                           //
+//                                                                          //
+// Unless required by applicable law or agreed to in writing, software      //
+// distributed under the License is distributed on an "AS IS" BASIS,        //
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
+// See the License for the specific language governing permissions and      //
+// limitations under the License.                                           //
+// ------------------------------------------------------------------------ //
+
+#ifndef SPTK_CONVERSION_MEL_CEPSTRUM_POWER_NORMALIZATION_H_
+#define SPTK_CONVERSION_MEL_CEPSTRUM_POWER_NORMALIZATION_H_
+
+#include <vector>  // std::vector
+
+#include "SPTK/conversion/cepstrum_to_autocorrelation.h"
+#include "SPTK/math/frequency_transform.h"
+#include "SPTK/utils/sptk_utils.h"
+
+namespace sptk {
+
+/**
+ * Convert mel-cepstral coefficients to power-normalized ones.
+ *
+ * The inputs are the @f$M@f$-th order mel-cepstral coefficients:
+ * @f[
+ *   \begin{array}{cccc}
+ *     \tilde{c}(0), & \tilde{c}(1), & \ldots, & \tilde{c}(M),
+ *   \end{array}
+ * @f]
+ * and the outputs are the @f$M@f$-th order power-normalized ones:
+ * @f[
+ *   \begin{array}{cccc}
+ *     \tilde{c}'(0), & \tilde{c}'(1), & \ldots, & \tilde{c}'(M),
+ *   \end{array}
+ * @f]
+ * and @f$\log P@f$ where @f$P@f$ is the power and
+ * @f[
+ *   \tilde{c}'(m) = \left\{ \begin{array}{ll}
+ *     \tilde{c}(0) - \log \sqrt{P}, & m = 0 \\
+ *     \tilde{c}(m). & 1 \le m \le M
+ *   \end{array} \right.
+ * @f]
+ */
+class MelCepstrumPowerNormalization {
+ public:
+  /**
+   * Buffer for MelCepstrumPowerNormalization class.
+   *
+   * An instance is passed to Run(). Its members are private and reachable
+   * only by MelCepstrumPowerNormalization, which is declared as a friend.
+   */
+  class Buffer {
+   public:
+    Buffer() {
+    }
+
+    virtual ~Buffer() {
+    }
+
+   private:
+    std::vector<double> cepstrum_;
+    std::vector<double> autocorrelation_;
+    FrequencyTransform::Buffer buffer_for_frequency_transform_;
+    CepstrumToAutocorrelation::Buffer buffer_for_cepstrum_to_autocorrelation_;
+
+    friend class MelCepstrumPowerNormalization;
+    DISALLOW_COPY_AND_ASSIGN(Buffer);
+  };
+
+  /**
+   * @param[in] num_order Order of coefficients, @f$M@f$.
+   * @param[in] impulse_response_length Length of impulse response.
+   * @param[in] alpha All-pass constant, @f$\alpha@f$.
+   */
+  MelCepstrumPowerNormalization(int num_order, int impulse_response_length,
+                                double alpha);
+
+  virtual ~MelCepstrumPowerNormalization() {
+  }
+
+  /**
+   * @return Order of coefficients, i.e., the input order of the internal
+   *         frequency transform.
+   */
+  int GetNumOrder() const {
+    return frequency_transform_.GetNumInputOrder();
+  }
+
+  /**
+   * @return True if this object is valid.
+   */
+  bool IsValid() const {
+    return is_valid_;
+  }
+
+  /**
+   * @param[in] mel_cepstrum @f$M@f$-th order mel-cepstral coefficients.
+   * @param[out] power_normalized_mel_cepstrum @f$M@f$-th order
+   *             power-normalized mel-cepstral coefficients.
+   * @param[out] power Logarithm of power, @f$\log P@f$.
+   * @param[in,out] buffer Working buffer.
+   * @return True on success, false on failure.
+   */
+  bool Run(const std::vector<double>& mel_cepstrum,
+           std::vector<double>* power_normalized_mel_cepstrum, double* power,
+           MelCepstrumPowerNormalization::Buffer* buffer) const;
+
+  /**
+   * @param[in,out] input_and_output @f$M@f$-th order coefficients.
+   * @param[out] power Logarithm of power, @f$\log P@f$.
+   * @param[in,out] buffer Working buffer.
+   * @return True on success, false on failure.
+   */
+  bool Run(std::vector<double>* input_and_output, double* power,
+           MelCepstrumPowerNormalization::Buffer* buffer) const;
+
+ private:
+  const FrequencyTransform frequency_transform_;
+  const CepstrumToAutocorrelation cepstrum_to_autocorrelation_;
+
+  bool is_valid_;
+
+  DISALLOW_COPY_AND_ASSIGN(MelCepstrumPowerNormalization);
+};
+
+}  // namespace sptk
+
+#endif  // SPTK_CONVERSION_MEL_CEPSTRUM_POWER_NORMALIZATION_H_
diff --git a/src/conversion/mel_cepstrum_inverse_power_normalization.cc b/src/conversion/mel_cepstrum_inverse_power_normalization.cc
@@ -0,0 +1,64 @@
+// ------------------------------------------------------------------------ //
+// Copyright 2021 SPTK Working Group                                        //
+//                                                                          //
+// Licensed under the Apache License, Version 2.0 (the "License");          //
+// you may not use this file except in compliance with the License.         //
+// You may obtain a copy of the License at                                  //
+//                                                                          //
+//     http://www.apache.org/licenses/LICENSE-2.0                           //
+//                                                                          //
+// Unless required by applicable law or agreed to in writing, software      //
+// distributed under the License is distributed on an "AS IS" BASIS,        //
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
+// See the License for the specific language governing permissions and      //
+// limitations under the License.                                           //
+// ------------------------------------------------------------------------ //
+
+#include "SPTK/conversion/mel_cepstrum_inverse_power_normalization.h"
+
+#include <algorithm>  // std::copy
+#include <cstddef>    // std::size_t
+
+namespace sptk {
+
+MelCepstrumInversePowerNormalization::MelCepstrumInversePowerNormalization(
+    int num_order)
+    : num_order_(num_order), is_valid_(true) {  // Valid until a check fails.
+  if (num_order_ < 0) {  // Reject a negative order.
+    is_valid_ = false;
+    return;
+  }
+}
+
+bool MelCepstrumInversePowerNormalization::Run(
+    const std::vector<double>& power_normalized_mel_cepstrum, double power,
+    std::vector<double>* mel_cepstrum) const {
+  // Fail on an invalid object, a wrong input length, or a null output.
+  const int length(GetNumOrder() + 1);
+  if (!is_valid_ ||
+      power_normalized_mel_cepstrum.size() !=
+          static_cast<std::size_t>(length) ||
+      NULL == mel_cepstrum) {
+    return false;
+  }
+
+  // Resize the output to M + 1 elements if it has a different size.
+  if (mel_cepstrum->size() != static_cast<std::size_t>(length)) {
+    mel_cepstrum->resize(length);
+  }
+
+  // Add 0.5 * log P to c'(0). NOTE(review): in-place Run() self-copies below.
+  (*mel_cepstrum)[0] = power_normalized_mel_cepstrum[0] + 0.5 * power;
+  std::copy(power_normalized_mel_cepstrum.begin() + 1,
+            power_normalized_mel_cepstrum.end(), mel_cepstrum->begin() + 1);
+
+  return true;
+}
+
+bool MelCepstrumInversePowerNormalization::Run(
+    std::vector<double>* input_and_output, double power) const {
+  if (NULL == input_and_output) return false;  // Guard the dereference below.
+  return Run(*input_and_output, power, input_and_output);  // Input is output.
+}
+
+}  // namespace sptk