ProtoTracer  1.0
Real-time 3D rendering and animation engine
Loading...
Searching...
No Matches
FFTVoiceDetection.h
Go to the documentation of this file.
1/**
2 * @file FFTVoiceDetection.h
3 * @brief Declares the FFTVoiceDetection template class for real-time viseme detection based on FFT data.
4 *
5 * This file defines the FFTVoiceDetection class, which extends the Viseme class to provide functionality
6 * for detecting mouth shapes (visemes) based on formant frequencies extracted from FFT analysis of voice signals.
7 *
8 * @date 22/12/2024
9 * @author Coela Can't
10 */
11
12#pragma once
13
14#include "../../../../Utils/Filter/RunningAverageFilter.h" // Include for smoothing peaks.
15#include "../../../../Utils/Filter/PeakDetection.h" // Include for peak detection in FFT data.
16#include "../../../../Renderer/Utils/IndexGroup.h" // Include for utility structures.
17#include "../../../../Renderer/Utils/Triangle2D.h" // Include for 2D triangle utilities.
18#include "../../../../Utils/Math/Vector2D.h" // Include for 2D vector utilities.
19
/**
 * @class Viseme
 * @brief Defines the available mouth shapes (visemes).
 *
 * Acts as the base class of FFTVoiceDetection, whose public interface
 * (GetViseme) takes a MouthShape.
 */
class Viseme {
public:
    /**
     * @enum MouthShape
     * @brief Enumerates the possible mouth shapes for viseme detection.
     *
     * The enumerators use the default consecutive values, starting at 0 for EE.
     * The first seven (EE..OO) appear in the same order as the per-viseme
     * arrays declared in FFTVoiceDetection.
     */
    enum MouthShape {
        EE, ///< Mouth shape corresponding to the "EE" sound.
        AE, ///< Mouth shape corresponding to the "AE" sound.
        UH, ///< Mouth shape corresponding to the "UH" sound.
        AR, ///< Mouth shape corresponding to the "AR" sound.
        ER, ///< Mouth shape corresponding to the "ER" sound.
        AH, ///< Mouth shape corresponding to the "AH" sound.
        OO, ///< Mouth shape corresponding to the "OO" sound.
        SS  ///< Mouth shape corresponding to the "SS" sound (optional).
    };
};
41
42/**
43 * @class FFTVoiceDetection
44 * @brief Detects visemes based on FFT voice analysis.
45 *
46 * The FFTVoiceDetection class uses formant frequencies (F1 and F2) derived from FFT peaks
47 * to detect and assign probabilities to various mouth shapes (visemes). It employs peak detection,
48 * smoothing filters, and threshold-based calculations to determine the most probable viseme.
49 *
50 * @tparam peakCount The number of peaks to analyze in the FFT data.
51 */
52template <size_t peakCount>
53class FFTVoiceDetection : public Viseme {
54private:
55 static const uint8_t visemeCount = 7; ///< Number of supported visemes.
56
57 // Formant frequency coordinates for each viseme.
58 Vector2D visEE = Vector2D(350.0f, 3200.0f); ///< Coordinates for "EE".
59 Vector2D visAE = Vector2D(500.0f, 2700.0f); ///< Coordinates for "AE".
60 Vector2D visUH = Vector2D(1100.0f, 2700.0f); ///< Coordinates for "UH".
61 Vector2D visAR = Vector2D(850.0f, 850.0f); ///< Coordinates for "AR".
62 Vector2D visER = Vector2D(1000.0f, 1000.0f); ///< Coordinates for "ER".
63 Vector2D visAH = Vector2D(900.0f, 2400.0f); ///< Coordinates for "AH".
64 Vector2D visOO = Vector2D(600.0f, 600.0f); ///< Coordinates for "OO".
65
66 Vector2D* coordinates[visemeCount] = { &visEE, &visAE, &visUH, &visAR, &visER, &visAH, &visOO }; ///< Array of viseme coordinates.
67
68 // Viseme probabilities.
69 float visRatioEE = 0.0f; ///< Probability for "EE".
70 float visRatioAE = 0.0f; ///< Probability for "AE".
71 float visRatioUH = 0.0f; ///< Probability for "UH".
72 float visRatioAR = 0.0f; ///< Probability for "AR".
73 float visRatioER = 0.0f; ///< Probability for "ER".
74 float visRatioAH = 0.0f; ///< Probability for "AH".
75 float visRatioOO = 0.0f; ///< Probability for "OO".
76
77 float* visRatios[visemeCount] = { &visRatioEE, &visRatioAE, &visRatioUH, &visRatioAR, &visRatioER, &visRatioAH, &visRatioOO }; ///< Array of viseme probabilities.
78
79 PeakDetection<peakCount> peakDetection = PeakDetection<peakCount>(8, 2.0f, 0.5f); ///< Peak detection instance.
80 RunningAverageFilter<10> peakSmoothing = RunningAverageFilter<10>(0.1f); ///< Smoothing filter for peak data.
81
82 bool peaksBinary[peakCount]; ///< Binary array indicating peak presence.
83 float peakDensity[peakCount]; ///< Array of peak densities.
84
85 float f1; ///< Formant frequency F1.
86 float f2; ///< Formant frequency F2.
87
88 float threshold = 400.0f; ///< Threshold for formant calculations.
89
90 /**
91 * @brief Calculates formant frequencies (F1 and F2) from FFT peaks.
92 *
93 * @param peaks Array of FFT peak values.
94 * @param bandwidth Bandwidth of the FFT data.
95 */
96 void CalculateFormants(float* peaks, uint8_t bandwidth);
97
98 /**
99 * @brief Calculates the viseme group probabilities based on formants.
100 */
102
103public:
104 /**
105 * @brief Constructs a new FFTVoiceDetection instance.
106 */
108
109 /**
110 * @brief Sets the threshold for formant calculations.
111 *
112 * @param threshold The new threshold value.
113 */
115
116 /**
117 * @brief Retrieves the probability of a specific viseme.
118 *
119 * @param viseme The viseme to query.
120 * @return The probability of the specified viseme (0.0 - 1.0).
121 */
122 float GetViseme(MouthShape viseme);
123
124 /**
125 * @brief Prints the probabilities of all visemes to the serial console.
126 */
128
129 /**
130 * @brief Resets all viseme probabilities to zero.
131 */
133
134 /**
135 * @brief Updates the viseme probabilities based on new FFT data.
136 *
137 * @param peaks Array of FFT peak values.
138 * @param maxFrequency Maximum frequency represented in the FFT data.
139 */
140 void Update(float* peaks, float maxFrequency);
141};
142
143#include "FFTVoiceDetection.tpp" // Include the template implementation.
Detects visemes based on FFT voice analysis.
RunningAverageFilter< 10 > peakSmoothing
Smoothing filter for peak data.
Vector2D visEE
Coordinates for "EE".
Vector2D visAR
Coordinates for "AR".
float visRatioOO
Probability for "OO".
PeakDetection< peakCount > peakDetection
Peak detection instance.
void CalculateFormants(float *peaks, uint8_t bandwidth)
Calculates formant frequencies (F1 and F2) from FFT peaks.
float f1
Formant frequency F1.
float threshold
Threshold for formant calculations.
float visRatioAR
Probability for "AR".
float peakDensity[peakCount]
Array of peak densities.
Vector2D visAH
Coordinates for "AH".
Vector2D visUH
Coordinates for "UH".
float f2
Formant frequency F2.
float visRatioUH
Probability for "UH".
FFTVoiceDetection()
Constructs a new FFTVoiceDetection instance.
void CalculateVisemeGroup()
Calculates the viseme group probabilities based on formants.
void PrintVisemes()
Prints the probabilities of all visemes to the serial console.
float visRatioAH
Probability for "AH".
Vector2D * coordinates[visemeCount]
Array of viseme coordinates.
float visRatioAE
Probability for "AE".
Vector2D visER
Coordinates for "ER".
float GetViseme(MouthShape viseme)
Retrieves the probability of a specific viseme.
void ResetVisemes()
Resets all viseme probabilities to zero.
static const uint8_t visemeCount
Number of supported visemes.
float * visRatios[visemeCount]
Array of viseme probabilities.
void SetThreshold(float threshold)
Sets the threshold for formant calculations.
Vector2D visOO
Coordinates for "OO".
Vector2D visAE
Coordinates for "AE".
bool peaksBinary[peakCount]
Binary array indicating peak presence.
float visRatioER
Probability for "ER".
void Update(float *peaks, float maxFrequency)
Updates the viseme probabilities based on new FFT data.
float visRatioEE
Probability for "EE".
Detects peaks in time-series data using statistical methods.
Smooths data values using a weighted running average.
Represents a 2D vector (X, Y) and provides methods for vector arithmetic.
Definition Vector2D.h:27
Defines the available mouth shapes (visemes).
MouthShape
Enumerates the possible mouth shapes for viseme detection.
@ ER
Mouth shape corresponding to the "ER" sound.
@ AE
Mouth shape corresponding to the "AE" sound.
@ OO
Mouth shape corresponding to the "OO" sound.
@ AH
Mouth shape corresponding to the "AH" sound.
@ UH
Mouth shape corresponding to the "UH" sound.
@ AR
Mouth shape corresponding to the "AR" sound.
@ SS
Mouth shape corresponding to the "SS" sound (optional).
@ EE
Mouth shape corresponding to the "EE" sound.