Audio to Text Detection

 

/// <summary>
/// Accepts Base64-encoded audio, converts MP3 input to WAV when needed, and
/// returns the text produced by speech recognition.
/// </summary>
/// <param name="base64Audio">Base64-encoded audio taken from the request body.</param>
/// <returns>
/// 200 with the recognized text; 400 when the body is missing, is not valid
/// Base64, or is neither MP3 nor WAV; 500 when conversion or recognition throws.
/// </returns>
[HttpPost("convertBase64ToText")]
public IActionResult ConvertBase64ToText([FromBody] string base64Audio)
{
    // A missing body is a client mistake; without this guard the decoder
    // throws ArgumentNullException and the caller gets a misleading 500.
    if (base64Audio == null)
    {
        return BadRequest("Audio data is required");
    }

    // Decode Base64 to raw audio data. Malformed input is likewise a client
    // error, so it is reported as 400 rather than falling into the 500 handler.
    byte[] audioBytes;
    try
    {
        audioBytes = Convert.FromBase64String(base64Audio);
    }
    catch (FormatException)
    {
        return BadRequest("Audio data is not valid Base64");
    }

    try
    {
        string text;

        if (IsMp3HeaderPresent(audioBytes))
        {
            // The recognizer is fed wave data, so MP3 is converted first.
            byte[] wavData = ConvertMp3ToWav(audioBytes);
            text = RecognizeSpeech(wavData);
        }
        else if (IsWavHeaderPresent(audioBytes))
        {
            text = RecognizeSpeech(audioBytes);
        }
        else
        {
            return BadRequest("File Format is Not Supported");
        }

        // Return the resulting text
        return Ok(text);
    }
    catch (Exception ex)
    {
        return StatusCode(500, $"An error occurred: {ex.Message}");
    }
}

 private string RecognizeSpeech(byte[] audioBytes)

 {

     using (MemoryStream memoryStream = new MemoryStream(audioBytes))

     {

         using (SpeechRecognitionEngine recognizer = new SpeechRecognitionEngine())

         {

             // Set up audio format

             recognizer.SetInputToWaveStream(memoryStream);

             recognizer.LoadGrammar(new DictationGrammar());

             // Increase the silence timeout to 5 seconds

             recognizer.EndSilenceTimeout = TimeSpan.FromSeconds(5);

             recognizer.EndSilenceTimeoutAmbiguous = TimeSpan.FromSeconds(5);

             // Recognize speech

             RecognitionResult result = recognizer.Recognize();


             // Extract text from recognition result

             string text = result?.Text;


             // Return the recognized text

             return text;

         }

     }

 }

 private bool IsMp3HeaderPresent(byte[] audioData)

 {

     // Check for presence of "ID3" marker

     return audioData.Length >= 3 && audioData[0] == 0x49 && audioData[1] == 0x44 && audioData[2] == 0x33;

 }


 private bool IsWavHeaderPresent(byte[] audioData)

 {

     // Check for presence of "RIFF" marker

     return audioData.Length >= 4 && audioData[0] == 0x52 && audioData[1] == 0x49 && audioData[2] == 0x46 && audioData[3] == 0x46;

 }


 private byte[] ConvertMp3ToWav(byte[] mp3Data)

 {

     using (MemoryStream mp3Stream = new MemoryStream(mp3Data))

     {

         using (Mp3FileReader mp3Reader = new Mp3FileReader(mp3Stream))

         {

             using (WaveStream pcmStream = WaveFormatConversionStream.CreatePcmStream(mp3Reader))

             {

                 using (MemoryStream wavStream = new MemoryStream())

                 {

                     WaveFileWriter.WriteWavFileToStream(wavStream, pcmStream);

                     return wavStream.ToArray();

                 }

             }

         }

     }

 }

Comments

Popular posts from this blog

Parameter Query

Final Project

Grid View Paging