AMB82-MINI speech to text

I am working on Ameba speech-to-text with the help of the Google Speech-to-Text API, and I am now facing a problem at this line: File file = fs.open("/voice.mp4", FA_READ); // OK. My complete code is:
`#include <WiFi.h>
#include "StreamIO.h"
#include "AudioStream.h"
#include "AudioEncoder.h"
#include "MP4Recording.h"
#include "AmebaFatFS.h"
#include <HttpClient.h>

// Wi-Fi and API Configuration
char ssid = “YOUR_WIFI_SSID”;
char password = “YOUR_WIFI_PASSWORD”;
const char* googleSTTEndpoint = “https://speech.googleapis.com/v1/speech:recognize?key=YOUR_GOOGLE_API_KEY”;

// Audio Setup
AudioSetting configA(3); // 16kHz digital PDM mic
Audio audio;
AAC aac;
MP4Recording mp4;
StreamIO audioStreamer1(1, 1);
StreamIO audioStreamer2(1, 1);

AmebaFatFS fs;
File audioFile;

// Lightweight Base64 encoder.
//
// Encodes `length` bytes starting at `data` using the standard Base64 alphabet
// (A-Z, a-z, 0-9, '+', '/') and pads the output with '=' so that its length is
// always a multiple of four.
//
// data   - pointer to the first input byte (read only, not modified)
// length - number of bytes to encode; 0 yields an empty String
// return - the Base64 text
String base64Encode(uint8_t* data, size_t length) {
    // Must be an array ([]), not a single char, so it can be indexed below.
    static const char base64_chars[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

    String encoded = "";
    // Every group of up to 3 input bytes produces 4 output characters.
    // Reserving the final size once avoids re-growing the String per character.
    encoded.reserve(((length + 2) / 3) * 4);

    int i = 0;      // number of bytes currently buffered in a3
    uint8_t a3[3];  // the 3-byte group being encoded

    while (length--) {
        a3[i++] = *(data++);
        if (i == 3) {
            encoded += base64_chars[(a3[0] & 0xfc) >> 2];
            encoded += base64_chars[((a3[0] & 0x03) << 4) | ((a3[1] & 0xf0) >> 4)];
            encoded += base64_chars[((a3[1] & 0x0f) << 2) | ((a3[2] & 0xc0) >> 6)];
            encoded += base64_chars[a3[2] & 0x3f];
            i = 0;
        }
    }

    // 1 or 2 bytes left over: zero-fill the group and pad the output with '='.
    if (i) {
        for (int j = i; j < 3; j++) {
            a3[j] = '\0';
        }

        encoded += base64_chars[(a3[0] & 0xfc) >> 2];
        encoded += base64_chars[((a3[0] & 0x03) << 4) | ((a3[1] & 0xf0) >> 4)];
        // With a single leftover byte the third character is padding too.
        encoded += (i == 1) ? '=' : base64_chars[((a3[1] & 0x0f) << 2) | ((a3[2] & 0xc0) >> 6)];
        encoded += '=';
    }

    return encoded;
}

// One-shot sketch entry point: joins Wi-Fi, mounts the file system, records
// about five seconds of microphone audio into an MP4 file named "voice", and
// then hands that file to sendToGoogleSTT().
void setup() {
    Serial.begin(115200);
    delay(500);

    // Connect Wi-Fi (blocks until the connection is up)
    WiFi.begin(ssid, password);
    Serial.print("Connecting to Wi-Fi");
    while (WiFi.status() != WL_CONNECTED) {
        Serial.print(".");
        delay(500);
    }
    Serial.println("\nWi-Fi connected!");

    // Mount filesystem; nothing below can work without it
    if (!fs.begin()) {
        Serial.println("File system init failed!");
        return;
    }

    // Configure audio capture and the AAC encoder with the same settings
    audio.configAudio(configA);
    audio.begin();

    aac.configAudio(configA);
    aac.begin();

    // Configure the recorder: audio-only, a single file of 5 seconds
    mp4.configAudio(configA, CODEC_AAC);
    mp4.setRecordingDuration(5);  // Record for 5 seconds
    mp4.setRecordingFileCount(1);
    mp4.setRecordingFileName("voice");
    mp4.setRecordingDataType(STORAGE_AUDIO);

    // Stage 1: microphone -> AAC encoder
    audioStreamer1.registerInput(audio);
    audioStreamer1.registerOutput(aac);
    if (audioStreamer1.begin() != 0) {
        Serial.println("StreamIO1 start failed");
    }

    // Stage 2: AAC encoder -> MP4 recorder
    audioStreamer2.registerInput(aac);
    audioStreamer2.registerOutput(mp4);
    if (audioStreamer2.begin() != 0) {
        Serial.println("StreamIO2 start failed");
    }

    // Start Recording
    Serial.println("Recording 5 seconds...");
    mp4.begin();
    // Wait one second longer than the configured duration before using the file.
    // NOTE(review): a fixed delay assumes the recorder has closed the file by
    // then - confirm, or poll the recorder's state instead.
    delay(6000);
    Serial.println("Recording finished.");

    // Send to Google STT
    sendToGoogleSTT("/voice.mp4");
}

// Intentionally empty: this sketch has no periodic work to perform.
void loop()
{
}

void sendToGoogleSTT(String filename) {

File file = fs.open(“/voice.mp4”, FA_READ); // OK

if (!file) {
Serial.println(“Failed to open audio file”);
return;
}

size_t fileSize = file.size();
uint8_t* buffer = (uint8_t*)malloc(fileSize);
if (!buffer) {
Serial.println(“Memory allocation failed”);
file.close();
return;
}

file.read(buffer, fileSize);
file.close();

String audioBase64 = base64Encode(buffer, fileSize);
free(buffer);

// Build JSON request
String payload = “{”;
payload += “"config":{”;
payload += “"encoding":"MP4",”;
payload += “"sampleRateHertz":16000,”;
payload += “"languageCode":"en-US"”;
payload += “},”;
payload += “"audio":{”;
payload += “"content":"” + audioBase64 + “"”;
payload += “}}”;

WiFiClientSecure client;
HttpClient http(client, “speech.googleapis.com”, 443);

http.beginRequest();
http.post(“/v1/speech:recognize?key=YOUR_GOOGLE_API_KEY”);
http.sendHeader(“Content-Type”, “application/json”);
http.sendHeader(“Content-Length”, payload.length());
http.beginBody();
http.print(payload);
http.endRequest();

int status = http.responseStatusCode();
Serial.print("Google STT response code: ");
Serial.println(status);

if (status > 0) {
String response = http.responseBody();
Serial.println(“Response:”);
Serial.println(response);

int start = response.indexOf("\"transcript\":\"") + 14;
int end = response.indexOf("\"", start);
if (start > 13 && end > start) {
  String transcript = response.substring(start, end);
  Serial.print("Recognized Speech: ");
  Serial.println(transcript);
} else {
  Serial.println("No transcript found.");
}

} else {
Serial.println(“STT request failed.”);
}

http.stop();
}
`

Can you try reviewing the link and the API description for fs.open?

Hi @Robo_Things ,

May I know if you still encounter any issue with fs.open? Kindly elaborate on what issue you are facing or provide logs and we will reproduce on our side. Thank you.

Hi @Robo_Things ,

It is recommended to use Google APIs v2, as it supports MP4 encoding, whereas v1 is limited to MP3.

In the current SDK implementation, only MP4 recording is supported. Therefore, upgrading to v2 ensures full compatibility with the file format used by the SDK.

We will evaluate internally to determine the feasibility of adding MP3 recording support in the future.

Thank you.