All files / src/utils speech-transcriber.ts

0% Statements 0/81
0% Branches 0/1
0% Functions 0/1
0% Lines 0/81

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109                                                                                                                                                                                                                         
import {
    type AudioStream,
    BadRequestException,
    LanguageCode,
    MediaEncoding,
    StartStreamTranscriptionCommand,
    TranscribeStreamingClient,
    type TranscriptResultStream,
} from "@aws-sdk/client-transcribe-streaming";
import {
    StartStreamTranscriptionCommandInput,
} from "@aws-sdk/client-transcribe-streaming/dist-types/commands/StartStreamTranscriptionCommand";
import {AsyncBlockingQueue} from "./async-blocking-queue.ts";
import pcmProcessorUrl from "./pcm-processor.ts?worker&url";
 
// Sample rate, in Hz, posted to the "pcm-processor" worklet when capture starts
// (presumably the rate it resamples to; confirm in pcm-processor.ts). It is also
// the upper bound for the MediaSampleRateHertz value sent to Transcribe.
const TARGET_SAMPLE_RATE = 16000;
 
/**
 * Captures microphone audio through an AudioWorklet ("pcm-processor") and streams it to
 * Amazon Transcribe, forwarding every transcript event to the supplied callback.
 *
 * Lifecycle: {@link start} acquires the microphone and keeps a transcription request alive
 * until {@link stop} is called; {@link stop} releases every audio resource.
 * Overlapping calls to {@link start} on the same instance are not supported.
 */
export class SpeechTranscriber {

    /** Delay before the first retry after an attempt that produced no transcript events. */
    private static readonly INITIAL_RETRY_DELAY_MS = 250;
    /** Upper bound for the exponentially growing retry delay. */
    private static readonly MAX_RETRY_DELAY_MS = 5000;

    private readonly transcribeClient: TranscribeStreamingClient;
    private readonly onTranscription: (event: TranscriptResultStream) => Promise<void>;

    private audioContext?: AudioContext;
    private audioWorkletNode?: AudioWorkletNode;
    private mediaStream?: MediaStream;
    private audioSource?: MediaStreamAudioSourceNode;

    /** Enqueues an empty chunk so an audio generator blocked on the queue re-checks `stopped`. */
    private wakeAudioStream?: () => void;

    private stopped = false;

    /**
     * @param client Transcribe streaming client used to send the transcription command.
     * @param onTranscription Invoked (and awaited) for every event of the transcript result stream.
     */
    constructor(client: TranscribeStreamingClient, onTranscription: (event: TranscriptResultStream) => Promise<void>) {
        this.transcribeClient = client;
        this.onTranscription = onTranscription;
    }

    /**
     * Opens the microphone and transcribes until {@link stop} is called.
     *
     * The returned promise settles only once transcription has ended. Failed or ended
     * requests are retried; attempts that produced no transcript event are retried with an
     * exponential backoff so a persistent failure cannot spin in a hot loop.
     *
     * @param language Language expected besides British English. When it is itself an English
     *     dialect, only that dialect is requested (see {@link languageSettings}).
     * @throws Whatever audio setup throws (e.g. microphone permission denied). Audio resources
     *     acquired up to that point are released before the error is rethrown.
     */
    async start(language: LanguageCode): Promise<void> {
        this.stopped = false;
        const audioQueue = new AsyncBlockingQueue<ArrayBuffer>();
        this.wakeAudioStream = () => audioQueue.enqueue(new ArrayBuffer(0));

        let sampleRate: number | undefined;
        try {
            sampleRate = await this.openAudioCapture(audioQueue);
        } catch (e) {
            // Best-effort cleanup; the setup error is the one the caller needs to see.
            await this.releaseAudioResources().catch(() => undefined);
            throw e;
        }
        if (sampleRate === undefined) {
            // stop() was called while setup was still in progress.
            return;
        }

        const params: StartStreamTranscriptionCommandInput = {
            ...SpeechTranscriber.languageSettings(language),
            MediaEncoding: MediaEncoding.PCM,
            MediaSampleRateHertz: Math.min(sampleRate, TARGET_SAMPLE_RATE),
            AudioStream: this.audioEvents(audioQueue),
        };
        const command = new StartStreamTranscriptionCommand(params);

        let retryDelayMs = SpeechTranscriber.INITIAL_RETRY_DELAY_MS;
        while (!this.stopped) {
            let receivedEvent = false;
            try {
                const response = await this.transcribeClient.send(command);
                const results = response.TranscriptResultStream;
                if (!results) {
                    throw new Error("Transcribe response did not contain a result stream");
                }
                for await (const event of results) {
                    receivedEvent = true;
                    await this.onTranscription(event);
                    if (this.stopped) break;
                }
            } catch (e) {
                // BadRequestException is deliberately not logged (kept from the original behaviour).
                if (!(e instanceof BadRequestException)) {
                    console.error(e);
                }
            }

            if (this.stopped) break;
            if (receivedEvent) {
                // The request was working; reconnect immediately and reset the backoff.
                retryDelayMs = SpeechTranscriber.INITIAL_RETRY_DELAY_MS;
            } else {
                const delayMs = retryDelayMs;
                await new Promise<void>(resolve => setTimeout(resolve, delayMs));
                retryDelayMs = Math.min(retryDelayMs * 2, SpeechTranscriber.MAX_RETRY_DELAY_MS);
            }
        }
    }

    /**
     * Enables or disables every track of the captured stream.
     * Does nothing when no stream is currently open.
     */
    setMuted(muted: boolean): void {
        this.mediaStream?.getTracks().forEach(track => {
            track.enabled = !muted;
        });
    }

    /**
     * Stops transcription and releases the microphone and all audio nodes.
     * Safe to call repeatedly and before {@link start}.
     *
     * @throws If closing the AudioContext fails; all other resources are released first.
     */
    async stop(): Promise<void> {
        this.stopped = true;

        // Wake a generator that is blocked waiting for audio so the request can end now.
        const wake = this.wakeAudioStream;
        this.wakeAudioStream = undefined;
        wake?.();

        await this.releaseAudioResources();
    }

    /**
     * Chooses the language parameters for the request.
     *
     * Transcribe accepts only one dialect per language in `LanguageOptions`, so when the
     * requested language is an English dialect it cannot be combined with en-GB; that
     * dialect is then requested on its own instead of using multi-language identification.
     */
    private static languageSettings(
        language: LanguageCode,
    ): Pick<StartStreamTranscriptionCommandInput, "LanguageCode" | "IdentifyMultipleLanguages" | "LanguageOptions"> {
        const baseLanguage = (code: string) => code.split("-")[0];
        if (baseLanguage(language) === baseLanguage(LanguageCode.EN_GB)) {
            return {LanguageCode: language};
        }
        return {
            IdentifyMultipleLanguages: true,
            LanguageOptions: `${LanguageCode.EN_GB},${language}`,
        };
    }

    /**
     * Creates the AudioContext, microphone stream and worklet node, and wires the worklet's
     * messages into `audioQueue`.
     *
     * @returns The context's sample rate, or `undefined` when {@link stop} was called while
     *     setup was still awaiting the microphone or the worklet module.
     */
    private async openAudioCapture(audioQueue: AsyncBlockingQueue<ArrayBuffer>): Promise<number | undefined> {
        const context = new AudioContext();
        this.audioContext = context;

        // Load the worklet module while the microphone request is pending.
        const workletReady = context.audioWorklet.addModule(pcmProcessorUrl);
        // Mark the promise as handled in case getUserMedia rejects first; it is still awaited below.
        workletReady.catch(() => undefined);

        const stream = await navigator.mediaDevices.getUserMedia({
            audio: {
                channelCount: 1,
            },
        });
        if (this.stopped) {
            // stop() never saw this stream, so it has to be released here.
            stream.getTracks().forEach(track => track.stop());
            return undefined;
        }
        this.mediaStream = stream;

        await workletReady;
        if (this.stopped) {
            // stop() has already released the stream and the context.
            return undefined;
        }

        const workletNode = new AudioWorkletNode(context, "pcm-processor");
        workletNode.port.postMessage(TARGET_SAMPLE_RATE);
        workletNode.port.onmessage = event => {
            audioQueue.enqueue(event.data);
        };
        this.audioWorkletNode = workletNode;

        const source = context.createMediaStreamSource(stream);
        source.connect(workletNode);
        this.audioSource = source;

        return context.sampleRate;
    }

    /**
     * Turns queued chunks into Transcribe audio events; ends as soon as a chunk is
     * dequeued after {@link stop} was called (that chunk is not sent).
     */
    private async *audioEvents(
        audioQueue: AsyncBlockingQueue<ArrayBuffer>,
    ): AsyncGenerator<AudioStream.AudioEventMember> {
        for await (const chunk of audioQueue) {
            if (this.stopped) return;
            yield {
                AudioEvent: {
                    AudioChunk: new Uint8Array(chunk),
                },
            };
        }
    }

    /**
     * Detaches and releases every audio resource currently held.
     *
     * The fields are cleared before anything is awaited so overlapping calls never release
     * the same object twice, and the microphone tracks are stopped before the context is
     * closed so a failing `close()` cannot leave the microphone open.
     */
    private async releaseAudioResources(): Promise<void> {
        const {audioWorkletNode, audioSource, mediaStream, audioContext} = this;
        this.audioWorkletNode = undefined;
        this.audioSource = undefined;
        this.mediaStream = undefined;
        this.audioContext = undefined;

        if (audioWorkletNode) {
            audioWorkletNode.port.onmessage = null;
            audioWorkletNode.disconnect();
        }
        audioSource?.disconnect();
        mediaStream?.getTracks().forEach(track => track.stop());

        if (audioContext && audioContext.state !== "closed") {
            await audioContext.close();
        }
    }
}