utils speech-transcriber.ts

0% Statements 0/81
0% Branches 0/1
0% Functions 0/1
0% Lines 0/81
Press n or j to go to the next uncovered block, b, p or k for the previous block.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109  
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
  import {
    type AudioStream,
    BadRequestException,
    LanguageCode,
    MediaEncoding,
    StartStreamTranscriptionCommand,
    TranscribeStreamingClient,
    type TranscriptResultStream,
} from "@aws-sdk/client-transcribe-streaming";
import {
    StartStreamTranscriptionCommandInput,
} from "@aws-sdk/client-transcribe-streaming/dist-types/commands/StartStreamTranscriptionCommand";
import {AsyncBlockingQueue} from "./async-blocking-queue.ts";
import pcmProcessorUrl from "./pcm-processor.ts?worker&url";
 
// Sample rate in Hz that is posted to the "pcm-processor" worklet and used as the upper bound
// for the MediaSampleRateHertz declared to Transcribe.
// NOTE(review): presumably the worklet resamples its output to this rate — confirm in pcm-processor.ts.
const TARGET_SAMPLE_RATE = 16000;
 
/**
 * Captures microphone audio in the browser, routes it through the "pcm-processor"
 * audio worklet and streams the resulting buffers to Amazon Transcribe Streaming,
 * forwarding every transcript event to the `onTranscription` callback.
 *
 * A "session" is one `start()` call; it is identified by the AudioContext it created.
 * `stop()` (or a later `start()`) ends the session: the audio generator returns, the
 * request body ends and the `start()` promise settles.
 */
export class SpeechTranscriber {

    /** Pause before re-opening the stream after a failure, so persistent errors cannot spin a tight request loop. */
    private static readonly RETRY_DELAY_MS = 1000;

    private readonly transcribeClient: TranscribeStreamingClient;
    private readonly onTranscription: (event: TranscriptResultStream) => Promise<void>;

    private audioContext?: AudioContext;
    private audioWorkletNode?: AudioWorkletNode;
    private mediaStream?: MediaStream;
    private audioSource?: MediaStreamAudioSourceNode;
    /** Queue fed by the worklet; kept as a field so stop() can wake a generator blocked on it. */
    private audioQueue?: AsyncBlockingQueue<ArrayBuffer>;

    private stopped = false;

    /**
     * @param client Transcribe streaming client used to send the transcription command.
     * @param onTranscription Awaited for every event of the transcript result stream.
     */
    constructor(client: TranscribeStreamingClient, onTranscription: (event: TranscriptResultStream) => Promise<void>) {
        this.transcribeClient = client;
        this.onTranscription = onTranscription;
    }

    /**
     * Opens the microphone, wires it to the PCM worklet and keeps a transcription
     * stream open until the session ends.
     *
     * The returned promise resolves once `stop()` (or a newer `start()`) has ended this
     * session. It rejects if microphone access or worklet loading fails; in that case
     * everything acquired so far is released before the error is rethrown.
     *
     * @param language Language offered to Transcribe alongside `LanguageCode.EN_GB`.
     */
    async start(language: LanguageCode): Promise<void> {
        this.stopped = false;
        const audioContext = new AudioContext();
        this.audioContext = audioContext;
        const audioQueue = new AsyncBlockingQueue<ArrayBuffer>();
        this.audioQueue = audioQueue;

        // Start loading the worklet module before awaiting microphone access so both proceed concurrently.
        const audioWorkletSetup = audioContext.audioWorklet.addModule(pcmProcessorUrl);

        let mediaStream: MediaStream | undefined;
        try {
            mediaStream = await navigator.mediaDevices.getUserMedia({
                audio: {
                    channelCount: 1,
                },
            });
            await audioWorkletSetup;
        } catch (e) {
            // Do not leave the microphone or the AudioContext open when setup fails.
            mediaStream?.getTracks().forEach(track => track.stop());
            // If getUserMedia failed first, the worklet promise may still reject later; mark it handled.
            audioWorkletSetup.catch(() => undefined);
            if (this.isActive(audioContext)) {
                await this.stop();
            }
            throw e;
        }

        if (!this.isActive(audioContext)) {
            // stop() ran while we were awaiting: it could not see this stream, so release it here.
            mediaStream.getTracks().forEach(track => track.stop());
            return;
        }
        this.mediaStream = mediaStream;

        const workletNode = new AudioWorkletNode(audioContext, "pcm-processor");
        this.audioWorkletNode = workletNode;
        workletNode.port.postMessage(TARGET_SAMPLE_RATE);
        workletNode.port.onmessage = event => {
            audioQueue.enqueue(event.data);
        };

        const source = audioContext.createMediaStreamSource(mediaStream);
        this.audioSource = source;
        source.connect(workletNode);

        // NOTE(review): if `language` is itself an English dialect this list contains two dialects of
        // the same language — check the LanguageOptions rules of StartStreamTranscription.
        const languageOptions = LanguageCode.EN_GB + "," + language;
        const sampleRate = Math.min(audioContext.sampleRate, TARGET_SAMPLE_RATE);

        while (this.isActive(audioContext)) {
            try {
                // A fresh command (and audio generator) per attempt, so a retry never reuses a
                // generator the previous request may already have finalised.
                const params: StartStreamTranscriptionCommandInput = {
                    IdentifyMultipleLanguages: true,
                    LanguageOptions: languageOptions,
                    MediaEncoding: MediaEncoding.PCM,
                    MediaSampleRateHertz: sampleRate,
                    AudioStream: this.audioEvents(audioQueue, audioContext),
                };
                const response = await this.transcribeClient.send(new StartStreamTranscriptionCommand(params));
                const results = response.TranscriptResultStream;
                if (!results) {
                    throw new Error("Transcribe response contained no TranscriptResultStream");
                }
                for await (const event of results) {
                    await this.onTranscription(event);
                    if (!this.isActive(audioContext)) break;
                }
            } catch (e) {
                // BadRequestException is deliberately not logged (as before); everything else is reported.
                if (!(e instanceof BadRequestException)) {
                    console.error(e);
                }
                if (this.isActive(audioContext)) {
                    await new Promise<void>(resolve => setTimeout(resolve, SpeechTranscriber.RETRY_DELAY_MS));
                }
            }
        }
    }

    /**
     * Enables or disables every track of the captured stream.
     * Does nothing when no stream is currently held.
     */
    setMuted(muted: boolean): void {
        for (const track of this.mediaStream?.getTracks() ?? []) {
            track.enabled = !muted;
        }
    }

    /**
     * Ends the current session and releases the audio graph and the microphone.
     * Safe to call repeatedly or before `start()`.
     */
    async stop(): Promise<void> {
        this.stopped = true;

        // Detach every resource from the instance synchronously, so a concurrent stop()
        // or start() never sees (or double-closes) half-released state.
        const {audioWorkletNode, audioSource, audioQueue, mediaStream, audioContext} = this;
        this.audioWorkletNode = undefined;
        this.audioSource = undefined;
        this.audioQueue = undefined;
        this.mediaStream = undefined;
        this.audioContext = undefined;

        audioWorkletNode?.disconnect();
        audioSource?.disconnect();
        // Release the microphone before awaiting close(), so a rejected close() cannot leave it on.
        mediaStream?.getTracks().forEach(track => track.stop());
        // Wake the audio generator if it is waiting on the queue; it sees the session is over and returns.
        audioQueue?.enqueue(new ArrayBuffer(0));
        await audioContext?.close();
    }

    /** True while the session that created `audioContext` is still the running one. */
    private isActive(audioContext: AudioContext): boolean {
        return !this.stopped && this.audioContext === audioContext;
    }

    /** Yields queued audio buffers as Transcribe audio events until the owning session ends. */
    private async* audioEvents(
        audioQueue: AsyncBlockingQueue<ArrayBuffer>,
        audioContext: AudioContext,
    ): AsyncGenerator<AudioStream.AudioEventMember> {
        for await (const data of audioQueue) {
            // Checked after every dequeue so the wake-up buffer enqueued by stop() is never sent.
            if (!this.isActive(audioContext)) return;
            yield {
                AudioEvent: {
                    AudioChunk: new Uint8Array(data),
                },
            };
        }
    }
}