import createVoiceSdkModule from './VoiceSdkWebAssembly.js';
import { VOICE_SDK_LICENSE } from './VOICE_SDK_LICENSE.js';
import {audioConstraints} from "./constrain";

// Voice SDK objects. They are created by IDVoiceCapturingModule once the SDK
// module has loaded and are released (delete()) in its destroy() method.
// NOTE: they live at module level, so every IDVoiceCapturingModule instance
// created from this module shares (and can overwrite/release) the same objects.
let speechEndpointDetector;
let speechSummaryEngine;
let speechSummaryStream;
let snrComputer;
// Counter of processed audio chunks since the reported speech length last
// changed; reset in startRecording() and compared against a fixed limit in
// processAudioBuffer() to decide when to restart the recording.
let checks = 0;
// Speech length (as reported by the speech summary stream) seen while
// processing the previous audio chunk.
let lastSpeechLength = 0;

/**
 * Captures microphone audio through an AudioWorklet, feeds every chunk to the
 * Voice SDK (speech endpoint detection and speech summary) and, once the end
 * of speech is detected, hands the recording back to the caller as a WAV blob.
 *
 * NOTE: the SDK objects are stored in module-level variables, so they are
 * shared by all instances created from this module; destroy() on one instance
 * releases them for every instance.
 */
export class IDVoiceCapturingModule {
    /**
     * Starts asynchronous initialization (microphone access, audio worklet,
     * Voice SDK). Use the callbacks to learn when the recorder becomes usable.
     *
     * @param {number} minSpeechLengthMs - passed to the SDK SpeechEndpointDetector; also the
     *     minimum speech length required before a recording is finished automatically.
     * @param {number} maxSilenceLengthMs - passed to the SDK SpeechEndpointDetector.
     * @param {*} deviceId - forwarded to audioConstraints() to build the getUserMedia request.
     * @param {Function} onRecorderReady - called without arguments once startRecording() may be used.
     * @param {Function} onSpeechLength - called with the current speech length for every processed chunk.
     * @param {Function} onRecordingStopped - called with a WAV Blob when the end of speech is detected.
     * @param {Function} onInitialized - called without arguments after the SDK objects were created.
     * @param {Function} onError - called with an error (or SDK error message) whenever something fails.
     */
    constructor(
      minSpeechLengthMs,
      maxSilenceLengthMs,
      deviceId,
      onRecorderReady,
      onSpeechLength,
      onRecordingStopped,
      onInitialized,
      onError,
      ) {
        this.audioBuffers = [];
        this.numRecordedSamples = 0;
        this.onSpeechLength = onSpeechLength;
        this.onRecordingStopped = onRecordingStopped;
        this.minSpeechLengthMs = minSpeechLengthMs;
        this.isRecorderReady = false;
        this.isRecording = false;
        this.isDestroyed = false;
        this.stream = null;
        this.context = null;
        this.audioInput = null;
        this.audioRecordingNode = null;
        this.onError = onError;
        // Fallback reporter so SDK-related failures that happen before the SDK
        // module is loaded are still forwarded to onError instead of hitting
        // an undefined handler. Replaced in initializeRecorder().
        this.handleVoiceSdkError = (error) => {
            this.onError(error);
        };

        // Every failure of the asynchronous setup (getUserMedia, worklet
        // loading, SDK loading, callbacks) ends up in onError.
        this.initializeRecorder(maxSilenceLengthMs, deviceId, onRecorderReady, onInitialized)
            .catch((error) => {
                this.onError(error);
            });
    }

    /**
     * Internal: performs the asynchronous part of construction. Not meant to
     * be called by users of the class.
     *
     * @param {number} maxSilenceLengthMs - see constructor.
     * @param {*} deviceId - see constructor.
     * @param {Function} onRecorderReady - see constructor.
     * @param {Function} onInitialized - see constructor.
     * @returns {Promise<void>} rejects when microphone access, worklet loading or SDK loading fails.
     */
    async initializeRecorder(maxSilenceLengthMs, deviceId, onRecorderReady, onInitialized) {
        const stream = await navigator.mediaDevices.getUserMedia({ audio: audioConstraints(deviceId) });
        if (this.isDestroyed) {
            // destroy() ran while the permission prompt was pending: release
            // the microphone right away instead of leaking the stream.
            stream.getAudioTracks().forEach((track) => track.stop());
            return;
        }
        this.stream = stream;
        const context = new AudioContext();
        this.context = context;
        this.audioInput = context.createMediaStreamSource(stream);

        await context.audioWorklet.addModule('/static/workers/audio_worklet.js');
        const voicesdk = await createVoiceSdkModule();
        if (this.isDestroyed) {
            return;
        }

        // The SDK may throw plain numbers; those are translated to a message
        // through the SDK before being reported.
        this.handleVoiceSdkError = (error) => {
            if (typeof error === 'number') {
                this.onError(voicesdk.getExceptionMessage(error));
            } else {
                this.onError(error);
            }
        };

        try {
            voicesdk.setLicense(VOICE_SDK_LICENSE);
            speechEndpointDetector = new voicesdk.SpeechEndpointDetector(
                this.minSpeechLengthMs,
                maxSilenceLengthMs,
                context.sampleRate,
            );
            speechSummaryEngine = new voicesdk.SpeechSummaryEngine('/home/speech_summary');
            speechSummaryStream = speechSummaryEngine.createStream(context.sampleRate);
            snrComputer = new voicesdk.SnrComputer('/home/speech_summary');
        } catch (error) {
            // Without the SDK objects the recorder cannot work, so it must not
            // be announced as ready.
            this.handleVoiceSdkError(error);
            return;
        }

        onInitialized();
        this.isRecorderReady = true;
        onRecorderReady();
    }

    /**
     * Stops recording and releases the microphone, the audio context and the
     * SDK objects. Safe to call more than once.
     */
    destroy() {
        this.isDestroyed = true;
        this.isRecorderReady = false;
        this.manuallyStopRecording();
        if (this.stream) {
            this.stream.getAudioTracks().forEach((track) => track.stop());
        }
        this.stream = null;
        if (this.context && this.context.state !== 'closed') {
            // Dropping the reference alone keeps the audio hardware/thread
            // alive; the context has to be closed explicitly.
            this.context.close().catch((error) => {
                console.warn('Failed to close AudioContext', error);
            });
        }
        this.context = null;
        this.audioRecordingNode = null;
        this.audioInput = null;
        // Keep the buffer list a valid (empty) array so late accessor calls do not throw.
        this.audioBuffers = [];
        this.numRecordedSamples = 0;
        if (speechEndpointDetector) speechEndpointDetector.delete();
        speechEndpointDetector = null;
        if (speechSummaryStream) speechSummaryStream.delete();
        speechSummaryStream = null;
        if (speechSummaryEngine) speechSummaryEngine.delete();
        speechSummaryEngine = null;
        if (snrComputer) snrComputer.delete();
        snrComputer = null;
        console.log('destroy voice module')
    }

    /**
     * Builds a mono 16-bit PCM WAV file from the buffered chunks and then
     * clears the buffers (audioBuffers and numRecordedSamples are reset).
     *
     * @returns {Blob} WAV file with MIME type "audio/wav".
     */
    buildWavSpecification() {
        const headerSize = 44;
        const bytesPerSample = 2; // 16-bit samples, one channel
        const sampleRate = this.context.sampleRate;
        const chunks = this.audioBuffers;
        const sampleCount = chunks.reduce((sum, chunk) => sum + chunk.length, 0);
        const dataSize = sampleCount * bytesPerSample;

        const view = new DataView(new ArrayBuffer(headerSize + dataSize));
        const writeAscii = (offset, text) => {
            for (let i = 0; i < text.length; i++) {
                view.setUint8(offset + i, text.charCodeAt(i));
            }
        };

        // RIFF chunk descriptor
        writeAscii(0, 'RIFF');
        view.setUint32(4, 36 + dataSize, true); // file size - 8
        writeAscii(8, 'WAVE');

        // fmt subchunk
        writeAscii(12, 'fmt ');
        view.setUint32(16, 16, true); // Subchunk1Size (16 for PCM)
        view.setUint16(20, 1, true); // AudioFormat (1 for PCM)
        view.setUint16(22, 1, true); // NumChannels (1 for mono)
        view.setUint32(24, sampleRate, true); // SampleRate
        view.setUint32(28, sampleRate * bytesPerSample, true); // ByteRate
        view.setUint16(32, bytesPerSample, true); // BlockAlign
        view.setUint16(34, 16, true); // BitsPerSample

        // data subchunk
        writeAscii(36, 'data');
        view.setUint32(40, dataSize, true); // Subchunk2Size

        // WAV data is little-endian regardless of the platform byte order,
        // hence the explicit DataView writes.
        let offset = headerSize;
        for (const chunk of chunks) {
            for (let i = 0; i < chunk.length; i++) {
                view.setInt16(offset, chunk[i], true);
                offset += bytesPerSample;
            }
        }

        this.audioBuffers = [];
        this.numRecordedSamples = 0;

        return new Blob([view.buffer], { type: 'audio/wav' });
    }

    /**
     * Handles one chunk of PCM16 samples delivered by the audio worklet:
     * buffers it, feeds the SDK, reports the speech length and either finishes
     * the recording (end of speech) or restarts it when the speech length has
     * stopped changing.
     *
     * @param {Int16Array} pcm16Samples - chunk of samples posted by the worklet.
     */
    processAudioBuffer(pcm16Samples) {
        // Number of chunks without a speech-length change after which the
        // recording is restarted from scratch.
        const maxUnchangedChecks = 20;

        this.audioBuffers.push(pcm16Samples);
        this.numRecordedSamples += pcm16Samples.length;
        try {
            speechEndpointDetector.addPcm16Samples(pcm16Samples);
            speechSummaryStream.addPcm16Samples(pcm16Samples);
            const speechLength = speechSummaryStream.getTotalSpeechSummary().speechInfo.speechLengthMs;
            this.onSpeechLength(speechLength);
            if (lastSpeechLength !== speechLength) checks = 0;
            lastSpeechLength = speechLength;
            if (speechLength >= this.minSpeechLengthMs && speechEndpointDetector.isSpeechEnded()) {
                this.manuallyStopRecording();
                // NOTE(review): buildWavSpecification() empties the buffers, so
                // getRecordedAudioPcm16Samples()/checkRecordedAudioQuality()
                // see no audio once onRecordingStopped is running - confirm
                // this is intended.
                this.onRecordingStopped(this.buildWavSpecification());
            } else {
                checks++;
                if (checks >= maxUnchangedChecks) {
                    // startRecording() resets the SDK streams and drops the buffered audio.
                    this.manuallyStopRecording();
                    this.startRecording();
                }
            }
        } catch (error) {
            this.handleVoiceSdkError(error);
        }
    }

    /**
     * Starts a new recording, discarding any previously buffered audio.
     *
     * @throws {string} when the recorder is not ready yet or is already
     *     recording (plain strings are kept for backward compatibility).
     */
    startRecording() {
        if (!this.isRecorderReady) {
            throw 'Recorder is not ready yet, use onRecorderReady() callback to check';
        }
        if (this.isRecording) {
            throw 'Recorder is already recording';
        }
        speechSummaryStream.reset();
        speechEndpointDetector.reset();
        this.numRecordedSamples = 0;
        checks = 0;
        this.audioBuffers = [];

        // Create the node before flipping isRecording so a failing
        // AudioWorkletNode constructor does not leave the flag stuck on true.
        const node = new AudioWorkletNode(this.context, 'audio-worklet');
        this.audioRecordingNode = node;
        this.isRecording = true;
        this.audioInput.connect(node);
        node.connect(this.context.destination);
        node.port.onmessage = (e) => {
            // Only accept chunks from the node of the current, still running
            // recording; anything else is a late message from a stopped one.
            const isCurrent = this.isRecording && this.audioRecordingNode === node;
            if (e.data.eventType === 'samples' && this.stream && isCurrent) {
                this.processAudioBuffer(e.data.samples);
            }
        };
    }

    /**
     * Stops the current recording (no-op when not recording). The buffered
     * audio is kept and can be read with getRecordedAudioPcm16Samples().
     */
    manuallyStopRecording() {
        if (!this.isRecording) {
            return;
        }
        this.isRecording = false;
        // Detach the handler first: chunks already queued by the worklet must
        // not be processed after the stop, otherwise they could trigger
        // onRecordingStopped again or restart the recording.
        this.audioRecordingNode.port.onmessage = null;
        this.audioInput.disconnect(this.audioRecordingNode);
        this.audioRecordingNode.disconnect(this.context.destination);
    }

    /**
     * Returns all buffered chunks joined into one array.
     *
     * Audio obtained with this method can be used for making requests to
     * IDVoice Server, e.g. /voice_template_factory/create_voice_template_from_samples
     *
     * @returns {Promise<Int16Array>} the recorded PCM16 samples.
     */
    async getRecordedAudioPcm16Samples() {
        const chunks = this.audioBuffers;
        const totalLength = chunks.reduce((sum, chunk) => sum + chunk.length, 0);
        const audio = new Int16Array(totalLength);
        let offset = 0;
        for (const chunk of chunks) {
            audio.set(chunk, offset);
            offset += chunk.length;
        }
        return audio;
    }

    /**
     * @returns {number} sample rate of the audio context used for recording.
     */
    getRecordingSampleRate() {
        return this.context.sampleRate;
    }

    /**
     * Runs the SDK speech-summary and SNR computations on the buffered audio
     * and compares the results with the given thresholds.
     *
     * @param {number} speechLengthThresholdMs - minimum acceptable speech length.
     * @param {number} snrThresholdDb - minimum acceptable signal-to-noise ratio.
     * @returns {Promise<{speechLengthMs: number, snrDb: number, message: string}|undefined>}
     *     message is 'OK', 'QUALITY_TOO_SMALL_SPEECH_TOTAL_LENGTH' or 'QUALITY_TOO_NOISY';
     *     resolves to undefined when the SDK throws (the error is reported through onError).
     */
    async checkRecordedAudioQuality(speechLengthThresholdMs, snrThresholdDb) {
        const samples = await this.getRecordedAudioPcm16Samples();
        const sampleRate = this.getRecordingSampleRate();
        try {
            const result = {};
            result.speechLengthMs = speechSummaryEngine.getSpeechSummaryFromPcm16Samples(samples, sampleRate).speechInfo.speechLengthMs;
            result.snrDb = snrComputer.computeFromPcm16Samples(samples, sampleRate);
            result.message = 'OK';
            if (result.speechLengthMs < speechLengthThresholdMs) {
                result.message = 'QUALITY_TOO_SMALL_SPEECH_TOTAL_LENGTH';
            } else if (result.snrDb < snrThresholdDb) {
                result.message = 'QUALITY_TOO_NOISY';
            }
            return result;
        } catch (error) {
            this.handleVoiceSdkError(error);
        }
    }
}
