import * as jschardet from 'jschardet';
import { environment } from '../../../environments/environment';

/**
 * Normalized result of one character-encoding detection pass.
 */
interface IDetectedEncoding {
    /** Name of the encoding as reported by the detector. */
    encoding: string;
    /** True when the detector's confidence exceeded the acceptance threshold. */
    confident: boolean;
    /** Raw detector result that this summary was built from. */
    data: any;
}

/**
 * Runs jschardet over `buffer` and normalizes its answer.
 *
 * Detection is best-effort: if jschardet throws, the error is logged via
 * `console.warn` (non-production builds only) and `undefined` is returned so
 * the caller can fall back to its own default.
 *
 * @param buffer Bytes to inspect.
 * @returns The detected encoding together with a `confident` flag
 *          (confidence strictly above 0.5), or `undefined` when no result
 *          could be obtained.
 */
async function detectEncodingWithJSChardet(
    buffer: Buffer
): Promise<IDetectedEncoding | undefined> {
    // Explicitly nullable: stays undefined when detect() throws.
    let detected: jschardet.IDetectedMap | undefined;
    try {
        detected = jschardet.detect(buffer);
    } catch (e) {
        if (!environment.production) {
            console.warn(e);
        }
    }

    if (!detected) {
        return undefined;
    }

    return {
        encoding: detected.encoding,
        confident: detected.confidence > 0.5,
        data: detected,
    };
}

/**
 * Picks the text encoding to use for `file`.
 *
 * Only the first megabyte of the file is sampled. The detected encoding is
 * returned when the detector is confident AND the encoding is listed in
 * `allowedEncodings`; in every other case `defaultEncoding` is returned.
 *
 * @param file Blob whose leading bytes are inspected.
 * @param allowedEncodings Encoding names the caller is able to handle
 *        (compared to the detector's output with an exact string match).
 * @param defaultEncoding Value returned when detection fails, is not
 *        confident, or yields an encoding outside `allowedEncodings`.
 * @param log Optional diagnostic sink; receives the detection result and the
 *        reason for any fallback.
 * @returns The chosen encoding name.
 */
export async function detectEncoding(
    file: Blob,
    allowedEncodings: string[],
    defaultEncoding: string,
    log?: (...args: any[]) => void
): Promise<string> {
    // 1 M is more than enough to detect encoding. Slice the Blob *before*
    // reading it so that a large file is never loaded into memory in full.
    const maxSampleBytes = 1000000;
    const sample =
        file.size > maxSampleBytes ? file.slice(0, maxSampleBytes) : file;
    const buffer = Buffer.from(await sample.arrayBuffer());

    // jschardet detects windows-1252 with high confidence
    const detectedEncoding = await detectEncodingWithJSChardet(buffer);

    log?.('detectEncoding', detectedEncoding);

    let encoding = defaultEncoding;

    if (detectedEncoding?.confident) {
        if (allowedEncodings.includes(detectedEncoding.encoding)) {
            encoding = detectedEncoding.encoding;
        } else {
            log?.(
                'detectEncoding',
                'unhandled encoding',
                detectedEncoding.encoding,
                allowedEncodings
            );
        }
    } else {
        // Covers both a failed detection (undefined) and a low-confidence one.
        log?.('detectEncoding', 'low confidence', detectedEncoding);
    }

    return encoding;
}
