import * as pdfjs from 'pdfjs-dist';

// Tell pdf.js where to load its worker script from. The URL is protocol-relative
// (it starts with `//`) and interpolates `pdfjs.version`, so the worker file
// requested from cdnjs carries the same version string as the imported library.
pdfjs.GlobalWorkerOptions.workerSrc = `//cdnjs.cloudflare.com/ajax/libs/pdf.js/${pdfjs.version}/pdf.worker.min.js`;

/**
 * Reads a file into memory using a `FileReader`, wrapped in a Promise.
 *
 * @param {Blob} file - The Blob/File handed to `FileReader.readAsArrayBuffer`.
 * @returns {Promise<ArrayBuffer>} Resolves with `reader.result` once loading succeeds.
 *   Rejects with the reader's `error` (or a generic `Error` if none is set)
 *   when the read fails.
 */
const readFileAsync = (file) => {
    return new Promise((resolve, reject) => {
        const reader = new FileReader();

        reader.onload = () => {
            resolve(reader.result);
        };

        // Reject with the actual failure reason rather than the raw event
        // object, so callers receive something with a message and a stack.
        reader.onerror = () => {
            reject(reader.error || new Error('Failed to read file'));
        };

        // Handlers are attached before the read is started.
        reader.readAsArrayBuffer(file);
    });
};

/**
 * Decides whether a PDF contains extractable text ("native") or not ("scanned").
 *
 * The file is read into memory, opened with pdf.js, and its pages are examined
 * in order. The PDF counts as native as soon as one page has at least one text
 * item whose string is non-empty after trimming.
 *
 * @param {Blob} pdf - The PDF file; passed to `readFileAsync`.
 * @returns {Promise<boolean>} `true` if any page yields non-blank text, otherwise `false`.
 *   Rejects if reading the file or loading/parsing the document fails.
 */
const checkIfNativePdf = async (pdf) => {
    const file = await readFileAsync(pdf);
    const typedarray = new Uint8Array(file);
    const loadingTask = pdfjs.getDocument(typedarray);

    try {
        const pdfDoc = await loadingTask.promise;
        let isNative = false;

        // One page with text is enough, so stop scanning once it is found.
        for (let pageNum = 1; pageNum <= pdfDoc.numPages && !isNative; pageNum++) {
            const page = await pdfDoc.getPage(pageNum);
            const textContent = await page.getTextContent();

            // Look at every item, not just the first: a page may start with
            // empty or whitespace-only items before its real text.
            isNative = textContent.items.some(
                (item) => typeof item.str === 'string' && item.str.trim() !== ''
            );
        }

        if (isNative) {
            console.log('44 The PDF is likely native.');
        } else {
            console.log('44 The PDF is likely scanned.');
        }
        return isNative;
    } finally {
        // Release the document and its worker whether or not loading succeeded.
        await loadingTask.destroy();
    }
};

/**
 * Reports the size of a file in kilobytes.
 *
 * Reads the `size` property of the given object, converts it with
 * `bytesToKb`, and logs both the byte and kilobyte values.
 *
 * @param {{ size: number }} pdf - Object exposing a `size` in bytes.
 * @returns {number} The size converted by `bytesToKb`.
 */
const getFileSize = (pdf) => {
    const { size } = pdf;
    console.log("44 bytes", size);

    const sizeInKb = bytesToKb(size);
    console.log("44 kb", sizeInKb);

    return sizeInKb;
};

/**
 * Converts a byte count to kilobytes (1 KB = 1024 bytes).
 *
 * @param {number} bytes - Size in bytes.
 * @returns {number} `bytes` divided by 1024; the result is not rounded.
 */
const bytesToKb = (bytes) => bytes / 1024;

export { checkIfNativePdf, getFileSize }