package org.sejda.impl.sambox.ocr;

import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.OpenOption;
import java.util.HashSet;
import java.util.IllformedLocaleException;
import java.util.Locale;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import org.sejda.common.ComponentsUtility;
import org.sejda.core.notification.dsl.ApplicationEventsNotifier;
import org.sejda.core.support.io.IOUtils;
import org.sejda.core.support.io.MultipleOutputWriter;
import org.sejda.core.support.io.OutputWriters;
import org.sejda.core.support.io.model.FileOutput;
import org.sejda.core.support.prefix.NameGenerator;
import org.sejda.core.support.prefix.model.NameGenerationRequest;
import org.sejda.impl.sambox.component.DefaultPdfSourceOpener;
import org.sejda.impl.sambox.component.PDDocumentHandler;
import org.sejda.impl.sambox.ocr.component.OCR;
import org.sejda.impl.sambox.ocr.component.OcrTextExtractor;
import org.sejda.model.exception.TaskException;
import org.sejda.model.exception.TaskExecutionException;
import org.sejda.model.input.PdfSource;
import org.sejda.model.input.PdfSourceOpener;
import org.sejda.model.parameter.OcrTextParameters;
import org.sejda.model.pdf.encryption.PdfAccessPermission;
import org.sejda.model.task.BaseTask;
import org.sejda.model.task.TaskExecutionContext;
import org.sejda.sambox.pdmodel.PDPageTree;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/sejda/impl/sambox/ocr/OcrTextTask.class */
public class OcrTextTask extends BaseTask<OcrTextParameters> {
    private static final Logger LOG = LoggerFactory.getLogger(OcrTextTask.class);
    private int totalSteps;
    private PDDocumentHandler documentHandler = null;
    private MultipleOutputWriter outputWriter;
    private PdfSourceOpener<PDDocumentHandler> documentLoader;

    public void before(OcrTextParameters ocrTextParameters, TaskExecutionContext taskExecutionContext) throws TaskException {
        super.before(ocrTextParameters, taskExecutionContext);
        this.totalSteps = ocrTextParameters.getSourceList().size();
        this.documentLoader = new DefaultPdfSourceOpener();
        this.outputWriter = OutputWriters.newMultipleOutputWriter(ocrTextParameters.getExistingOutputPolicy(), taskExecutionContext);
    }

    public void execute(OcrTextParameters ocrTextParameters) throws TaskException {
        int i = 0;
        for (PdfSource pdfSource : ocrTextParameters.getSourceList()) {
            executionContext().assertTaskNotCancelled();
            i++;
            LOG.debug("Opening {}", pdfSource);
            this.documentHandler = (PDDocumentHandler) pdfSource.open(this.documentLoader);
            this.documentHandler.getPermissions().ensurePermission(PdfAccessPermission.COPY_AND_EXTRACT);
            File createTemporaryBuffer = IOUtils.createTemporaryBuffer();
            LOG.debug("Created output on temporary buffer {}", createTemporaryBuffer);
            HashSet hashSet = new HashSet(ocrTextParameters.getLanguages());
            Optional filter = Optional.ofNullable(this.documentHandler.getUnderlyingPDDocument().getDocumentCatalog().getLanguage()).map(str -> {
                return new Locale.Builder().setLanguageTag(str).build();
            }).filter((v0) -> {
                return Objects.nonNull(v0);
            });
            hashSet.getClass();
            filter.ifPresent((v1) -> {
                r1.add(v1);
            });
            try {
                OcrTextExtractor ocrTextExtractor = new OcrTextExtractor(Files.newBufferedWriter(createTemporaryBuffer.toPath(), Charset.forName(ocrTextParameters.getTextEncoding()), new OpenOption[0]), new OCR());
                Throwable th = null;
                try {
                    try {
                        ocrTextExtractor.setLanguage(hashSet);
                        PDPageTree pages = this.documentHandler.getUnderlyingPDDocument().getPages();
                        ocrTextExtractor.getClass();
                        pages.forEach(ocrTextExtractor::accept);
                        if (ocrTextExtractor != null) {
                            if (0 != 0) {
                                try {
                                    ocrTextExtractor.close();
                                } catch (Throwable th2) {
                                    th.addSuppressed(th2);
                                }
                            } else {
                                ocrTextExtractor.close();
                            }
                        }
                        this.outputWriter.addOutput(FileOutput.file(createTemporaryBuffer).name(NameGenerator.nameGenerator(ocrTextParameters.getOutputPrefix()).generate(NameGenerationRequest.nameRequest("txt").originalName(pdfSource.getName()).fileNumber(i))));
                        ComponentsUtility.nullSafeCloseQuietly(this.documentHandler);
                        ApplicationEventsNotifier.notifyEvent(executionContext().notifiableTaskMetadata()).stepsCompleted(i).outOf(this.totalSteps);
                    } finally {
                    }
                } finally {
                }
            } catch (IOException e) {
                throw new TaskExecutionException("An error occurred creating a file writer", e);
            } catch (UnsatisfiedLinkError e2) {
                throw new TaskExecutionException("Unable to find Tesseract native libraries", e2);
            }
        }
        ocrTextParameters.getOutput().accept(this.outputWriter);
        LOG.debug("OCR performed, text extracted and written to {}", ocrTextParameters.getOutput());
    }

    public void after() {
        ComponentsUtility.nullSafeCloseQuietly(this.documentHandler);
    }
}
