package org.sejda.impl.sambox.ocr.component;

import java.io.Closeable;
import java.io.IOException;
import java.io.Writer;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.function.Consumer;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.sejda.sambox.contentstream.PDFStreamEngine;
import org.sejda.sambox.contentstream.operator.MissingOperandException;
import org.sejda.sambox.contentstream.operator.Operator;
import org.sejda.sambox.contentstream.operator.OperatorProcessor;
import org.sejda.sambox.cos.COSBase;
import org.sejda.sambox.cos.COSDictionary;
import org.sejda.sambox.cos.COSName;
import org.sejda.sambox.cos.COSStream;
import org.sejda.sambox.pdmodel.MissingResourceException;
import org.sejda.sambox.pdmodel.PDPage;
import org.sejda.sambox.pdmodel.common.PDStream;
import org.sejda.sambox.pdmodel.graphics.PDXObject;
import org.sejda.sambox.pdmodel.graphics.form.PDFormXObject;
import org.sejda.sambox.pdmodel.graphics.form.PDTransparencyGroup;
import org.sejda.sambox.pdmodel.graphics.image.PDImageXObject;
import org.sejda.util.RequireUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Content stream engine that looks at the XObjects drawn by a page and sends the images it finds to an
 * {@link OCR} engine, writing the recognized text to the {@link Writer} supplied at construction time.
 * <p>
 * The only operator registered by this engine is {@code Do}. Image XObjects are OCR-ed, form XObjects and
 * transparency groups are handed back to the engine so that their content gets processed as well.
 * <p>
 * Instances are used as {@code Consumer<PDPage>}: a failure while processing one page is logged and does not
 * propagate to the caller. Closing the extractor closes the underlying writer.
 */
public class OcrTextExtractor extends PDFStreamEngine implements Consumer<PDPage>, Closeable {
    private static final Logger LOG = LoggerFactory.getLogger(OcrTextExtractor.class);
    /** Language handed to the OCR engine when the caller does not specify any. */
    private static final String DEFAULT_LANGUAGE = "eng";
    /** Separator used to join multiple language codes into the single string given to the OCR engine. */
    private static final String LANGUAGE_SEPARATOR = "+";

    private final Writer writer;
    private final OCR ocrEngine;

    /**
     * Processor for the {@code Do} operator: resolves the named XObject from the current resources and either
     * performs OCR on it (images) or asks the engine to process its content (forms).
     */
    private class DoOCR extends OperatorProcessor {
        private DoOCR() {
        }

        /**
         * Handles one {@code Do} invocation.
         *
         * @param operator the operator being processed
         * @param list the operands; the first one is expected to be the XObject name
         * @throws MissingOperandException if no operand is supplied
         * @throws MissingResourceException if the resources have no XObject with the requested name
         * @throws IOException if the XObject cannot be created or its form content cannot be processed
         */
        public void process(Operator operator, List<COSBase> list) throws IOException {
            if (list.isEmpty()) {
                throw new MissingOperandException(operator, list);
            }
            COSBase operand = list.get(0);
            if (!(operand instanceof COSName)) {
                // a Do operator without a name operand is silently ignored
                return;
            }
            COSName objectName = (COSName) operand;
            COSBase xobject = Optional
                    .ofNullable(getContext().getResources().getCOSObject().getDictionaryObject(COSName.XOBJECT,
                            COSDictionary.class))
                    .map(xobjects -> xobjects.getDictionaryObject(objectName))
                    .orElseThrow(() -> new MissingResourceException("Missing XObject: " + objectName.getName()));
            if (!(xobject instanceof COSStream)) {
                return;
            }
            COSStream stream = (COSStream) xobject;
            String subtype = stream.getNameAsString(COSName.SUBTYPE);
            if (COSName.IMAGE.getName().equals(subtype)) {
                ocrImage(objectName, stream);
            } else if (COSName.FORM.getName().equals(subtype)) {
                processForm(stream);
            }
        }

        /**
         * Runs the OCR engine on the image stored in the given stream and writes the resulting text. An
         * {@link IOException} raised while recognizing or writing is logged and swallowed so the remaining
         * content of the page is still processed.
         */
        private void ocrImage(COSName objectName, COSStream stream) throws IOException {
            OcrTextExtractor.LOG.trace("Performing OCR on {}", objectName);
            PDImageXObject image = (PDImageXObject) PDXObject.createXObject(stream.getCOSObject(),
                    getContext().getResources());
            try {
                OcrTextExtractor.this.writer.write(OcrTextExtractor.this.ocrEngine.ocrTextFrom(image.getImage()));
            } catch (IOException e) {
                OcrTextExtractor.LOG.warn("Unable to OCR image", e);
            } finally {
                // always drop the decoded image data, even when the OCR engine fails unexpectedly
                // (presumably unDecode() frees the unfiltered stream data; verify against COSStream)
                image.getCOSObject().unDecode();
            }
        }

        /**
         * Asks the engine to process the content of a form XObject so that images nested in it are reached.
         */
        private void processForm(COSStream stream) throws IOException {
            PDXObject form = PDXObject.createXObject(stream.getCOSObject(), getContext().getResources());
            // the transparency group check must come first: it is tested before the plain form type
            if (form instanceof PDTransparencyGroup) {
                getContext().showTransparencyGroup((PDTransparencyGroup) form);
            } else if (form instanceof PDFormXObject) {
                getContext().showForm((PDFormXObject) form);
            }
        }

        /**
         * @return the name of the operator handled by this processor, {@code Do}
         */
        public String getName() {
            return "Do";
        }
    }

    /**
     * Creates an extractor writing the recognized text to the given writer.
     *
     * @param writer destination of the recognized text, validated as non-null
     * @param ocr engine performing the recognition, validated as non-null
     */
    public OcrTextExtractor(Writer writer, OCR ocr) {
        RequireUtils.requireNotNullArg(writer, "Cannot write text on a null writer");
        RequireUtils.requireNotNullArg(ocr, "OCR engine cannot be null");
        addOperator(new DoOCR());
        this.writer = writer;
        this.ocrEngine = ocr;
        // NOTE(review): a null data path is passed on purpose by the original code; presumably it makes the
        // engine fall back to its default data location - confirm against the OCR implementation
        this.ocrEngine.setDatapath(null);
    }

    /**
     * Configures the languages the OCR engine should recognize. The ISO 639-2 three-letter codes of the given
     * locales are joined with {@code +}; when no locale is supplied {@code eng} is used.
     *
     * @param set the locales to recognize, may be {@code null} or empty
     */
    public void setLanguage(Set<Locale> set) {
        if (set == null || set.isEmpty()) {
            this.ocrEngine.setLanguage(DEFAULT_LANGUAGE);
            return;
        }
        String languages = set.stream().map(Locale::getISO3Language)
                .collect(Collectors.joining(LANGUAGE_SEPARATOR));
        this.ocrEngine.setLanguage(languages);
    }

    /**
     * Performs OCR on the images drawn by the given page. Pages without content are skipped and an
     * {@link IOException} raised while processing is logged, letting the caller continue with the next page.
     *
     * @param pDPage the page to process
     */
    @Override // java.util.function.Consumer
    public void accept(PDPage pDPage) {
        try {
            if (!pDPage.hasContents()) {
                LOG.debug("Skipping page with no content");
                return;
            }
            try {
                processPage(pDPage);
            } finally {
                // release the decoded content streams whether or not the page was processed successfully
                unload(pDPage);
            }
        } catch (IOException e) {
            LOG.error("An error occurred doing OCR on page, skipping and continuing with next.", e);
        }
    }

    /**
     * Calls {@code unDecode()} on every content stream of the page once the page has been handled.
     */
    private void unload(PDPage pDPage) {
        Iterator<PDStream> contentStreams = pDPage.getContentStreams();
        while (contentStreams.hasNext()) {
            contentStreams.next().getCOSObject().unDecode();
        }
    }

    /**
     * Closes the underlying writer, ignoring any error raised while closing.
     */
    @Override // java.io.Closeable, java.lang.AutoCloseable
    public void close() {
        IOUtils.closeQuietly(this.writer);
    }
}
