package io.trino.parquet.reader;

import io.airlift.log.Logger;
import io.airlift.slice.Slice;
import io.airlift.slice.Slices;
import io.trino.parquet.ParquetCorruptionException;
import io.trino.parquet.ParquetDataSource;
import io.trino.parquet.ParquetDataSourceId;
import io.trino.parquet.ParquetMetadataConverter;
import io.trino.parquet.ParquetValidationUtils;
import io.trino.parquet.ParquetWriteValidation;
import io.trino.parquet.metadata.BlockMetadata;
import io.trino.parquet.metadata.ColumnChunkMetadata;
import io.trino.parquet.metadata.FileMetadata;
import io.trino.parquet.metadata.ParquetMetadata;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Optional;
import java.util.Set;
import org.apache.parquet.CorruptStatistics;
import org.apache.parquet.column.Encoding;
import org.apache.parquet.column.statistics.BinaryStatistics;
import org.apache.parquet.column.statistics.Statistics;
import org.apache.parquet.format.ColumnMetaData;
import org.apache.parquet.format.FileMetaData;
import org.apache.parquet.format.KeyValue;
import org.apache.parquet.format.RowGroup;
import org.apache.parquet.format.SchemaElement;
import org.apache.parquet.format.Util;
import org.apache.parquet.hadoop.metadata.ColumnPath;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.PrimitiveType;
import org.apache.parquet.schema.Type;
import org.apache.parquet.schema.Types;

/* loaded from: input_file:io/trino/parquet/reader/MetadataReader.class */
/**
 * Static helpers that locate, decode and validate the footer metadata of a Parquet file.
 *
 * <p>The class is a non-instantiable utility holder.
 */
public final class MetadataReader {
    private static final Logger log = Logger.get(MetadataReader.class);

    /** Magic bytes expected at the very end of the file tail. */
    private static final Slice MAGIC = Slices.utf8Slice("PAR1");

    /** Size of the trailer: a 4-byte metadata length followed by the magic bytes. */
    private static final int POST_SCRIPT_SIZE = Integer.BYTES + MAGIC.length();

    /** Number of tail bytes requested by the first read; a longer footer triggers a second, exact-size read. */
    private static final int EXPECTED_FOOTER_SIZE = 49152;

    private MetadataReader() {
    }

    /**
     * Reads the footer from the tail of {@code parquetDataSource} and converts it to {@link ParquetMetadata}.
     *
     * @param parquetDataSource source to read the file tail from
     * @param optional write validation to run against the decoded file metadata; nothing is validated when empty
     * @return the decoded metadata
     * @throws ParquetCorruptionException if the source is too small, the trailing magic bytes do not match,
     *         or the metadata length stored in the trailer points outside the file
     * @throws IOException if reading the tail or decoding the metadata fails
     */
    public static ParquetMetadata readFooter(ParquetDataSource parquetDataSource, Optional<ParquetWriteValidation> optional) throws IOException {
        ParquetDataSourceId dataSourceId = parquetDataSource.getId();
        long estimatedSize = parquetDataSource.getEstimatedSize();
        ParquetValidationUtils.validateParquet(
                estimatedSize >= MAGIC.length() + POST_SCRIPT_SIZE,
                dataSourceId,
                "%s is not a valid Parquet File",
                dataSourceId);

        // Optimistically read a fixed-size tail; it is re-read below if the footer turns out to be larger.
        Slice tail = parquetDataSource.readTail(Math.toIntExact(Math.min(estimatedSize, EXPECTED_FOOTER_SIZE)));

        Slice magic = tail.slice(tail.length() - MAGIC.length(), MAGIC.length());
        ParquetValidationUtils.validateParquet(
                MAGIC.equals(magic),
                dataSourceId,
                "Expected magic number: %s got: %s",
                MAGIC.toStringUtf8(),
                magic.toStringUtf8());

        // The metadata length sits immediately before the trailing magic bytes.
        int metadataLength = tail.getInt(tail.length() - POST_SCRIPT_SIZE);
        long metadataIndex = estimatedSize - POST_SCRIPT_SIZE - metadataLength;
        ParquetValidationUtils.validateParquet(
                metadataIndex >= MAGIC.length() && metadataIndex < estimatedSize - POST_SCRIPT_SIZE,
                dataSourceId,
                "Metadata index: %s out of range",
                metadataIndex);

        int completeFooterSize = metadataLength + POST_SCRIPT_SIZE;
        if (completeFooterSize > tail.length()) {
            // The first read did not cover the whole footer, so read again with the exact size.
            tail = parquetDataSource.readTail(completeFooterSize);
        }

        FileMetaData fileMetaData = Util.readFileMetaData(tail.slice(tail.length() - completeFooterSize, metadataLength).getInput());
        ParquetMetadata parquetMetadata = createParquetMetadata(fileMetaData, dataSourceId);
        validateFileMetadata(dataSourceId, parquetMetadata.getFileMetaData(), optional);
        return parquetMetadata;
    }

    /**
     * Converts a Thrift {@link FileMetaData} into {@link ParquetMetadata}.
     *
     * <p>Column paths and schema field names are lower-cased using {@link Locale#ENGLISH}.
     *
     * @param fileMetaData decoded Thrift footer
     * @param parquetDataSourceId identifier passed to the validation helper when a check fails
     * @return the converted schema, key/value metadata, creator string and row-group metadata
     * @throws ParquetCorruptionException if the schema is empty, a row group has no columns,
     *         or the column chunks of one row group do not share the same file path
     */
    public static ParquetMetadata createParquetMetadata(FileMetaData fileMetaData, ParquetDataSourceId parquetDataSourceId) throws ParquetCorruptionException {
        List<SchemaElement> schemaElements = fileMetaData.getSchema();
        ParquetValidationUtils.validateParquet(!schemaElements.isEmpty(), parquetDataSourceId, "Schema is empty");
        MessageType messageType = readParquetSchema(schemaElements);

        Optional<String> createdBy = Optional.ofNullable(fileMetaData.getCreated_by());
        List<BlockMetadata> blocks = new ArrayList<>();
        List<RowGroup> rowGroups = fileMetaData.getRow_groups();
        if (rowGroups != null) {
            for (RowGroup rowGroup : rowGroups) {
                blocks.add(readBlockMetadata(rowGroup, messageType, createdBy, parquetDataSourceId));
            }
        }

        var keyValueMetadata = new HashMap<String, String>();
        List<KeyValue> keyValues = fileMetaData.getKey_value_metadata();
        if (keyValues != null) {
            for (KeyValue keyValue : keyValues) {
                keyValueMetadata.put(keyValue.key, keyValue.value);
            }
        }
        return new ParquetMetadata(new FileMetadata(messageType, keyValueMetadata, fileMetaData.getCreated_by()), blocks);
    }

    /** Converts one Thrift row group, including all of its column chunks, into a {@link BlockMetadata}. */
    private static BlockMetadata readBlockMetadata(RowGroup rowGroup, MessageType messageType, Optional<String> createdBy, ParquetDataSourceId dataSourceId) throws ParquetCorruptionException {
        BlockMetadata block = new BlockMetadata();
        block.setRowCount(rowGroup.getNum_rows());
        block.setTotalByteSize(rowGroup.getTotal_byte_size());

        List<org.apache.parquet.format.ColumnChunk> columns = rowGroup.getColumns();
        ParquetValidationUtils.validateParquet(!columns.isEmpty(), dataSourceId, "No columns in row group: %s", rowGroup);

        // The first chunk's path is the reference every other chunk in the row group must match.
        String filePath = columns.get(0).getFile_path();
        for (org.apache.parquet.format.ColumnChunk columnChunk : columns) {
            String chunkPath = columnChunk.getFile_path();
            boolean samePath = (filePath == null && chunkPath == null) || (filePath != null && filePath.equals(chunkPath));
            ParquetValidationUtils.validateParquet(samePath, dataSourceId, "all column chunks of the same row group must be in the same file");
            block.addColumn(readColumnChunkMetadata(columnChunk, messageType, createdBy));
        }
        block.setPath(filePath);
        return block;
    }

    /** Converts one Thrift column chunk into a {@link ColumnChunkMetadata}, resolving its type from {@code messageType}. */
    private static ColumnChunkMetadata readColumnChunkMetadata(org.apache.parquet.format.ColumnChunk columnChunk, MessageType messageType, Optional<String> createdBy) {
        ColumnMetaData metaData = columnChunk.meta_data;
        // Lower-cased to match the field names produced by readTypeSchema.
        String[] path = metaData.path_in_schema.stream()
                .map(name -> name.toLowerCase(Locale.ENGLISH))
                .toArray(String[]::new);
        ColumnPath columnPath = ColumnPath.get(path);
        PrimitiveType primitiveType = messageType.getType(columnPath.toArray()).asPrimitiveType();

        ColumnChunkMetadata column = ColumnChunkMetadata.get(
                columnPath,
                primitiveType,
                CompressionCodecName.fromParquet(metaData.codec),
                ParquetMetadataConverter.convertEncodingStats(metaData.encoding_stats),
                readEncodings(metaData.encodings),
                readStats(createdBy, Optional.ofNullable(metaData.statistics), primitiveType),
                metaData.data_page_offset,
                metaData.dictionary_page_offset,
                metaData.num_values,
                metaData.total_compressed_size,
                metaData.total_uncompressed_size);
        column.setColumnIndexReference(ParquetMetadataConverter.toColumnIndexReference(columnChunk));
        column.setOffsetIndexReference(ParquetMetadataConverter.toOffsetIndexReference(columnChunk));
        column.setBloomFilterOffset(metaData.bloom_filter_offset);
        return column;
    }

    /**
     * Rebuilds the schema tree from the flattened element list: the first element is the root message,
     * and its descendants follow in the order consumed by {@link #readTypeSchema}.
     */
    private static MessageType readParquetSchema(List<SchemaElement> schemaElements) {
        Iterator<SchemaElement> elements = schemaElements.iterator();
        SchemaElement root = elements.next();
        Types.MessageTypeBuilder messageBuilder = Types.buildMessage();
        readTypeSchema(messageBuilder, elements, root.getNum_children());
        return messageBuilder.named(root.name);
    }

    /**
     * Consumes {@code childCount} children from {@code elements} and adds them to {@code parentBuilder},
     * recursing into elements that have no primitive type set (groups).
     */
    private static void readTypeSchema(Types.GroupBuilder<?> parentBuilder, Iterator<SchemaElement> elements, int childCount) {
        for (int child = 0; child < childCount; child++) {
            SchemaElement element = elements.next();

            // Group and primitive builders are distinct types that only share Types.Builder,
            // so the type-specific setup is done per branch and the common tail uses the base type.
            Types.Builder<?, ?> typeBuilder;
            if (element.type == null) {
                Types.GroupBuilder<?> nestedGroup = parentBuilder.group(Type.Repetition.valueOf(element.repetition_type.name()));
                readTypeSchema(nestedGroup, elements, element.num_children);
                typeBuilder = nestedGroup;
            } else {
                Types.PrimitiveBuilder<?> primitiveBuilder = parentBuilder.primitive(
                        ParquetMetadataConverter.getPrimitive(element.type),
                        Type.Repetition.valueOf(element.repetition_type.name()));
                if (element.isSetType_length()) {
                    primitiveBuilder.length(element.type_length);
                }
                if (element.isSetPrecision()) {
                    primitiveBuilder.precision(element.precision);
                }
                if (element.isSetScale()) {
                    primitiveBuilder.scale(element.scale);
                }
                typeBuilder = primitiveBuilder;
            }

            LogicalTypeAnnotation fromLogicalType = null;
            if (element.isSetLogicalType()) {
                fromLogicalType = ParquetMetadataConverter.getLogicalTypeAnnotation(element.logicalType);
                typeBuilder.as(fromLogicalType);
            }
            if (element.isSetConverted_type()) {
                LogicalTypeAnnotation fromConvertedType = ParquetMetadataConverter.getLogicalTypeAnnotation(element.converted_type, element);
                if (fromLogicalType == null) {
                    typeBuilder.as(fromConvertedType);
                } else if (fromLogicalType.toOriginalType() != fromConvertedType.toOriginalType()) {
                    // Both are set but disagree: warn and let the converted type override the logical type.
                    log.warn(
                            "Converted type and logical type metadata map to different OriginalType (convertedType: %s, logical type: %s). Using value in converted type.",
                            element.converted_type,
                            element.logicalType);
                    typeBuilder.as(fromConvertedType);
                }
                // When both map to the same OriginalType, the annotation from logicalType is kept.
            }

            if (element.isSetField_id()) {
                typeBuilder.id(element.field_id);
            }
            typeBuilder.named(element.name.toLowerCase(Locale.ENGLISH));
        }
    }

    /**
     * Converts the statistics stored in the file into column statistics.
     *
     * <p>For string-annotated columns a fallback is attempted via {@code tryReadOldUtf8Stats} when all of the
     * following hold: the file statistics set {@code min}/{@code max} but not {@code min_value}/{@code max_value},
     * the converted statistics have neither min nor max, and {@link CorruptStatistics} does not ask for the
     * statistics to be ignored.
     *
     * @param optional the file's {@code created_by} string, if any
     * @param optional2 the statistics stored in the file for the column, if any
     * @param primitiveType the column's primitive type
     * @return the converted statistics
     */
    public static Statistics<?> readStats(Optional<String> optional, Optional<org.apache.parquet.format.Statistics> optional2, PrimitiveType primitiveType) {
        String createdBy = optional.orElse(null);
        org.apache.parquet.format.Statistics fileStatistics = optional2.orElse(null);
        Statistics<?> columnStatistics = ParquetMetadataConverter.fromParquetStatistics(createdBy, fileStatistics, primitiveType);

        if (isStringType(primitiveType)
                && fileStatistics != null
                && !fileStatistics.isSetMin_value() && !fileStatistics.isSetMax_value()
                && fileStatistics.isSetMin() && fileStatistics.isSetMax()
                && columnStatistics.genericGetMin() == null && columnStatistics.genericGetMax() == null
                && !CorruptStatistics.shouldIgnoreStatistics(createdBy, primitiveType.getPrimitiveTypeName())) {
            columnStatistics = tryReadOldUtf8Stats(fileStatistics, (BinaryStatistics) columnStatistics);
        }
        return columnStatistics;
    }

    /** Returns whether {@code primitiveType} carries a string logical type annotation. */
    private static boolean isStringType(PrimitiveType primitiveType) {
        LogicalTypeAnnotation annotation = primitiveType.getLogicalTypeAnnotation();
        if (annotation == null) {
            return false;
        }
        return annotation.accept(new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<Boolean>() {
            @Override
            public Optional<Boolean> visit(LogicalTypeAnnotation.StringLogicalTypeAnnotation stringLogicalType) {
                return Optional.of(Boolean.TRUE);
            }
        }).orElse(Boolean.FALSE);
    }

    /**
     * Derives min/max bounds from the {@code min}/{@code max} fields of the file statistics.
     *
     * <p>When min and max are byte-equal they are used as they are. Otherwise min is cut after its common prefix
     * with max plus the following run of ASCII bytes, and max is cut to a prefix whose last byte is ASCII and
     * below 127, and that last byte is then incremented. If no such prefix of max exists,
     * {@code columnStatistics} is returned unchanged.
     */
    private static Statistics<?> tryReadOldUtf8Stats(org.apache.parquet.format.Statistics fileStatistics, BinaryStatistics columnStatistics) {
        byte[] min = fileStatistics.getMin();
        byte[] max = fileStatistics.getMax();

        byte[] minBound;
        byte[] maxBound;
        if (Arrays.equals(min, max)) {
            minBound = min.clone();
            maxBound = minBound;
        } else {
            int sharedLength = commonPrefix(min, max);

            // Min: keep the shared prefix plus the ASCII bytes that directly follow it.
            int minLength = sharedLength;
            while (minLength < min.length && isAscii(min[minLength])) {
                minLength++;
            }

            // Max: keep the shared prefix plus at most one more byte, and only if that byte is ASCII on both sides.
            int maxLength = sharedLength;
            if (maxLength < max.length && maxLength < min.length && isAscii(min[maxLength]) && isAscii(max[maxLength])) {
                maxLength++;
            }
            // The last kept byte is incremented below; back off past bytes that are 127 (would overflow) or non-ASCII.
            while (maxLength > 0 && (max[maxLength - 1] == Byte.MAX_VALUE || !isAscii(max[maxLength - 1]))) {
                maxLength--;
            }
            if (maxLength == 0) {
                return columnStatistics;
            }

            minBound = Arrays.copyOf(min, minLength);
            maxBound = Arrays.copyOf(max, maxLength);
            maxBound[maxLength - 1]++;
        }

        // Prefer the null count already present on the converted statistics; otherwise use the file's, if set.
        long numNulls = (columnStatistics.isNumNullsSet() || !fileStatistics.isSetNull_count())
                ? columnStatistics.getNumNulls()
                : fileStatistics.getNull_count();
        return Statistics.getBuilderForReading(columnStatistics.type())
                .withMin(minBound)
                .withMax(maxBound)
                .withNumNulls(numNulls)
                .build();
    }

    /** Returns whether {@code value} is non-negative, i.e. has its high bit clear. */
    private static boolean isAscii(byte value) {
        return value >= 0;
    }

    /** Returns the number of leading bytes that are equal in {@code left} and {@code right}. */
    private static int commonPrefix(byte[] left, byte[] right) {
        int limit = Math.min(left.length, right.length);
        int length = 0;
        while (length < limit && left[length] == right[length]) {
            length++;
        }
        return length;
    }

    /** Converts Thrift encodings into an unmodifiable set of Parquet column encodings. */
    private static Set<Encoding> readEncodings(List<org.apache.parquet.format.Encoding> formatEncodings) {
        Set<Encoding> encodings = new HashSet<>();
        for (org.apache.parquet.format.Encoding formatEncoding : formatEncodings) {
            encodings.add(ParquetMetadataConverter.getEncoding(formatEncoding));
        }
        return Collections.unmodifiableSet(encodings);
    }

    /**
     * Runs the time zone and column checks of the supplied write validation against the file metadata.
     * Does nothing when no validation is supplied.
     */
    private static void validateFileMetadata(ParquetDataSourceId dataSourceId, FileMetadata fileMetadata, Optional<ParquetWriteValidation> writeValidation) throws ParquetCorruptionException {
        if (writeValidation.isEmpty()) {
            return;
        }
        ParquetWriteValidation validation = writeValidation.get();
        validation.validateTimeZone(dataSourceId, Optional.ofNullable(fileMetadata.getKeyValueMetaData().get("writer.time.zone")));
        validation.validateColumns(dataSourceId, fileMetadata.getSchema());
    }
}
