/*
 * Decompiled with CFR 0.152.
 */
package uk.ac.starlink.ttools.task;

import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.io.StringReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Map;
import org.apache.parquet.column.ColumnDescriptor;
import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.PrimitiveType;
import org.w3c.dom.NodeList;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import uk.ac.starlink.parquet.ParquetIO;
import uk.ac.starlink.parquet.ParquetStarTable;
import uk.ac.starlink.parquet.ParquetTableBuilder;
import uk.ac.starlink.parquet.ParquetUtil;
import uk.ac.starlink.table.ColumnInfo;
import uk.ac.starlink.table.StoragePolicy;
import uk.ac.starlink.table.Tables;
import uk.ac.starlink.table.ValueInfo;
import uk.ac.starlink.task.BooleanParameter;
import uk.ac.starlink.task.Environment;
import uk.ac.starlink.task.Executable;
import uk.ac.starlink.task.Parameter;
import uk.ac.starlink.task.ParameterValueException;
import uk.ac.starlink.task.StringParameter;
import uk.ac.starlink.task.Task;
import uk.ac.starlink.task.TaskException;
import uk.ac.starlink.ttools.func.Times;
import uk.ac.starlink.ttools.votlint.SaxMessager;
import uk.ac.starlink.ttools.votlint.VersionDetector;
import uk.ac.starlink.ttools.votlint.VotLintCode;
import uk.ac.starlink.ttools.votlint.VotLintContext;
import uk.ac.starlink.ttools.votlint.VotLinter;
import uk.ac.starlink.util.Bi;
import uk.ac.starlink.util.DataSource;
import uk.ac.starlink.votable.TableElement;
import uk.ac.starlink.votable.Timesys;
import uk.ac.starlink.votable.VODocument;
import uk.ac.starlink.votable.VOElement;
import uk.ac.starlink.votable.VOStarTable;
import uk.ac.starlink.votable.VOTableDOMBuilder;
import uk.ac.starlink.votable.VOTableVersion;

public class VOParquetLint
implements Task {
    private final Parameter<String> locParam_;
    private final Parameter<String> reportParam_;
    private final BooleanParameter ucdParam_;
    private final BooleanParameter unitParam_;
    private final BooleanParameter timeParam_;
    private final StringParameter votableParam_;
    private final BooleanParameter voparquetParam_;
    private final Parameter<?>[] params_;
    private static final VOTableVersion DFLT_VOTABLE_VERSION = VOTableVersion.V15;
    private static final double UNIX_EPOCH_AS_MJD = Times.mjdToJd(Times.unixMillisToMjd(0L));

    public VOParquetLint() {
        ArrayList<Object> paramList = new ArrayList<Object>();
        this.locParam_ = new StringParameter("in");
        this.locParam_.setPosition(1);
        this.locParam_.setPrompt("Location of parquet file");
        this.locParam_.setUsage("<filename>");
        this.locParam_.setDescription(new String[]{"<p>Name of the parquet file to check.", "</p>"});
        paramList.add(this.locParam_);
        this.voparquetParam_ = new BooleanParameter("voparquet");
        this.voparquetParam_.setPrompt("Dataless VOTable required?");
        this.voparquetParam_.setBooleanDefault(false);
        this.voparquetParam_.setDescription(new String[]{"<p>Configures whether a data-less VOTable is required", "in the parquet file or not.", "If this parameter is true, absence of any metadata VOTable", "will generate an Error report.", "Otherwise, it will merely result in an Info report.", "</p>"});
        paramList.add(this.voparquetParam_);
        this.reportParam_ = new StringParameter("report");
        this.reportParam_.setPrompt("Message types to report");
        this.reportParam_.setUsage("[EWI]+");
        this.reportParam_.setDescription(new String[]{"<p>Letters indicating which message types should be output.", "Each character of the string is one of the letters", "<code>E</code> (for Error),", "<code>W</code> (for Warning) and", "<code>I</code> (for Info).", "So to suppress Info messages", "set the value to \"<code>EW</code>\".", "</p>"});
        this.reportParam_.setStringDefault("EWI");
        paramList.add(this.reportParam_);
        this.ucdParam_ = new BooleanParameter("ucd");
        this.ucdParam_.setBooleanDefault(true);
        this.ucdParam_.setPrompt("Check ucd attributes for UCD1+ syntax?");
        this.ucdParam_.setDescription(new String[]{"<p>If true, the <code>ucd</code> attributes", "on <code>FIELD</code> and <code>PARAM</code> elements etc", "in the VOTable metadata table", "are checked for conformance against the UCD1+ standard", "or a list of known UCD1 terms.", "</p>"});
        paramList.add(this.ucdParam_);
        this.unitParam_ = new BooleanParameter("unit");
        this.unitParam_.setNullPermitted(true);
        this.unitParam_.setPrompt("Check unit attributes for VOUnit syntax?");
        this.unitParam_.setDescription(new String[]{"<p>If true, the <code>unit</code> attributes", "on <code>FIELD</code> and <code>PARAM</code> elements", "are checked for conformance against the VOUnits standard;", "if false, no such checks are made.", "</p>", "<p>The VOTable standard version 1.4 and later", "recommends use of VOUnits", "(there are some inconsistencies in the text on this topic", "in VOTable 1.4, but these are cleared up in V1.5).", "Earlier VOTable versions refer to a different (CDS) unit syntax,", "which is not checked by this tool.", "So by default unit syntax is checked when the VOTable is 1.4", "or greater, and not for earlier versions,", "but that can be overridden by giving a <code>true</code>", "or <code>false</code> value for this parameter.", "</p>", "<p>The wording of the VOTable and VOUnit standards", "do not strictly require use of VOUnit syntax even at VOTable 1.4,", "so failed checks result in Warning rather than Error reports.", "</p>"});
        paramList.add(this.unitParam_);
        this.timeParam_ = new BooleanParameter("time");
        this.timeParam_.setBooleanDefault(true);
        this.timeParam_.setPrompt("Check TIMESTAMP/DATE columns");
        this.timeParam_.setDescription(new String[]{"<p>If true, then parquet columns with the logical types", "<code>TIMESTAMP</code> or <code>DATE</code>", "will be checked against their VOTable counterparts", "for suitable metadata.", "Since parquet TIMESTAMP and DATE columns have an associated unit,", "a Warning is reported when the corresponding VOTable FIELD", "does not declare the same unit attribute.", "Since parquet TIMESTAMP and DATE columns also have an implicit", "zero point", "(the Unix epoch 1970-01-01, equivalent to JD 2440587.5),", "a Warning is also reported if no compatible TIMESYS element", "is referenced by the corresponding VOTable FIELD.", "If this parameter is set false, no such reports are made.", "</p>"});
        paramList.add(this.timeParam_);
        this.votableParam_ = new StringParameter("votable");
        this.votableParam_.setNullPermitted(true);
        this.votableParam_.setPrompt("Location of data-less VOTable");
        this.votableParam_.setUsage("<filename-or-url>");
        this.votableParam_.setDescription(new String[]{"<p>This parameter can be used to specify the location", "(filename or URL) of a data-less VOTable document", "that describes the parquet file under evaluation.", "Normally this is not necessary, since the VOTable is found in a", "well-known location in the metadata of the parquet file itself,", "as specified by the", "<webref url='https://www.ivoa.net/documents/Notes/VOParquet/'", ">VOParquet convention</webref>.", "However if this parameter is set to a non-blank value then", "the internal VOTable, if any, will be ignored,", "and the UTF-8-encoded VOTable at the supplied location", "will be used instead.", "This can be useful when debugging a VOParquet file.", "</p>"});
        paramList.add(this.votableParam_);
        this.params_ = paramList.toArray(new Parameter[0]);
    }

    /**
     * Returns a one-line summary of what this task does.
     *
     * @return  short purpose string
     */
    @Override
    public String getPurpose() {
        final String purpose = "Checks parquet file compliance with VOParquet convention";
        return purpose;
    }

    /**
     * Returns the parameters declared by this task, in the order in which
     * the constructor registered them.
     *
     * @return  parameter array held by this task
     */
    @Override
    public Parameter<?>[] getParameters() {
        return params_;
    }

    /**
     * Reads all parameter values from the environment and returns an
     * executable that performs the checks.
     *
     * <p>The returned executable reads the parquet file, optionally replaces
     * its attached VOTable text with one read from the <code>votable</code>
     * location, checks the VOParquet metadata keys, and then, if VOTable
     * text is available, parses it and compares its first TABLE element
     * with the parquet column metadata.
     *
     * @param  env  execution environment supplying parameter values
     *              and the output stream for reports
     * @return  executable that runs the lint checks
     * @throws  ParameterValueException  if the <code>report</code> value
     *          contains a character other than E, W or I (either case)
     * @throws  TaskException  if a parameter value cannot be obtained
     */
    @Override
    public Executable createExecutable(Environment env) throws TaskException {
        final String loc = this.locParam_.stringValue(env);
        final boolean requireVoparquet = this.voparquetParam_.booleanValue(env);
        final boolean tryUrl = false;
        final ParquetTableBuilder builder = new ParquetTableBuilder();

        // Decode the report letters into [error, warning, info] flags.
        String rtypes = this.reportParam_.stringValue(env);
        boolean[] ewiFlags = new boolean[3];
        for (char c : rtypes.toCharArray()) {
            if (c == 'E' || c == 'e') {
                ewiFlags[0] = true;
            } else if (c == 'W' || c == 'w') {
                ewiFlags[1] = true;
            } else if (c == 'I' || c == 'i') {
                ewiFlags[2] = true;
            } else {
                throw new ParameterValueException(this.reportParam_, "Not of form [EWI]+");
            }
        }
        final Reporter reporter = new Reporter(env.getOutputStream(), ewiFlags);

        final String votableLoc = this.votableParam_.stringValue(env);
        final boolean checkUcd = this.ucdParam_.booleanValue(env);
        final Boolean checkUnit = (Boolean) this.unitParam_.objectValue(env);
        final boolean checkTime = this.timeParam_.booleanValue(env);

        // Immutable snapshot of the check options for use during execution.
        final LintConfig lintConfig = new LintConfig() {
            @Override
            public boolean isCheckTime() {
                return checkTime;
            }

            @Override
            public Boolean isCheckUnit() {
                return checkUnit;
            }

            @Override
            public boolean isCheckUcd() {
                return checkUcd;
            }
        };

        return () -> {
            ParquetIO parquetIo = ParquetUtil.getIO();
            DataSource datsrc = DataSource.makeDataSource(loc);

            // Ask the reader to keep columns it cannot interpret, so that
            // they can be reported rather than silently dropped.
            ParquetStarTable.Config config = new ParquetStarTable.Config() {
                public boolean includeUnsupportedColumns() {
                    return true;
                }
            };
            ParquetStarTable parquetTable = parquetIo.readParquet(datsrc, builder, config, false, tryUrl);

            // An explicitly supplied VOTable replaces any embedded one.
            if (votableLoc != null) {
                parquetTable.setVOTableMetadataText(ParquetTableBuilder.readUtf8FromLocation(votableLoc));
            }

            this.checkVOParquetKeys(parquetTable, reporter);

            String votableTxt = parquetTable.getVOTableMetadataText();
            if (votableTxt == null) {
                String msg = "No VOTable document found attached to parquet file";
                if (requireVoparquet) {
                    reporter.error(msg);
                } else {
                    reporter.info(msg);
                }
                return;
            }

            Bi<VODocument, VOTableVersion> parseResult = this.validatingParseVOTable(votableTxt, lintConfig, reporter);
            VODocument vodoc = parseResult.getItem1();
            VOTableVersion votVersion = parseResult.getItem2();

            TableElement tableEl = this.getVOParquetTableElement(vodoc, reporter);
            if (tableEl == null) {
                reporter.error("No DATA-less TABLE element found in attached VOTable document");
                return;
            }

            VOStarTable votable;
            try {
                votable = new VOStarTable(tableEl);
            }
            catch (IOException e) {
                reporter.error("Failed to interpret TABLE element: " + e);
                return;
            }
            this.compareMetadata(parquetTable, votable, votVersion, lintConfig, reporter);
        };
    }

    /**
     * Checks the key-value metadata of a parquet table for consistent use
     * of the VOParquet keys, reporting any problems found.
     *
     * <p>The content key and version key are expected either both to be
     * present (with version "1.0") or both absent.  Any other key whose
     * name starts, case-insensitively, with the VOParquet namespace prefix
     * is reported as unknown.
     *
     * @param  ptable    parquet table whose extra metadata map is examined
     * @param  reporter  destination for messages
     */
    private void checkVOParquetKeys(ParquetStarTable ptable, Reporter reporter) {
        final String namespace = "IVOA.VOTable-Parquet.";
        final String contentKey = "IVOA.VOTable-Parquet.content";
        final String versionKey = "IVOA.VOTable-Parquet.version";
        final String requiredVersion = "1.0";

        String votmetaTxt = null;
        String votmetaVersion = null;
        for (Map.Entry<?, ?> entry : ptable.getExtraMetadataMap().entrySet()) {
            String key = (String) entry.getKey();
            String value = (String) entry.getValue();
            if (contentKey.equals(key)) {
                votmetaTxt = value;
            } else if (versionKey.equals(key)) {
                votmetaVersion = value;
            } else if (key != null && key.regionMatches(true, 0, namespace, 0, namespace.length())) {
                // regionMatches gives a locale-independent case-insensitive
                // prefix test, unlike comparing toLowerCase() results.
                reporter.warning("Unknown key " + key + " found in or near VOParquet namespace (not " + contentKey + " or " + versionKey + ")");
            }
        }

        if (votmetaTxt != null) {
            if (votmetaVersion == null) {
                reporter.error(contentKey + " present but " + versionKey + " missing");
            } else if (!requiredVersion.equals(votmetaVersion)) {
                reporter.warning(contentKey + " present but " + versionKey + " has unknown value " + votmetaVersion + " (not " + requiredVersion + ")");
            }
        } else if (requiredVersion.equals(votmetaVersion)) {
            // The version key is the one that is present here; the content
            // key is the one that is missing.
            reporter.error(versionKey + " = " + votmetaVersion + " but " + contentKey + " is missing");
        } else if (votmetaVersion != null) {
            reporter.warning(versionKey + " has unknown value " + votmetaVersion + ", " + contentKey + " not present");
        }
    }

    /**
     * Parses VOTable text into a DOM while running the VOTable linter over
     * it, forwarding all linter messages to the supplied reporter.
     *
     * <p>The VOTable version is taken from the document if it declares a
     * known one; otherwise {@link #DFLT_VOTABLE_VERSION} is used, and a
     * warning is issued if no version string could be found at all.
     * Parse failures are reported as errors rather than thrown.
     *
     * @param  votableTxt  text of the VOTable document
     * @param  config      selects which optional checks are made
     * @param  reporter    destination for messages
     * @return  pair of the document obtained from the DOM builder
     *          and the VOTable version used for validation;
     *          NOTE(review): the document may presumably be null or
     *          incomplete if parsing failed - confirm against
     *          VOTableDOMBuilder
     */
    private Bi<VODocument, VOTableVersion> validatingParseVOTable(String votableTxt, LintConfig config, final Reporter reporter) {

        // Adapter routing linter messages to the reporter by level.
        SaxMessager messager = new SaxMessager(){

            @Override
            public void reportMessage(SaxMessager.Level level, VotLintCode code, String msg, Locator locator) {
                String txt = "[" + code + "] " + msg;
                switch (level) {
                    case INFO: {
                        reporter.info(txt);
                        break;
                    }
                    case WARNING: {
                        reporter.warning(txt);
                        break;
                    }
                    case ERROR: {
                        reporter.error(txt);
                        break;
                    }
                    default: {
                        assert (false);
                        reporter.error(txt);
                    }
                }
            }
        };

        // Sniff the declared version from the start of the document.
        String versionString;
        try {
            versionString = VersionDetector.getVersionString(new BufferedInputStream(new ByteArrayInputStream(votableTxt.getBytes(StandardCharsets.UTF_8))));
        }
        catch (IOException e) {
            reporter.error(e.toString());
            versionString = null;
        }

        // Avoid looking up a null key, which some Map implementations reject.
        VOTableVersion version = versionString == null
                               ? null
                               : (VOTableVersion) VOTableVersion.getKnownVersions().get(versionString);
        if (version == null) {
            if (versionString == null) {
                // Report the default actually used, not the null lookup result.
                reporter.warning("VOTable version undeclared: use default value " + DFLT_VOTABLE_VERSION);
            }
            version = DFLT_VOTABLE_VERSION;
        }

        // Configure the linter; unit checking defaults by version if unset.
        boolean validate = true;
        VotLintContext vlContext = new VotLintContext(version, validate, messager);
        vlContext.setCheckUcd(config.isCheckUcd());
        Boolean checkUnit = config.isCheckUnit();
        vlContext.setCheckUnit(checkUnit == null ? version.isVOUnitSyntax() : checkUnit.booleanValue());

        // Parse, building a DOM as the linter-wrapped parser runs.
        VOTableDOMBuilder domHandler = new VOTableDOMBuilder(StoragePolicy.PREFER_MEMORY, true);
        try {
            XMLReader parser = new VotLinter(vlContext).createParser((ContentHandler)domHandler);
            parser.parse(new InputSource(new StringReader(votableTxt)));
        }
        catch (IOException | SAXException e) {
            reporter.error("VOTable parsing failed: " + e);
        }
        return new Bi<VODocument, VOTableVersion>((VODocument) domHandler.getDocument(), version);
    }

    /**
     * Locates the TABLE element in a VOTable document that is taken to
     * describe the parquet file, namely the first TABLE element found.
     *
     * <p>An error is reported if the document has no root element or no
     * TABLE elements (null is returned), or if the chosen TABLE has a DATA
     * child (the element is returned regardless).
     *
     * @param  vodoc     parsed VOTable document; may be null or empty
     *                   if parsing failed
     * @param  reporter  destination for messages
     * @return  first TABLE element, or null if there is none
     */
    private TableElement getVOParquetTableElement(VODocument vodoc, Reporter reporter) {
        // A failed parse can leave no usable document; avoid dereferencing it.
        VOElement topel = vodoc == null ? null : (VOElement)vodoc.getDocumentElement();
        if (topel == null) {
            reporter.error("No root element in VOTable document");
            return null;
        }
        NodeList tlist = topel.getElementsByVOTagName("TABLE");
        if (tlist.getLength() == 0) {
            reporter.error("No TABLE elements in VOTable");
            return null;
        }
        TableElement tableEl = (TableElement)tlist.item(0);
        if (tableEl.getChildByName("DATA") != null) {
            reporter.error("First TABLE element has a DATA child; use it anyway");
        }
        return tableEl;
    }

    /**
     * Compares column metadata of a parquet table with that declared by
     * its accompanying VOTable, reporting discrepancies.
     *
     * <p>If the column counts differ an error is reported and no further
     * comparison is made.  Otherwise, for each column position:
     * <ul>
     * <li>differing names give a warning;</li>
     * <li>columns flagged as unsupported give a warning and are not
     *     examined further;</li>
     * <li>differing content classes give a warning, or an info message if
     *     the VOTable type is the next wider integer type;</li>
     * <li>if time checking is enabled, TIMESTAMP and DATE columns are
     *     checked for a matching VOTable unit and time origin.</li>
     * </ul>
     *
     * @param  pTable      parquet table
     * @param  vTable      table built from the VOTable TABLE element
     * @param  votVersion  VOTable version of the metadata document
     * @param  config      selects which optional checks are made
     * @param  reporter    destination for messages
     */
    private void compareMetadata(ParquetStarTable pTable, VOStarTable vTable, VOTableVersion votVersion, LintConfig config, Reporter reporter) {
        int nc = pTable.getColumnCount();
        int vnc = vTable.getColumnCount();
        if (vnc != nc) {
            reporter.error("Column count mismatch: parquet has " + nc + ", VOTable has " + vnc);
            return;
        }
        for (int ic = 0; ic < nc; ++ic) {
            String ic1 = Integer.toString(ic + 1);
            ColumnInfo pInfo = pTable.getColumnInfo(ic);
            ColumnInfo vInfo = vTable.getColumnInfo(ic);
            ColumnDescriptor pDescriptor = pTable.getInputColumn(ic).getColumnDescriptor();
            PrimitiveType ptype = pDescriptor.getPrimitiveType();

            // Guard the dereference; the unsupported-column message below
            // already allows for a null primitive type.
            LogicalTypeAnnotation ltype = ptype == null ? null : ptype.getLogicalTypeAnnotation();

            // Column names.
            String pName = pInfo.getName();
            String vName = vInfo.getName();
            if (!pName.equals(vName)) {
                reporter.warning("Column name mismatch at column " + ic1 + ": parquet is \"" + pName + "\", VOTable is \"" + vName + "\"");
            }

            // Columns that could not be interpreted get no further checks.
            if (Boolean.TRUE.equals(pInfo.getAuxDatumValue(ParquetStarTable.UNSUPPORTED_INFO, Boolean.class))) {
                reporter.warning("Parquet column #" + ic1 + " (" + (ptype == null ? "?" : ptype.toString()) + ") " + "not supported by STIL");
                continue;
            }
            String colTxt = "Column " + pName + " (#" + ic1 + "): ";

            // Content classes.
            Class<?> pClazz = pInfo.getContentClass();
            Class<?> vClazz = vInfo.getContentClass();
            if (!pClazz.equals(vClazz)) {
                String typeMsg = colTxt + "parquet/VOTable type mismatch, " + pClazz.getSimpleName() + " != " + vClazz.getSimpleName();
                if (isNextWiderInteger(pClazz, vClazz)) {
                    reporter.info(typeMsg + " - probably something to do with signed/unsigned integers");
                } else {
                    reporter.warning(typeMsg);
                }
            }

            // Time metadata, for TIMESTAMP and DATE columns only.
            if (!config.isCheckTime()) {
                continue;
            }
            String tunit;
            if (ltype instanceof LogicalTypeAnnotation.DateLogicalTypeAnnotation) {
                tunit = "d";
            } else if (ltype instanceof LogicalTypeAnnotation.TimestampLogicalTypeAnnotation) {
                tunit = timestampUnit((LogicalTypeAnnotation.TimestampLogicalTypeAnnotation)ltype);
            } else {
                continue;
            }
            String vunit = vInfo.getUnitString();
            String voriginTxt = (String)Tables.getAuxDatumValue((ValueInfo)vInfo, (ValueInfo)VOStarTable.TIMESYS_TIMEORIGIN_INFO, String.class);
            double vorigin = Timesys.decodeTimeorigin(voriginTxt);

            // Despite its name, this constant is computed with mjdToJd,
            // so it holds the Unix epoch as a Julian Date.
            double torigin = UNIX_EPOCH_AS_MJD;

            if (tunit != null) {
                if (vunit == null) {
                    reporter.warning(colTxt + "missing VOTable units for " + ltype + "; suggest " + "unit='" + tunit + "'");
                } else if (!vunit.equals(tunit)) {
                    reporter.error(colTxt + "unit mismatch, " + "parquet " + ltype + ", votable unit='" + vunit + "', should be '" + tunit + "'");
                }
            }
            if (Double.isNaN(torigin)) {
                continue;
            }
            if (Double.isNaN(vorigin)) {
                // No time origin declared.  Suggest a TIMESYS only where the
                // VOTable version permits one; otherwise there is nothing
                // the document could have declared, so it is not reported
                // as a mismatch against NaN.
                if (votVersion.allowTimesys()) {
                    reporter.warning(colTxt + "missing TIMESYS for " + ltype + "; suggest " + "<TIMESYS timeorigin='" + torigin + "' timescale='UTC' .../>");
                }
            } else if (vorigin != torigin) {
                reporter.error(colTxt + "time origin mismatch, " + "parquet " + ltype + ", votable TIMESYS timeorigin=" + vorigin + ", should be Unix epoch in JD = " + torigin);
            }
        }
    }

    /**
     * Indicates whether the second class is the next wider integer type
     * (scalar or array) compared with the first.
     *
     * @param  pClazz  parquet column content class
     * @param  vClazz  VOTable column content class
     * @return  true for byte/short, short/int and int/long pairings
     */
    private static boolean isNextWiderInteger(Class<?> pClazz, Class<?> vClazz) {
        return pClazz.equals(Byte.class) && vClazz.equals(Short.class)
            || pClazz.equals(Short.class) && vClazz.equals(Integer.class)
            || pClazz.equals(Integer.class) && vClazz.equals(Long.class)
            || pClazz.equals(byte[].class) && vClazz.equals(short[].class)
            || pClazz.equals(short[].class) && vClazz.equals(int[].class)
            || pClazz.equals(int[].class) && vClazz.equals(long[].class);
    }

    /**
     * Returns the unit string expected in the VOTable for a parquet
     * TIMESTAMP column.
     *
     * @param  tstype  timestamp logical type annotation
     * @return  "ms", "us" or "ns", or null for an unrecognised time unit
     */
    private static String timestampUnit(LogicalTypeAnnotation.TimestampLogicalTypeAnnotation tstype) {
        switch (tstype.getUnit()) {
            case MILLIS: {
                return "ms";
            }
            case MICROS: {
                return "us";
            }
            case NANOS: {
                return "ns";
            }
            default: {
                assert (false);
                return null;
            }
        }
    }

    /**
     * Supplies the optional-check settings used when linting.
     */
    private static interface LintConfig {

        /**
         * Indicates whether UCD checking is switched on for the VOTable
         * linter.
         *
         * @return  true to check UCDs
         */
        boolean isCheckUcd();

        /**
         * Indicates whether unit checking is switched on for the VOTable
         * linter.
         *
         * @return  TRUE or FALSE to force unit checking on or off,
         *          or null to let the VOTable version decide
         */
        Boolean isCheckUnit();

        /**
         * Indicates whether parquet TIMESTAMP and DATE columns are checked
         * against VOTable unit and time origin metadata.
         *
         * @return  true to check time columns
         */
        boolean isCheckTime();
    }

    /**
     * Writes level-tagged messages to a print stream, suppressing those
     * whose level has not been enabled.
     */
    private static class Reporter {
        private final PrintStream out_;
        private final boolean hasError_;
        private final boolean hasWarning_;
        private final boolean hasInfo_;

        /**
         * Constructor.
         *
         * @param  out       destination stream
         * @param  ewiFlags  3-element array of enable flags for, in order,
         *                   Error, Warning and Info messages
         */
        Reporter(PrintStream out, boolean[] ewiFlags) {
            out_ = out;
            hasError_ = ewiFlags[0];
            hasWarning_ = ewiFlags[1];
            hasInfo_ = ewiFlags[2];
        }

        /**
         * Outputs an Info message if Info output is enabled.
         *
         * @param  txt  message text
         */
        public void info(String txt) {
            emit(hasInfo_, "INFO", txt);
        }

        /**
         * Outputs a Warning message if Warning output is enabled.
         *
         * @param  txt  message text
         */
        public void warning(String txt) {
            emit(hasWarning_, "WARNING", txt);
        }

        /**
         * Outputs an Error message if Error output is enabled.
         *
         * @param  txt  message text
         */
        public void error(String txt) {
            emit(hasError_, "ERROR", txt);
        }

        /**
         * Prints one line of the form "LEVEL: text" if enabled.
         *
         * @param  enabled  whether this level is to be output
         * @param  level    level label
         * @param  txt      message text
         */
        private void emit(boolean enabled, String level, String txt) {
            if (!enabled) {
                return;
            }
            String prefix = level == null ? "" : level + ": ";
            out_.println(prefix + txt);
        }
    }
}

