package com.chenyang.nse.bussiness.tools.dataprocess.hive.hdfs.format;

import com.chenyang.nse.bussiness.entity.db.ColumnInfo;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DateColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.orc.OrcFile;
import org.apache.orc.Reader;
import org.apache.orc.RecordReader;
import org.apache.orc.TypeDescription;

/**
 * Reads an ORC file batch by batch and converts every vectorized batch into a
 * {@link DataBatch} of rows and cells.
 *
 * <p>Columns are matched to the file <em>by position</em>: the i-th entry of the
 * {@code cols} list passed to the constructor is read from the i-th data column of
 * the file, and its declared type name selects how the column vector is decoded.
 *
 * <p>Typical usage: construct, call {@link #load()}, then call {@link #next()}
 * until it returns {@code null}. The underlying record reader is closed
 * automatically once the file is exhausted.
 *
 * <p>Instances keep mutable reader state and are not written for concurrent use.
 */
public class OrcFileReader extends HdfsFileReader {
   /**
    * Top-level position of the struct holding the user columns in Hive ACID ORC files
    * (the five preceding columns carry transaction metadata).
    */
   private static final int ACID_ROW_INDEX = 5;

   private final List<ColumnInfo> cols;
   private final Path path;
   private Reader reader;
   private RecordReader rowIterator;
   private TypeDescription readSchema;
   private VectorizedRowBatch batch;
   /** Set once {@code nextBatch} reported the end of the file; later {@link #next()} calls return null. */
   private boolean exhausted;

   /**
    * Returns the schema read from the file footer.
    *
    * @return the file schema, or {@code null} if {@link #load()} has not been called yet
    */
   public TypeDescription getSchema() {
      return this.readSchema;
   }

   /**
    * Creates a reader for one ORC file. Nothing is opened until {@link #load()} is called.
    *
    * @param path location of the ORC file
    * @param cols columns to extract, in the same order as the data columns of the file
    */
   public OrcFileReader(Path path, List<ColumnInfo> cols) {
      this.path = path;
      this.cols = cols;
   }

   /**
    * Opens the file, reads its schema (printed to standard output as JSON) and prepares
    * the row iterator and the reusable row batch. Calling it again restarts reading from
    * the beginning of the file.
    *
    * @throws IOException if the file cannot be opened or read
    */
   public void load() throws IOException {
      // Release a still-open iterator from a previous load() so its file handle is not leaked.
      this.closeRowIterator();
      Configuration conf = new Configuration();
      OrcFile.ReaderOptions options = new OrcFile.ReaderOptions(conf);
      this.reader = OrcFile.createReader(this.path, options);
      this.readSchema = this.reader.getSchema();
      System.out.println(this.readSchema.toJson());
      this.rowIterator = this.reader.rows();
      this.batch = this.readSchema.createRowBatch();
      this.exhausted = false;
   }

   /**
    * Reads the next batch of rows.
    *
    * <p>Cells whose ORC value is null carry a {@code null} value. Cells whose declared
    * type name is not one of the supported names also carry a {@code null} value.
    *
    * @return the converted batch, or {@code null} once the file is exhausted
    * @throws IOException if reading fails, or if more columns were requested than the file provides
    */
   public DataBatch next() throws IOException {
      if (this.exhausted) {
         return null;
      }

      if (!this.rowIterator.nextBatch(this.batch)) {
         this.exhausted = true;
         this.closeRowIterator();
         return null;
      }

      // The data vectors are the same for every row of the batch, so resolve them once.
      ColumnVector[] fields = this.resolveFieldVectors();
      if (this.cols.size() > fields.length) {
         throw new IOException("Requested " + this.cols.size() + " columns but " + this.path
               + " only provides " + fields.length);
      }

      DataBatch dataBatch = new DataBatch();

      for (int k = 0; k < this.batch.size; ++k) {
         DataBatch.DataRow row = new DataBatch.DataRow();

         for (int i = 0; i < this.cols.size(); ++i) {
            ColumnInfo col = this.cols.get(i);
            DataBatch.DataValue value = new DataBatch.DataValue();
            value.type = col.getTypename();
            fillValue(value, col.getTypename(), fields[i], k);

            DataBatch.DataCell cell = new DataBatch.DataCell();
            cell.column = col.getColumnname();
            cell.value = value;
            row.cells.add(cell);
         }

         dataBatch.rows.add(row);
      }

      return dataBatch;
   }

   /**
    * Returns the vectors that hold the user data of the current batch.
    *
    * <p>If the batch has a struct at top-level position {@link #ACID_ROW_INDEX} (the Hive
    * ACID layout) the fields of that struct are returned; otherwise the top-level columns
    * themselves are returned, which is the layout of a plain ORC file.
    */
   private ColumnVector[] resolveFieldVectors() {
      ColumnVector[] topLevel = this.batch.cols;
      if (topLevel.length > ACID_ROW_INDEX && topLevel[ACID_ROW_INDEX] instanceof StructColumnVector) {
         return ((StructColumnVector) topLevel[ACID_ROW_INDEX]).fields;
      }

      return topLevel;
   }

   /**
    * Decodes one cell of {@code vector} into {@code value.value} according to {@code typeName}.
    *
    * @param value    holder that receives the decoded value
    * @param typeName declared column type; matched case-insensitively
    * @param vector   column vector to read from
    * @param rowIndex row position inside the current batch
    */
   private static void fillValue(DataBatch.DataValue value, String typeName, ColumnVector vector, int rowIndex) {
      // A repeating vector stores its single value (and null flag) at index 0 only.
      int idx = vector.isRepeating ? 0 : rowIndex;
      if (!vector.noNulls && vector.isNull[idx]) {
         // The data slot of a null entry is undefined, so never read it.
         value.value = null;
         return;
      }

      // Locale.ROOT keeps the match stable under locales with special casing rules (e.g. Turkish).
      switch (typeName.toLowerCase(java.util.Locale.ROOT)) {
         case "string":
         case "char":
         case "varchar": {
            BytesColumnVector bytes = (BytesColumnVector) vector;
            String sval = new String(bytes.vector[idx], bytes.start[idx], bytes.length[idx], StandardCharsets.UTF_8);
            value.value = sval;
            break;
         }
         case "float":
         case "double": {
            double dval = ((DoubleColumnVector) vector).vector[idx];
            value.value = dval;
            break;
         }
         case "decimal": {
            // Copy: the writable belongs to the reusable batch and is overwritten by the next read.
            HiveDecimalWritable decimal = new HiveDecimalWritable(((DecimalColumnVector) vector).vector[idx]);
            value.value = decimal;
            break;
         }
         case "int":
         case "integer":
         case "long": {
            long lval = ((LongColumnVector) vector).vector[idx];
            value.value = lval;
            break;
         }
         case "date":
         case "datetime":
         case "timestamp": {
            // DateColumnVector is tested first because it is itself a LongColumnVector.
            if (vector instanceof DateColumnVector) {
               String formatted = ((DateColumnVector) vector).formatDate(idx);
               value.value = formatted;
            } else if (vector instanceof TimestampColumnVector) {
               long time = ((TimestampColumnVector) vector).time[idx];
               value.value = time;
            } else {
               long raw = ((LongColumnVector) vector).vector[idx];
               value.value = raw;
            }
            break;
         }
         default:
            // Unsupported type name: the value is left unset.
            break;
      }
   }

   /** Closes the current row iterator, if any, and forgets it so it is never closed twice. */
   private void closeRowIterator() throws IOException {
      if (this.rowIterator != null) {
         RecordReader open = this.rowIterator;
         this.rowIterator = null;
         open.close();
      }
   }

   /**
    * Manual smoke test: dumps the rows of an ORC file as comma-separated lines.
    *
    * @param args optional; {@code args[0]} overrides the default file location
    * @throws IOException if the file cannot be read
    */
   public static void main(String[] args) throws IOException {
      List<ColumnInfo> cols = new ArrayList<>();
      ColumnInfo col = new ColumnInfo();
      col.setTypename("string");
      col.setColumnname("id");
      cols.add(col);
      col = new ColumnInfo();
      col.setTypename("string");
      col.setColumnname("name");
      cols.add(col);
      col = new ColumnInfo();
      col.setTypename("long");
      col.setColumnname("age");
      cols.add(col);
      col = new ColumnInfo();
      col.setTypename("date");
      col.setColumnname("birth");
      cols.add(col);
      Path path = new Path(args.length > 0 ? args[0] : "file:///d:/g.orc");
      OrcFileReader reader = new OrcFileReader(path, cols);
      reader.load();
      DataBatch next;

      while ((next = reader.next()) != null) {
         for (DataBatch.DataRow row : next.rows) {
            List<String> datas = row.cells.stream().map((x) -> String.valueOf(x.value.value)).collect(Collectors.toList());
            System.out.println(String.join(",", datas));
         }
      }

   }
}
