package io.prophecy.libs.data;

import io.prophecy.libs.data.DataMatcher;
import io.prophecy.libs.package$;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema;
import org.apache.spark.sql.functions$;
import org.apache.spark.sql.types.StructType;
import scala.Array$;
import scala.MatchError;
import scala.None$;
import scala.Option$;
import scala.Predef$;
import scala.Predef$ArrowAssoc$;
import scala.Some;
import scala.Tuple2;
import scala.Tuple3;
import scala.collection.Iterator;
import scala.collection.SeqLike;
import scala.collection.SetLike;
import scala.collection.TraversableOnce;
import scala.collection.immutable.$colon;
import scala.collection.immutable.List;
import scala.collection.immutable.List$;
import scala.collection.immutable.Map;
import scala.collection.immutable.Nil$;
import scala.collection.immutable.Set$;
import scala.collection.mutable.ArrayOps;
import scala.collection.mutable.Map$;
import scala.collection.mutable.Set;
import scala.reflect.ClassTag$;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.IntRef;
import scala.util.control.Breaks$;

/* compiled from: DataMatcher.scala */
/* loaded from: input_file:io/prophecy/libs/data/DataMatcher$.class */
public final class DataMatcher$ {
    // The single instance of this class; it is assigned by the private constructor.
    public static DataMatcher$ MODULE$;
    // Not read or written by any code visible in this class.
    private volatile boolean bitmap$init$0;

    // Builds the one instance during class initialization; the constructor call is what
    // populates MODULE$, so the created object does not need to be kept here.
    static {
        new DataMatcher$();
    }

    /**
     * Prints a human-readable comparison report for two datasets.
     *
     * <p>The report consists of the row counts of both datasets, one entry per
     * column result returned by {@link #dataMatch}, and a closing tally of
     * matching and mismatching columns. Everything is written through
     * {@code Predef.println}.
     *
     * @param dataset  the dataset reported as "New data"
     * @param dataset2 the dataset reported as "Original data"; its column count
     *                 is used as the denominator in the per-column progress text
     * @param list     column names forwarded unchanged to {@link #dataMatch}
     */
    public void prettyDataMatch(Dataset<Row> dataset, Dataset<Row> dataset2, List<String> list) {
        final long newRowCount = dataset.count();
        final long originalRowCount = dataset2.count();
        Predef$.MODULE$.println("New data count: " + newRowCount + "; Original data count: " + originalRowCount);

        final String countVerdict = newRowCount == originalRowCount
                ? "✅ Data counts are matching!"
                : "⛔️ Data counts are not matching!";
        Predef$.MODULE$.println(countVerdict);
        Predef$.MODULE$.println("\nMatching column by column:");

        // Mutable counters shared with the per-column printer.
        final IntRef matchingColumns = IntRef.create(0);
        final IntRef mismatchingColumns = IntRef.create(0);
        final int originalColumnCount = dataset2.columns().length;

        ((List) dataMatch(dataset, dataset2, list).zipWithIndex(List$.MODULE$.canBuildFrom())).foreach(indexedResult -> {
            $anonfun$prettyDataMatch$1(originalColumnCount, matchingColumns, mismatchingColumns, indexedResult);
            return BoxedUnit.UNIT;
        });

        Predef$.MODULE$.println("\nData matching finished. Found " + matchingColumns.elem + " matching and " + mismatchingColumns.elem + " mismatching columns.");
    }

    /**
     * Compares two datasets column by column and returns one result per column.
     *
     * <p>The lower-cased column names of {@code dataset2} are collected into a
     * set and processed in groups of at most 100. For each group, the
     * lower-cased names from {@code list} are added, the resulting columns are
     * selected from both datasets, and the pair is handed to
     * {@code dataMatchPartial}. Because the names from {@code list} are added
     * to every group, the same column can be reported more than once; only the
     * first result seen for a given column name is kept.
     *
     * @param dataset  the new dataset
     * @param dataset2 the original dataset; its columns define what is compared
     * @param list     column names passed (lower-cased) to {@code dataMatchPartial}
     *                 as the key columns
     * @return the per-column results, without duplicate column names
     */
    public List<DataMatcher.Result> dataMatch(Dataset<Row> dataset, Dataset<Row> dataset2, List<String> list) {
        Object[] loweredOriginalNames = (Object[]) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(dataset2.columns())).map(columnName -> {
            return columnName.toLowerCase();
        }, Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(String.class)));

        Iterator perBatchResults = new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(loweredOriginalNames)).toSet().grouped(100).flatMap(batch -> {
            // Columns to select for this batch: the batch itself plus the lower-cased key columns.
            List selection = ((TraversableOnce) batch.$plus$plus(((TraversableOnce) list.map(key -> {
                return key.toLowerCase();
            }, List$.MODULE$.canBuildFrom())).toSet()).map(name -> {
                return functions$.MODULE$.col(name);
            }, Set$.MODULE$.canBuildFrom())).toList();
            Dataset<Row> newSide = dataset.select(selection);
            Dataset<Row> originalSide = dataset2.select(selection);
            List<String> loweredKeys = (List) list.map(key -> {
                return key.toLowerCase();
            }, List$.MODULE$.canBuildFrom());
            return MODULE$.dataMatchPartial(newSide, originalSide, loweredKeys);
        });

        // Column names that already produced a result; later duplicates are dropped.
        Set emittedColumns = scala.collection.mutable.Set$.MODULE$.apply(Nil$.MODULE$);
        return perBatchResults.flatMap(candidate -> {
            boolean alreadyEmitted = emittedColumns.contains(candidate.columnName());
            if (!alreadyEmitted) {
                emittedColumns.$plus$eq(candidate.columnName());
                return Option$.MODULE$.option2Iterable(new Some(candidate));
            }
            return Option$.MODULE$.option2Iterable(None$.MODULE$);
        }).toList();
    }

    /**
     * Compares one batch of columns between the new dataset and the original dataset.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>Collect the lower-cased column names of {@code dataset2}.</li>
     *   <li>Join {@code dataset} with {@code dataset2}, whose columns are aliased to
     *       {@code original_<name>}, on equality of every column named in {@code list}.</li>
     *   <li>Add a {@code match_<name>} label column ("MATCH" when both sides are null or
     *       equal, otherwise "MISMATCH") for every joined column that does not start with
     *       {@code original_}, plus zero-valued {@code match_count_<name>} and
     *       {@code mismatch_count_<name>} columns.</li>
     *   <li>Fold all rows into a single row of per-column match and mismatch counts.</li>
     *   <li>Read at most 100 rows containing a mismatch and keep at most 5
     *       (new value, original value) examples per mismatching column.</li>
     * </ol>
     *
     * @param dataset  the new dataset, already restricted to the columns of this batch
     * @param dataset2 the original dataset, already restricted to the columns of this batch
     * @param list     key column names used for the join condition
     * @return one {@code DataMatcher.Result} per lower-cased column of {@code dataset2}
     */
    private List<DataMatcher.Result> dataMatchPartial(Dataset<Row> dataset, Dataset<Row> dataset2, List<String> list) {
        // Lower-cased column names of dataset2; every per-column step below iterates over this set.
        scala.collection.immutable.Set originalColumns = new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[]) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(dataset2.columns())).map(str -> {
            return str.toLowerCase();
        }, Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(String.class))))).toSet();

        // Join condition starts from literal true and ANDs "<key> = original_<key>" for each key column.
        Dataset joined = dataset.join(dataset2.select(((SetLike) originalColumns.map(str2 -> {
            return functions$.MODULE$.col(str2).alias(new StringBuilder(9).append("original_").append(str2).toString());
        }, Set$.MODULE$.canBuildFrom())).toSeq()), (Column) list.foldLeft(functions$.MODULE$.lit(BoxesRunTime.boxToBoolean(true)), (conditionSoFar, keyName) -> {
            Column joinCondition = (Column) conditionSoFar;
            String keyColumn = (String) keyName;
            return joinCondition.and(functions$.MODULE$.col(keyColumn).$eq$eq$eq(functions$.MODULE$.col(new StringBuilder(9).append("original_").append(keyColumn).toString())));
        }));

        // Resulting column order: all joined columns, then the zero-valued count columns,
        // then the match_<name> labels (each ::: call prepends its argument).
        Dataset labelled = joined.select(new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[]) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[]) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(joined.columns())).filter(str4 -> {
            return BoxesRunTime.boxToBoolean($anonfun$dataMatchPartial$6(str4));
        }))).map(str5 -> {
            return functions$.MODULE$.when(functions$.MODULE$.col(str5).isNull().$amp$amp(functions$.MODULE$.col(new StringBuilder(9).append("original_").append(str5).toString()).isNull()).$bar$bar(functions$.MODULE$.col(str5).$eq$eq$eq(functions$.MODULE$.col(new StringBuilder(9).append("original_").append(str5).toString()))), functions$.MODULE$.lit("MATCH")).otherwise(functions$.MODULE$.lit("MISMATCH")).as(new StringBuilder(6).append("match_").append(str5).toString());
        }, Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Column.class))))).toList().$colon$colon$colon(((TraversableOnce) originalColumns.flatMap(str6 -> {
            return new $colon.colon(functions$.MODULE$.lit(BoxesRunTime.boxToLong(0L)).as(new StringBuilder(12).append("match_count_").append(str6).toString()), new $colon.colon(functions$.MODULE$.lit(BoxesRunTime.boxToLong(0L)).as(new StringBuilder(15).append("mismatch_count_").append(str6).toString()), Nil$.MODULE$));
        }, Set$.MODULE$.canBuildFrom())).toList()).$colon$colon$colon(new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[]) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(joined.columns())).map(str7 -> {
            return functions$.MODULE$.col(str7);
        }, Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Column.class))))).toList()));

        StructType schema = labelled.schema();
        String[] strArr = (String[]) originalColumns.toArray(ClassTag$.MODULE$.apply(String.class));

        // Zero element of the fold: count fields are 0 and every other field is null.
        // A null match_<name> label is how the fold step recognises an already-aggregated row.
        GenericRowWithSchema zeroRow = new GenericRowWithSchema((Object[]) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(schema.fields())).map(structField -> {
            if (structField.name().startsWith("match_count_") || structField.name().startsWith("mismatch_count_")) {
                return BoxesRunTime.boxToLong(0L);
            }
            return null;
        }, Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.Any())), schema);

        // Fold every row into one row holding the per-column match / mismatch totals.
        Row totals = (Row) package$.MODULE$.measure(() -> {
            return (Row) labelled.rdd().fold(zeroRow, (left, right) -> {
                Row accumulated = (Row) left;
                Row incoming = (Row) right;
                Map updatedCounts = new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[]) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(strArr)).flatMap(str8 -> {
                    return new ArrayOps.ofRef($anonfun$dataMatchPartial$11(incoming, accumulated, str8));
                }, Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Tuple2.class))))).toMap(Predef$.MODULE$.$conforms());
                return new GenericRowWithSchema((Object[]) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(schema.fields())).map(structField2 -> {
                    if (structField2.name().startsWith("match_count_") || structField2.name().startsWith("mismatch_count_")) {
                        return updatedCounts.apply(structField2.name());
                    }
                    return null;
                }, Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.Any())), schema);
            });
        }, "Data matching - counting matches and mismatches");

        // (column name, mismatch count, match count) for every original column.
        scala.collection.immutable.Set countsByColumn = (scala.collection.immutable.Set) originalColumns.map(str8 -> {
            return new Tuple3(str8, BoxesRunTime.boxToLong(field$1(new StringBuilder(15).append("mismatch_count_").append(str8).toString(), totals)), BoxesRunTime.boxToLong(field$1(new StringBuilder(12).append("match_count_").append(str8).toString(), totals)));
        }, Set$.MODULE$.canBuildFrom());

        // Names of the columns that have at least one mismatch.
        scala.collection.immutable.Set mismatchedColumns = (scala.collection.immutable.Set) ((SetLike) countsByColumn.filter(tuple3 -> {
            return BoxesRunTime.boxToBoolean($anonfun$dataMatchPartial$14(tuple3));
        })).map(tuple32 -> {
            return (String) tuple32._1();
        }, Set$.MODULE$.canBuildFrom());

        // Rows in which at least one column is labelled MISMATCH.
        Dataset mismatchedRows = labelled.filter((Column) ((TraversableOnce) originalColumns.map(str9 -> {
            return functions$.MODULE$.col(new StringBuilder(6).append("match_").append(str9).toString()).$eq$eq$eq("MISMATCH");
        }, Set$.MODULE$.canBuildFrom())).reduce((column2, column3) -> {
            return column2.or(column3);
        }));

        // Column name -> list of (new value, original value) examples, at most 5 per column.
        scala.collection.mutable.Map examplesByColumn = Map$.MODULE$.apply(Nil$.MODULE$);
        java.util.Iterator localIterator = mismatchedRows.limit(100).toLocalIterator();
        // The label string passed to measure below is kept verbatim, including its spelling.
        package$.MODULE$.measure(() -> {
            Breaks$.MODULE$.breakable(() -> {
                while (localIterator.hasNext()) {
                    Row mismatchedRow = (Row) localIterator.next();
                    mismatchedColumns.foreach(columnObj -> {
                        String columnName = (String) columnObj;
                        String label = mismatchedRow.getString(mismatchedRow.fieldIndex(new StringBuilder(6).append("match_").append(columnName).toString()));
                        // Null-safe comparison: a null label is treated as "not a mismatch".
                        if (!"MISMATCH".equals(label)) {
                            return BoxedUnit.UNIT;
                        }
                        String newValue = (String) Option$.MODULE$.apply(mismatchedRow.get(mismatchedRow.fieldIndex(columnName))).map(obj -> {
                            return obj.toString();
                        }).getOrElse(() -> {
                            return "null";
                        });
                        String originalValue = (String) Option$.MODULE$.apply(mismatchedRow.get(mismatchedRow.fieldIndex(new StringBuilder(9).append("original_").append(columnName).toString()))).map(obj2 -> {
                            return obj2.toString();
                        }).getOrElse(() -> {
                            return "null";
                        });
                        List examples = (List) examplesByColumn.getOrElse(columnName, () -> {
                            return Nil$.MODULE$;
                        });
                        return examples.size() < 5 ? examplesByColumn.put(columnName, examples.$colon$colon(new Tuple2(newValue, originalValue))) : BoxedUnit.UNIT;
                    });
                    // NOTE(review): the exit test delegates to $anonfun$dataMatchPartial$26, which
                    // requires more than 5 examples, while the lists above are capped at 5 — so this
                    // early exit does not appear to trigger and the loop runs over all fetched rows.
                    if (examplesByColumn.forall(tuple2 -> {
                        return BoxesRunTime.boxToBoolean($anonfun$dataMatchPartial$26(tuple2));
                    })) {
                        throw Breaks$.MODULE$.break();
                    }
                }
            });
        }, "Data matching - fetching mimsmatched examples");

        // Build one Result per column; columns without collected examples get an empty list.
        return ((TraversableOnce) countsByColumn.map(tuple33 -> {
            if (tuple33 == null) {
                throw new MatchError(tuple33);
            }
            String resultColumn = (String) tuple33._1();
            return new DataMatcher.Result(resultColumn, BoxesRunTime.unboxToLong(tuple33._2()), BoxesRunTime.unboxToLong(tuple33._3()), (List) examplesByColumn.getOrElse(resultColumn, () -> {
                return Nil$.MODULE$;
            }));
        }, Set$.MODULE$.canBuildFrom())).toList();
    }

    /**
     * Wraps a list of per-column results in a {@code DataMatcher.Results} instance.
     *
     * @param list the per-column results to wrap
     * @return a new {@code DataMatcher.Results} built from {@code list}
     */
    public DataMatcher.Results Results(List<DataMatcher.Result> list) {
        final DataMatcher.Results wrapped = new DataMatcher.Results(list);
        return wrapped;
    }

    /**
     * Prints the outcome for one column and bumps the corresponding counter.
     *
     * <p>A column with no mismatches is reported as successful and increments
     * {@code intRef}; any other column is reported as failed together with its
     * mismatch examples and increments {@code intRef2}.
     *
     * @param i       denominator shown in the "(index/total)" progress text
     * @param intRef  counter of matching columns
     * @param intRef2 counter of mismatching columns
     * @param tuple2  pair of (column result, index of that result)
     * @throws MatchError if {@code tuple2} is null
     */
    public static final /* synthetic */ void $anonfun$prettyDataMatch$1(int i, IntRef intRef, IntRef intRef2, Tuple2 tuple2) {
        if (tuple2 == null) {
            throw new MatchError(tuple2);
        }
        final DataMatcher.Result columnResult = (DataMatcher.Result) tuple2._1();
        // NOTE(review): the caller obtains this index from zipWithIndex, so the first column prints as 0 — confirm intended.
        final int position = tuple2._2$mcI$sp();

        if (columnResult.mismatches() > 0) {
            Predef$.MODULE$.println("\t⛔️ (" + position + "/" + i + ") Column `" + columnResult.columnName() + "` failed: matched " + columnResult.matches() + " rows / mismatched " + columnResult.mismatches() + " rows");
            Predef$.MODULE$.println("\tFailed records: " + columnResult.prettyMisMatchExamples());
            Predef$.MODULE$.println("");
            intRef2.elem++;
            return;
        }

        Predef$.MODULE$.println("\t✅ (" + position + "/" + i + ") Column `" + columnResult.columnName() + "` successful: matched " + columnResult.matches() + " rows / mismatched " + columnResult.mismatches() + " rows");
        intRef.elem++;
    }

    /**
     * Tells whether a column name lacks the {@code original_} prefix.
     *
     * @param str the column name to inspect
     * @return {@code true} when {@code str} does not start with {@code "original_"}
     */
    public static final /* synthetic */ boolean $anonfun$dataMatchPartial$6(String str) {
        final boolean hasOriginalPrefix = str.startsWith("original_");
        return !hasOriginalPrefix;
    }

    /**
     * Produces the updated match / mismatch counters of one column for a fold step.
     *
     * <p>The contribution of {@code row} is determined from its {@code match_<str>} field:
     * when that field is null, {@code row} is treated as carrying counts already and its own
     * {@code match_count_<str>} / {@code mismatch_count_<str>} values are used; otherwise the
     * contribution is 1 for the counter named by the label ("MATCH" or "MISMATCH") and 0 for
     * the other. The contribution is added to the counters read from {@code row2}.
     *
     * @param row  the row whose contribution is being added
     * @param row2 the row holding the counters accumulated so far
     * @param str  the column name the counters belong to
     * @return a two-element array of {@code Tuple2} entries mapping
     *         {@code match_count_<str>} and {@code mismatch_count_<str>} to their new boxed totals
     */
    public static final /* synthetic */ Object[] $anonfun$dataMatchPartial$11(Row row, Row row2, String str) {
        String matchCountField = new StringBuilder(12).append("match_count_").append(str).toString();
        String mismatchCountField = new StringBuilder(15).append("mismatch_count_").append(str).toString();
        int labelIndex = row.fieldIndex(new StringBuilder(6).append("match_").append(str).toString());

        long matchDelta;
        long mismatchDelta;
        if (row.isNullAt(labelIndex)) {
            // No label present: take the counts this row already carries.
            matchDelta = row.getLong(row.fieldIndex(matchCountField));
            mismatchDelta = row.getLong(row.fieldIndex(mismatchCountField));
        } else {
            // Constant-first equals keeps the comparison null-safe.
            String label = row.getString(labelIndex);
            matchDelta = "MATCH".equals(label) ? 1L : 0L;
            mismatchDelta = "MISMATCH".equals(label) ? 1L : 0L;
        }

        long newMatchCount = row2.getLong(row2.fieldIndex(matchCountField)) + matchDelta;
        long newMismatchCount = row2.getLong(row2.fieldIndex(mismatchCountField)) + mismatchDelta;
        return Predef$.MODULE$.refArrayOps(new Tuple2[]{Predef$ArrowAssoc$.MODULE$.$u2192$extension(Predef$.MODULE$.ArrowAssoc(matchCountField), BoxesRunTime.boxToLong(newMatchCount)), Predef$ArrowAssoc$.MODULE$.$u2192$extension(Predef$.MODULE$.ArrowAssoc(mismatchCountField), BoxesRunTime.boxToLong(newMismatchCount))});
    }

    /**
     * Reads the field named {@code str} from {@code row} and unboxes it to a {@code long}.
     *
     * @param str the field name to look up
     * @param row the row to read from
     * @return the field value as a primitive {@code long}
     */
    private static final long field$1(String str, Row row) {
        final Object boxedValue = row.getAs(str);
        return BoxesRunTime.unboxToLong(boxedValue);
    }

    /**
     * Tells whether the second element of the triple, unboxed to a {@code long}, is positive.
     *
     * @param tuple3 a triple whose second element is a boxed long
     * @return {@code true} when that element is greater than zero
     */
    public static final /* synthetic */ boolean $anonfun$dataMatchPartial$14(Tuple3 tuple3) {
        final long secondElement = BoxesRunTime.unboxToLong(tuple3._2());
        return secondElement > 0;
    }

    /**
     * Tells whether the sequence stored as the pair's second element has more than 5 entries.
     *
     * <p>NOTE(review): the only caller in this class applies this to example lists that it
     * stops growing once they reach 5 entries, so this predicate looks like it can never be
     * true there — confirm whether {@code >= 5} was intended.
     *
     * @param tuple2 a pair whose second element is a sequence
     * @return {@code true} when that sequence's size is greater than 5
     */
    public static final /* synthetic */ boolean $anonfun$dataMatchPartial$26(Tuple2 tuple2) {
        final int entryCount = ((SeqLike) tuple2._2()).size();
        return entryCount > 5;
    }

    // Private constructor: stores the newly created instance in MODULE$.
    // The only call site visible in this class is the static initializer.
    private DataMatcher$() {
        MODULE$ = this;
    }
}
