package comirva.web.ie.countryoforigin;

import comirva.util.external.TextFormatTool;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.Vector;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermPositionVector;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.TermsFilter;
import org.apache.lucene.store.SimpleFSDirectory;

/* loaded from: input_file:comirva/web/ie/countryoforigin/CountryOfOriginDeterminer_TextDistance.class */
public class CountryOfOriginDeterminer_TextDistance {
    /** Output writer; declared here but not referenced anywhere in this file. */
    protected static BufferedWriter BW;
    /** Index reader opened by {@code main}; {@code run} reads term vectors from it. */
    private static IndexReader reader;
    /** Searcher created over {@link #reader} by {@code main}; {@code run} issues the per-artist queries on it. */
    private static Searcher searcher;
    /** When true, {@code run} also measures distances for every synonym stored in {@link #ct}. */
    public static boolean USE_SYNONYMS = false;
    /**
     * Terms file parsed by {@code main}: the first line of each blank-line-separated group
     * is treated as a country name, the following lines of the group as its synonyms.
     */
    protected static final File TERMS_FILE = new File("C:/Research/Teaching and Supervision/Bachelor's Theses/Conny/nationality/countries_synonyms_terms.txt");
    /** Artist names; {@code main} fills this from second-level directory names under the root directory. */
    protected static ArrayList<String> artists = new ArrayList<>();
    /** Expected country per artist (parent directory name); index-aligned with {@link #artists}. */
    protected static ArrayList<String> countries = new ArrayList<>();
    /** Distinct country names from {@link #TERMS_FILE}, in their original casing. */
    protected static ArrayList<String> allCountries = new ArrayList<>();
    /** Every line of {@link #TERMS_FILE}, lower-cased; only written to in this file. */
    private static Vector<String> terms = new Vector<>();
    /**
     * Maps a country key ({@code TextFormatTool.removeUnwantedChars(country)}) to the
     * lower-cased country name followed by its lower-cased synonyms.
     */
    private static Hashtable<String, Vector<String>> ct = new Hashtable<>();
    /** Analyzer instance; not referenced by the code in this file. */
    private static Analyzer analyzer = new CountryOfOriginAnalyzer();
    /** Name of the index field whose term vector is inspected. */
    private static String field = "contents";
    /** Terms whose token positions serve as anchors for the distance measurement. */
    private static String[] KEY_TERMS = {"born", "founded"};
    /** Per country: sum of all key-term/country position distances; reset for each artist in {@code run}. */
    protected static HashMap<String, Float> keyCountrySumDistance = null;
    /** Per country: smallest per-document average distance seen; {@code run} picks its prediction from this map. */
    protected static HashMap<String, Float> keyCountryAvgDistance = null;
    /** Per country: smallest single distance seen; reset for each artist in {@code run}. */
    protected static HashMap<String, Float> keyCountryMinDistance = null;

    /**
     * Predicts a country for every entry of {@code artists} and prints the hit rate.
     *
     * <p>For each artist, all documents whose {@code artist} field holds the normalized
     * artist name are visited. In every such document the token positions of the
     * {@code KEY_TERMS} are compared with the positions of each country name (and, when
     * {@code USE_SYNONYMS} is set, of its synonyms). The country with the smallest
     * per-document average distance is the prediction; it is printed as
     * {@code artist;expected;predicted} and compared with the expected country.
     *
     * <p>Any exception is reported via {@code printStackTrace()} and ends the run; nothing
     * is propagated to the caller.
     */
    public void run() {
        try {
            int total = artists.size();
            int correct = 0;
            for (int a = 0; a < total; a++) {
                String artist = artists.get(a);
                String expectedCountry = countries.get(a);
                String artistKey = TextFormatTool.removeUnwantedChars(artist);

                // Restrict a match-all query to the documents indexed for this artist.
                TermsFilter artistFilter = new TermsFilter();
                artistFilter.addTerm(new Term("artist", artistKey));
                MatchAllDocsQuery allDocs = new MatchAllDocsQuery();

                // The statistics are per artist, so start from empty maps each time.
                keyCountrySumDistance = new HashMap<>();
                keyCountryAvgDistance = new HashMap<>();
                keyCountryMinDistance = new HashMap<>();

                searcher.search(allDocs.createWeight(searcher), artistFilter, new Collector() {
                    /** Offset of the current segment's doc ids within the top-level reader. */
                    private int docBase;

                    @Override
                    public void setScorer(Scorer scorer) {
                        // Scores are irrelevant: only term positions are evaluated.
                    }

                    @Override
                    public boolean acceptsDocsOutOfOrder() {
                        return true;
                    }

                    @Override
                    public void setNextReader(IndexReader indexReader, int docBase) {
                        this.docBase = docBase;
                    }

                    @Override
                    public void collect(int doc) {
                        try {
                            // 'doc' is relative to the current segment, while the static reader
                            // is the top-level one, so the segment offset has to be added.
                            collectDocument(this.docBase + doc);
                        } catch (Exception e) {
                            // Best effort: a failing document must not abort the whole search.
                            e.printStackTrace();
                        }
                    }
                });

                String predictedCountry = closestCountry(keyCountryAvgDistance);
                System.out.println(artistKey + ";" + expectedCountry + ";" + predictedCountry);
                if (predictedCountry.equals(expectedCountry)) {
                    correct++;
                }
            }
            // Multiply before dividing so the ratio is computed in floating point.
            float precision = total == 0 ? 0.0f : (100.0f * correct) / total;
            System.out.println("Precision: " + precision);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Updates the per-country distance maps with the evidence found in one document.
     * Documents without a positional term vector or without any key term are skipped.
     *
     * @param docId document id relative to the top-level {@code reader}
     * @throws java.io.IOException if the term vector cannot be read
     */
    private static void collectDocument(int docId) throws java.io.IOException {
        org.apache.lucene.index.TermFreqVector rawVector = reader.getTermFreqVector(docId, field);
        if (!(rawVector instanceof TermPositionVector)) {
            return; // no term vector stored, or stored without positions
        }
        TermPositionVector vector = (TermPositionVector) rawVector;

        // Gather the positions of all key terms occurring in this document.
        ArrayList<Integer> keyPositions = new ArrayList<>();
        for (String keyTerm : KEY_TERMS) {
            int keyIndex = vector.indexOf(keyTerm);
            if (keyIndex == -1) {
                continue;
            }
            int[] positions = vector.getTermPositions(keyIndex);
            if (positions != null) {
                for (int position : positions) {
                    keyPositions.add(Integer.valueOf(position));
                }
            }
        }
        if (keyPositions.isEmpty()) {
            return;
        }

        for (String country : allCountries) {
            if (USE_SYNONYMS) {
                Vector<String> synonyms = ct.get(TextFormatTool.removeUnwantedChars(country));
                if (synonyms != null) {
                    // The list built by main() begins with the country name itself, so with
                    // synonyms enabled the name is measured here and once more below.
                    for (String synonym : synonyms) {
                        recordDistances(vector, lookupTermIndex(vector, synonym), keyPositions, country);
                    }
                }
            }
            recordDistances(vector, lookupTermIndex(vector, country), keyPositions, country);
        }
    }

    /**
     * Resolves a (possibly multi-word) name to an index into the document's term array.
     *
     * <p>Single-word names are looked up directly. For multi-word names every word is
     * looked up; if all are present with strictly ascending term indexes, the integer
     * average of those indexes is returned, otherwise the whole lower-cased name is
     * looked up as a single term.
     *
     * <p>NOTE(review): {@code indexesOf} yields term-array indexes rather than token
     * positions, so the averaged value designates some term lying between the parts in
     * term order. This looks unintended but is kept unchanged - confirm the intent.
     *
     * @return the term index, or -1 if the name is not found
     */
    private static int lookupTermIndex(TermPositionVector vector, String name) {
        String lowered = name.toLowerCase();
        String[] words = lowered.split(" ");
        if (words.length > 1) {
            int[] indexes = vector.indexesOf(words, 0, words.length);
            boolean ascending = true;
            int indexSum = 0;
            for (int w = 0; w < indexes.length - 1; w++) {
                if (indexes[w] == -1 || indexes[w + 1] == -1) {
                    ascending = false;
                }
                if (indexes[w + 1] - indexes[w] < 1) {
                    ascending = false;
                }
                indexSum += indexes[w];
            }
            if (ascending) {
                return (indexSum + indexes[indexes.length - 1]) / indexes.length;
            }
        }
        return vector.indexOf(lowered);
    }

    /**
     * Measures the distances between every occurrence of the term at {@code termIndex}
     * and every key-term position, then folds them into the per-country maps: the sum is
     * accumulated, the average and the minimum each keep the smallest value seen so far.
     * Does nothing when the term is absent ({@code termIndex == -1}) or has no positions.
     */
    private static void recordDistances(TermPositionVector vector, int termIndex,
            ArrayList<Integer> keyPositions, String country) {
        if (termIndex == -1) {
            return;
        }
        int[] positions = vector.getTermPositions(termIndex);
        if (positions == null || positions.length == 0) {
            return; // nothing to measure; also avoids a 0/0 average
        }

        float sum = 0.0f;
        float min = Float.MAX_VALUE;
        for (int position : positions) {
            for (Integer keyPosition : keyPositions) {
                float distance = Math.abs(keyPosition.intValue() - position);
                sum += distance;
                if (distance < min) {
                    min = distance;
                }
            }
        }
        float average = sum / (positions.length * keyPositions.size());

        Float previousSum = keyCountrySumDistance.get(country);
        keyCountrySumDistance.put(country, Float.valueOf(previousSum == null ? sum : sum + previousSum.floatValue()));

        Float previousAverage = keyCountryAvgDistance.get(country);
        keyCountryAvgDistance.put(country, Float.valueOf(previousAverage == null ? average : Math.min(previousAverage.floatValue(), average)));

        Float previousMin = keyCountryMinDistance.get(country);
        keyCountryMinDistance.put(country, Float.valueOf(previousMin == null ? min : Math.min(previousMin.floatValue(), min)));
    }

    /**
     * Returns the normalized name ({@code TextFormatTool.removeUnwantedChars}) of the
     * country with the smallest value in {@code distances}, or an empty string when the
     * map is empty. On ties the entry encountered first during iteration wins.
     */
    private static String closestCountry(HashMap<String, Float> distances) {
        float best = Float.MAX_VALUE;
        String closest = "";
        for (String country : distances.keySet()) {
            float value = distances.get(country).floatValue();
            if (value < best) {
                best = value;
                closest = TextFormatTool.removeUnwantedChars(country);
            }
        }
        return closest;
    }

    /**
     * Empty placeholder: the body contains no statements and nothing in this file calls it.
     */
    private void performCountryAndSynonymsSearch() {
    }

    /**
     * Escapes query-syntax special characters by prefixing them with a backslash.
     *
     * <p>Escaped single characters: {@code \ * + - ! ? ( ) { } [ ] ^ " ~ :}. The operator
     * pairs {@code &&} and {@code ||} receive one backslash in front of the pair; a lone
     * {@code &} or {@code |} is left untouched.
     *
     * <p>The text is processed in a single pass so that every special character is
     * escaped exactly once. (Chaining {@code String.replace} calls escaped {@code ~}
     * twice and never escaped the backslash itself.)
     *
     * @param str the raw text; must not be {@code null}
     * @return the escaped text
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public static String escapeSpecialChars(String str) {
        final String singleSpecials = "\\*+-!?(){}[]^\"~:";
        StringBuilder escaped = new StringBuilder(str.length() + 8);
        int i = 0;
        while (i < str.length()) {
            char c = str.charAt(i);
            boolean operatorPair = (c == '&' || c == '|')
                    && i + 1 < str.length()
                    && str.charAt(i + 1) == c;
            if (operatorPair) {
                // "&&" -> "\&&", "||" -> "\||": one backslash in front of the pair.
                escaped.append('\\').append(c).append(c);
                i += 2;
                continue;
            }
            if (singleSpecials.indexOf(c) >= 0) {
                escaped.append('\\');
            }
            escaped.append(c);
            i++;
        }
        return escaped.toString();
    }

    /**
     * Command-line entry point.
     *
     * <ol>
     *   <li>Collects artists and their expected countries from the directory layout
     *       {@code ROOT_DIR/<country>/<artist>/}.</li>
     *   <li>Loads country names and synonyms from {@code TERMS_FILE}.</li>
     *   <li>Opens the index at {@code WebRetrievalConfig.PATH_TO_INDEX}, executes
     *       {@link #run()} and prints the elapsed milliseconds.</li>
     * </ol>
     *
     * @param strArr command-line arguments; not used
     * @throws Exception if the root directory cannot be listed, the terms file cannot be
     *         read, or the index cannot be opened or closed
     */
    public static void main(String[] strArr) throws Exception {
        scanArtistDirectories(new File(WebRetrievalConfig.ROOT_DIR));
        System.out.println("number of artists: " + artists.size());

        loadCountryTerms(TERMS_FILE);
        System.out.println("countries: " + ct.size());

        reader = IndexReader.open(new SimpleFSDirectory(new File(WebRetrievalConfig.PATH_TO_INDEX)));
        long startMillis = 0L;
        try {
            searcher = new IndexSearcher(reader);
            startMillis = System.currentTimeMillis();
            new CountryOfOriginDeterminer_TextDistance().run();
        } finally {
            // Release the index even when the search setup or the run fails.
            reader.close();
        }
        System.out.print(System.currentTimeMillis() - startMillis);
        System.out.println(" total milliseconds");
    }

    /**
     * Adds one (artist, country) pair to {@code artists}/{@code countries} for every
     * second-level directory below {@code rootDir}: the first level names the country,
     * the second level the artist. Plain files are ignored at both levels.
     *
     * @throws IllegalStateException if {@code rootDir} cannot be listed
     */
    private static void scanArtistDirectories(File rootDir) {
        File[] countryDirs = rootDir.listFiles();
        if (countryDirs == null) {
            throw new IllegalStateException("Cannot list artist root directory: " + rootDir);
        }
        for (File countryDir : countryDirs) {
            if (!countryDir.isDirectory()) {
                continue;
            }
            File[] artistDirs = countryDir.listFiles();
            if (artistDirs == null) {
                continue; // listing failed for this directory; skip it
            }
            for (File artistDir : artistDirs) {
                if (artistDir.isDirectory()) {
                    artists.add(artistDir.getName());
                    countries.add(countryDir.getName());
                }
            }
        }
    }

    /**
     * Parses the terms file into {@code terms}, {@code allCountries} and {@code ct}.
     *
     * <p>The file consists of groups separated by blank lines. The first line of a group
     * is the country name, the remaining lines are its synonyms. Every line (blank ones
     * included) is appended lower-cased to {@code terms}. Blank lines only separate
     * groups; they never create an empty country entry.
     *
     * <p>NOTE(review): the file is decoded with the platform default charset, as before.
     *
     * @throws java.io.IOException if the file cannot be opened or read
     */
    private static void loadCountryTerms(File termsFile) throws java.io.IOException {
        try (BufferedReader in = new BufferedReader(new FileReader(termsFile))) {
            boolean expectCountry = true;
            String countryKey = "";
            String line;
            while ((line = in.readLine()) != null) {
                String lowered = line.toLowerCase();
                terms.addElement(lowered);
                if (line.equals("")) {
                    expectCountry = true; // the next non-blank line starts a new group
                    continue;
                }
                if (expectCountry) {
                    if (!allCountries.contains(line)) {
                        allCountries.add(line);
                    }
                    countryKey = TextFormatTool.removeUnwantedChars(line);
                    Vector<String> names = new Vector<>();
                    names.addElement(lowered);
                    ct.put(countryKey, names);
                    expectCountry = false;
                } else {
                    // A country line always precedes its synonyms, so the entry exists.
                    ct.get(countryKey).addElement(lowered);
                }
            }
        }
    }
}
