package comirva.web.crawling.agmis;

import comirva.util.TermProfileUtils;
import comirva.util.external.TextFormatTool;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.StringTokenizer;
import java.util.Vector;

/* loaded from: input_file:comirva/web/crawling/agmis/ExaleadRetriever.class */
/**
 * Crawler thread that sends one Exalead web search per search term (artist name) and stores
 * the result URLs and the reported page count on disk.
 *
 * <p>For every term a directory is created below {@link #STORAGE_PATH} (optionally inside a
 * sub-directory named after the first character of the sanitised term). Two files are written
 * into it: {@code urls.dat} (one result URL per line, overwritten on each retrieval) and
 * {@code info.xml} (one {@code <ExaleadRetrieval>} record appended per retrieval).
 *
 * <p>Failed retrievals are logged to {@link #ERROR_REPORT_FILE} and the crawl continues with
 * the next term. Interrupting the thread stops the crawl after the current term.
 */
public class ExaleadRetriever extends Thread {
    private static final String STORAGE_PATH = "C:/Research/Data/amg-artists/exalead/M/rock";
    private static final File INPUT_FILE = new File("C:/Research/Data/amg-artists/windows-ansi/artists_rock.txt");
    private static final File ERROR_REPORT_FILE = new File("C:/Research/Data/amg-artists/exalead-error.txt");
    private static final String EXALEAD_URL = "http://www.exalead.com/search?scm=never&dcm=never&q=";
    /** Appended to the quoted search term before URL-encoding. */
    private static final String ADDITIONAL_KEYWORDS = " NEAR music";
    /** When set, terms whose output directory already exists are not retrieved again. */
    private static final boolean SKIP_EXISTING_ARTISTS = true;
    /** When set, term directories are grouped below a directory named after their first character. */
    private static final boolean STORE_IN_FIRSTLETTER_SUBDIRS = true;
    /** Value of the {@code Cookie} request header sent with every search request. */
    private static final String EXALEAD_COOKIE = "db_1_0=eJxtUu1uozAQfBVe4BK+IZHuR4mQWikNEdBWPSFFLiwEBWxkzJG+/dkYmsAFRAI7uzOzu4ZrA7SsAbM2URLlJK9E+fN09OPYP+lJkii74BCHwV68Jkr0HITx7i2OTpoIxKH/FL/6h1h+cvwYHKIgjGTpDdYlrDQUcqCAU5grtmdCWdqx9gh0X2IQ6ZaswSSErKSQsgD7V1QBynakFliOqhZkUgY56ioWQst/2/cSehEfTbWAaHreI1x0qODCQ5A0gMf0F3yA/qPEGekXtLQszmxHqq7Gfp5zCwu8rAUh93zk/yKkT5Ioh2iQfSWZREZGKXlXoqnqRNbwIeyhQOl3NM1DQIx28zafSQ3NWD4Wdy3QF8yA5iiFqVUBAb636lUovXj8KSjpcLZoh2+h97OSTepexxjBCw9D0jWtugz4pi7tkmPo2kPcTctQVcFSRA4+50v+JB2VQ/rfh+z+reWm9/AXKgH8Gofbl5ey4WcCLVZ6qxOH53a4hrIzY812ve77fvUNVUV6McB2lZJ6LeDPIZbISzlOpPlvx3VtdWPalmXrjqqZtmnO3Mr5Jz+7kVb0hWQPiJ2B/sjdfUsZzTQ0Vzcd3VBdy7Ad1XLsmY72UMdY6kyjWRFaDEofU0TquK5lcnpd3agbTXNc25q3oz+UMRcyX18pt77qLoOE5+0kuWVtDEtTLdXgt2tyCXdGbjwkt6bo9ExnzXyYrmy3W01zbc1w+UIMw/gHWf6ePw==";

    /** Markup that immediately precedes the target of a result link in the result page. */
    private static final String RESULT_LINK_PREFIX = "<a class=\"c307\" href=\"";
    /** Markup that immediately precedes the page count in the result page. */
    private static final String PAGE_COUNT_PREFIX = "</b> of about <b>";
    /** Markup that terminates the page count. */
    private static final String PAGE_COUNT_SUFFIX = "</b>";
    /** The page count terminator must be found within this many characters after the prefix. */
    private static final int PAGE_COUNT_WINDOW = 50;

    private final Vector<String> searchWords;
    private String storagePath = STORAGE_PATH;

    /**
     * Creates a retriever for the given search terms.
     *
     * @param vector the search terms; every element is expected to be a {@code String}
     */
    @SuppressWarnings({"unchecked", "rawtypes"}) // raw parameter type kept so existing callers still compile
    public ExaleadRetriever(Vector vector) {
        this.searchWords = vector;
    }

    /**
     * Processes all search terms in order: retrieves the result page, extracts URLs and page
     * count, writes them to disk and then waits before contacting the host again.
     */
    @Override
    public void run() {
        if (!this.storagePath.endsWith("/") && !this.storagePath.endsWith("\\")) {
            this.storagePath = this.storagePath + "/";
        }
        ensureDirectory(new File(this.storagePath));

        for (int i = 0; i < this.searchWords.size(); i++) {
            String artist = this.searchWords.elementAt(i);
            String dirName = artist == null ? null : TextFormatTool.removeUnwantedChars(artist);
            if (dirName == null || dirName.isEmpty()) {
                // No usable directory name (e.g. blank input line); taking its first character would throw.
                continue;
            }

            String parentPath = this.storagePath;
            if (STORE_IN_FIRSTLETTER_SUBDIRS) {
                parentPath = parentPath + dirName.substring(0, 1) + "/";
            }
            File artistDir = new File(parentPath + dirName);
            if (SKIP_EXISTING_ARTISTS && artistDir.isDirectory()) {
                continue;
            }

            // Cast before dividing: integer division would always yield 0 here.
            float percent = Math.round(((float) i / this.searchWords.size()) * 1000.0f) / 10.0f;
            System.out.print("retrieving URLs for artist " + artist + " (" + percent + "%)");

            long started = System.currentTimeMillis();
            String page = retrieveExaleadResultPage(artist);
            long elapsed = System.currentTimeMillis() - started;
            if (page == null) {
                continue;
            }

            List<String> urls = extractURLs(page);
            long pageCount = extractPageCount(page);
            System.out.println("\tpage count: " + pageCount);

            try {
                ensureDirectory(artistDir);
                writeUrlList(artistDir, urls);
                appendInfoRecord(artistDir, artist, pageCount, urls);
            } catch (IOException e) {
                e.printStackTrace();
            }

            try {
                // Wait between retrievals, crediting the time the request itself took.
                Thread.sleep(Math.max(10L, GoldenRetriever.WAIT_BETWEEN_RETRIEVALS_FROM_SAME_HOST - elapsed));
            } catch (InterruptedException e) {
                // Preserve the interrupt status and stop crawling.
                Thread.currentThread().interrupt();
                return;
            }
        }
    }

    /**
     * Makes sure the given directory exists, creating missing parent directories as well.
     *
     * @param dir the directory to create if necessary
     * @return {@code true} if the directory exists afterwards
     */
    private static boolean ensureDirectory(File dir) {
        if (dir.isDirectory() || dir.mkdirs()) {
            return true;
        }
        System.err.println("could not create directory " + dir);
        return false;
    }

    /**
     * Writes {@code urls.dat} into the given directory, one URL per line, replacing any
     * previous content.
     */
    private void writeUrlList(File artistDir, List<String> urls) throws IOException {
        try (BufferedWriter out = new BufferedWriter(new FileWriter(new File(artistDir, "urls.dat")))) {
            for (String url : urls) {
                out.write(url + "\n");
            }
        }
    }

    /**
     * Appends one {@code <ExaleadRetrieval>} record to {@code info.xml} in the given directory
     * (the file is created if it does not exist yet).
     */
    private void appendInfoRecord(File artistDir, String artist, long pageCount, List<String> urls)
            throws IOException {
        // NOTE(review): values are written without XML escaping, and records are appended
        // without a separating newline; kept as is so existing readers see the same format.
        try (BufferedWriter out = new BufferedWriter(new FileWriter(new File(artistDir, "info.xml"), true))) {
            out.append("<ExaleadRetrieval>\n");
            out.append("<Timestamp>" + new Date(System.currentTimeMillis()) + "</Timestamp>\n");
            out.append("\t<Query>" + createExaleadSearchURL(artist) + "</Query>\n");
            out.append("\t<PageCount>" + Long.toString(pageCount) + "</PageCount>\n");
            for (int rank = 1; rank <= urls.size(); rank++) {
                out.write("\t<URL rank=\"" + Integer.toString(rank) + "\">" + urls.get(rank - 1) + "</URL>\n");
            }
            out.append("</ExaleadRetrieval>");
        }
    }

    /**
     * Collects the link targets of all result anchors in a result page, in document order.
     *
     * @param str the HTML of the result page; may be {@code null}
     * @return the extracted URLs, empty if none were found
     */
    private List<String> extractURLs(String str) {
        List<String> urls = new ArrayList<>();
        if (str == null) {
            return urls;
        }
        int anchor = str.indexOf(RESULT_LINK_PREFIX);
        while (anchor != -1) {
            int start = anchor + RESULT_LINK_PREFIX.length();
            int end = str.indexOf('"', start);
            if (end == -1) {
                // Attribute is never closed (truncated page); nothing more can be extracted.
                break;
            }
            urls.add(str.substring(start, end));
            anchor = str.indexOf(RESULT_LINK_PREFIX, start);
        }
        return urls;
    }

    /**
     * Extracts the page count reported in a result page.
     *
     * @param str the HTML of the result page; may be {@code null}
     * @return the page count with thousands separators ({@code ,}) removed, or {@code 0} if
     *         the count is missing, truncated or not a number
     */
    private long extractPageCount(String str) {
        if (str == null) {
            return 0L;
        }
        int marker = str.indexOf(PAGE_COUNT_PREFIX);
        if (marker == -1) {
            return 0L;
        }
        int start = marker + PAGE_COUNT_PREFIX.length();
        // Clamp the search window to the end of the page so a short page cannot overrun.
        int limit = Math.min(str.length(), start + PAGE_COUNT_WINDOW);
        int end = str.indexOf(PAGE_COUNT_SUFFIX, start);
        if (end == -1 || end + PAGE_COUNT_SUFFIX.length() > limit) {
            return 0L;
        }
        try {
            return Long.parseLong(str.substring(start, end).replace(",", "").trim());
        } catch (NumberFormatException e) {
            return 0L;
        }
    }

    /**
     * Builds the search URL for a term: the term in double quotes followed by
     * {@link #ADDITIONAL_KEYWORDS}, URL-encoded as UTF-8 and appended to {@link #EXALEAD_URL}.
     *
     * @param str the search term
     * @return the search URL, or {@code null} if it could not be constructed
     */
    private URL createExaleadSearchURL(String str) {
        try {
            return new URL(EXALEAD_URL + URLEncoder.encode("\"" + str + "\"" + ADDITIONAL_KEYWORDS, "UTF-8"));
        } catch (IOException e) {
            // MalformedURLException and UnsupportedEncodingException are both IOExceptions.
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Downloads the result page for a search term.
     *
     * @param str the search term
     * @return the page content with every line terminated by the platform line separator,
     *         or {@code null} if the retrieval failed (the failure is written to the error report)
     */
    private String retrieveExaleadResultPage(String str) {
        URL searchUrl = createExaleadSearchURL(str);
        if (searchUrl == null) {
            reportError(str, "could not build search URL");
            return null;
        }
        try {
            URLConnection connection = searchUrl.openConnection();
            connection.setRequestProperty("Cookie", EXALEAD_COOKIE);
            connection.connect();
            String lineSeparator = System.getProperty("line.separator");
            StringBuilder page = new StringBuilder();
            // NOTE(review): the response is decoded with the platform default charset.
            try (BufferedReader reader = new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    page.append(line).append(lineSeparator);
                }
            }
            return page.toString();
        } catch (IOException | RuntimeException e) {
            // Best effort: a failed retrieval must not abort the whole crawl.
            e.printStackTrace();
            reportError(str, e.getMessage());
            return null;
        }
    }

    /** Appends a failure record for the given search term to {@link #ERROR_REPORT_FILE}. */
    private void reportError(String term, String message) {
        try (BufferedWriter out = new BufferedWriter(new FileWriter(ERROR_REPORT_FILE, true))) {
            out.append("Search Term: " + term + "\n");
            out.append("Storage Path: " + STORAGE_PATH + "\n");
            out.append("Error Message: " + message + "\n");
        } catch (IOException e) {
            System.err.println("could not write error report " + ERROR_REPORT_FILE + ": " + e.getMessage());
        }
    }

    /**
     * Reads the search terms from {@link #INPUT_FILE} (one per line, blank lines ignored) and
     * starts a retriever thread for them.
     *
     * @param strArr unused
     */
    public static void main(String[] strArr) {
        StringTokenizer lines = new StringTokenizer(TermProfileUtils.getFileContent(INPUT_FILE), "\n");
        Vector<String> terms = new Vector<>();
        while (lines.hasMoreTokens()) {
            String term = lines.nextToken().trim();
            // A line holding only "\r" (Windows line endings) trims to an empty term.
            if (!term.isEmpty()) {
                terms.addElement(term);
            }
        }
        new ExaleadRetriever(terms).start();
    }
}
