package comirva.util.external;

import comirva.data.EntityTermProfile;
import comirva.io.filefilter.HTMLFileFilter;
import comirva.util.TermProfileUtils;
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.util.StringTokenizer;
import java.util.Vector;
import javax.xml.stream.XMLOutputFactory;

/* loaded from: input_file:comirva/util/external/TermProfileContentIndexing.class */
/**
 * Builds an {@link EntityTermProfile} for every sub-directory of a root
 * directory and writes each profile as an XML file placed next to the
 * sub-directory (path of the sub-directory with {@code .xml} appended).
 *
 * <p>The term list and the audio/image/video file-extension lists are read
 * once in the constructor, one entry per line.
 */
public class TermProfileContentIndexing {
    /** Directory whose immediate sub-directories are processed. */
    private File rootDir;
    /** File the term list was read from. */
    private File termFile;
    /** Entries read from the term file. */
    private Vector<String> terms;
    /** Entries read from the audio file-extension file. */
    private Vector<String> audioFE;
    /** Entries read from the image file-extension file. */
    private Vector<String> imageFE;
    /** Entries read from the video file-extension file. */
    private Vector<String> videoFE;

    /**
     * Creates the indexer and loads all four list files.
     *
     * @param file  root directory whose sub-directories are processed
     * @param file2 file containing the terms, one per line
     * @param file3 file containing the audio file extensions, one per line
     * @param file4 file containing the image file extensions, one per line
     * @param file5 file containing the video file extensions, one per line
     */
    public TermProfileContentIndexing(File file, File file2, File file3, File file4, File file5) {
        this.rootDir = file;
        this.termFile = file2;
        this.terms = readLines(file2);
        this.audioFE = readLines(file3);
        this.imageFE = readLines(file4);
        this.videoFE = readLines(file5);
    }

    /**
     * Reads a file via {@link TermProfileUtils#getFileContent} and splits its
     * content into lines.
     *
     * <p>Every character of the platform line separator acts as a delimiter,
     * and {@link StringTokenizer} never returns empty tokens, so blank lines
     * are dropped.
     *
     * @param source file to read
     * @return the non-empty lines of the file, in file order
     */
    private static Vector<String> readLines(File source) {
        StringTokenizer tokenizer = new StringTokenizer(
                TermProfileUtils.getFileContent(source), System.getProperty("line.separator"));
        Vector<String> lines = new Vector<>();
        while (tokenizer.hasMoreTokens()) {
            lines.add(tokenizer.nextToken());
        }
        return lines;
    }

    /**
     * Computes and writes a term profile for each immediate sub-directory of
     * the root directory. Does nothing if the root is not a directory or its
     * content cannot be listed. A failure while writing one profile is
     * reported on standard error and does not stop the remaining ones.
     */
    public void generateEntityTermProfiles() {
        if (!this.rootDir.isDirectory()) {
            return;
        }
        File[] children = this.rootDir.listFiles();
        // listFiles() returns null when an I/O error occurs.
        if (children == null) {
            return;
        }
        for (File child : children) {
            if (!isEntityDirectory(child)) {
                continue;
            }
            System.out.println("processing " + child.toString());
            EntityTermProfile entityTermProfile = new EntityTermProfile(child);
            entityTermProfile.setExtAudio(this.audioFE);
            entityTermProfile.setExtImage(this.imageFE);
            entityTermProfile.setExtVideo(this.videoFE);
            entityTermProfile.calculateOccurrences(this.terms, new HTMLFileFilter());
            entityTermProfile.setEntityName(child.toString());
            writeProfile(entityTermProfile, child);
        }
    }

    /**
     * Tells whether a directory entry should be processed as an entity.
     *
     * @param candidate entry of the root directory
     * @return {@code true} for directories other than {@code "."} and {@code ".."}
     */
    private static boolean isEntityDirectory(File candidate) {
        if (!candidate.isDirectory()) {
            return false;
        }
        String name = candidate.getName();
        // Compare by content; == / != on Strings only compares references.
        return !".".equals(name) && !"..".equals(name);
    }

    /**
     * Writes a profile to {@code <entityDir path>.xml}.
     *
     * <p>The output writer is closed even when writing fails. Any exception is
     * printed and swallowed so that one bad entity does not abort the run.
     *
     * @param profile   profile to serialize
     * @param entityDir directory the profile was computed from
     */
    private static void writeProfile(EntityTermProfile profile, File entityDir) {
        File target = new File(entityDir.getPath() + ".xml");
        try (OutputStreamWriter out = new OutputStreamWriter(new FileOutputStream(target), "UTF8")) {
            profile.writeXML(XMLOutputFactory.newInstance().createXMLStreamWriter(out));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Runs the indexer on a fixed, hard-coded data directory.
     *
     * @param strArr ignored
     */
    public static void main(String[] strArr) {
        String base = "/Research/Data/co-occurrences/C112a/cob_terms/";
        new TermProfileContentIndexing(
                new File(base + "crawl_1000_MR/"),
                new File(base + "terms.txt"),
                new File(base + "fileext_audio.txt"),
                new File(base + "fileext_image.txt"),
                new File(base + "fileext_video.txt")).generateEntityTermProfiles();
    }
}
