package marytts.language.de.preprocess;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.regex.Pattern;
import marytts.util.MaryUtils;
import marytts.util.dom.MaryDomUtils;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.w3c.dom.Document;
import org.w3c.dom.Element;

/* loaded from: input_file:marytts/language/de/preprocess/AbbrevEP.class */
/**
 * Expansion pattern for abbreviations (known type: {@code "acronym"}).
 *
 * <p>Tokens are first looked up in a dictionary loaded once, at class
 * initialisation, from the classpath resource {@code abbrev.dat}. Token
 * sequences that are not in the dictionary are expanded one token at a time:
 * digit-only tokens as integers, longer tokens by the spelling rules in
 * {@link #ruleExpandAbbrev(String, boolean)}, and single characters verbatim.
 */
public class AbbrevEP extends ExpansionPattern {
    /** Matches a dictionary line that starts with {@code #} (treated as a comment). */
    private static final Pattern COMMENT_LINE = Pattern.compile("^\\#");
    /** Matches any whitespace character; used to reject dictionary keys containing whitespace. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s");

    /**
     * Abbreviation dictionary: key is the abbreviation, value is a two-element
     * array {@code {ending, expansion}} as read from {@code abbrev.dat}.
     */
    private static final Map<String, String[]> abbrevDict = new HashMap<String, String[]>();
    private static final Logger logger = MaryUtils.getLogger("AbbrevEP");

    private final String[] _knownTypes = {"acronym"};
    private final List<String> knownTypes = Arrays.asList(this._knownTypes);
    /** Always {@code null}: this pattern declares no matching-characters regex. */
    private final Pattern reMatchingChars = null;

    /**
     * @return the list of type names handled by this pattern (just {@code "acronym"})
     */
    @Override
    public List<String> knownTypes() {
        return this.knownTypes;
    }

    /**
     * @return always {@code null} for this pattern
     */
    @Override
    public Pattern reMatchingChars() {
        return this.reMatchingChars;
    }

    /**
     * Decides whether a token may take part in an abbreviation.
     *
     * @param element the token element whose text is inspected
     * @return {@code true} if the token text is an abbreviation according to
     *         {@link #isAbbrev(String)}, matches {@code REPattern.onlyDigits},
     *         or is exactly {@code "."}
     */
    @Override
    protected boolean isCandidate(Element element) {
        String text = MaryDomUtils.tokenText(element);
        if (isAbbrev(text)) {
            return true;
        }
        if (REPattern.onlyDigits.matcher(text).find()) {
            return true;
        }
        return ".".equals(text);
    }

    /**
     * Delegates to {@link #match(String, int)}.
     *
     * @param text the text to test
     * @param type the caller-supplied type value
     * @return the result of {@code match(text, type)}
     */
    @Override
    protected int canDealWith(String text, int type) {
        return match(text, type);
    }

    /**
     * Tests whether a string is an abbreviation of more than one character.
     *
     * @param text the text to test
     * @param type the caller-supplied type value, returned unchanged on success
     * @return {@code type} if {@code text} is longer than one character and
     *         {@link #isAbbrev(String)} accepts it; {@code -1} otherwise
     */
    @Override
    protected int match(String text, int type) {
        boolean accepted = text.length() > 1 && isAbbrev(text);
        return accepted ? type : -1;
    }

    /**
     * Tests whether a string looks like an abbreviation.
     *
     * @param text the text to test
     * @return {@code true} if {@code text} matches any of
     *         {@code REPattern.letterDot}, {@code REPattern.nonInitialCapital}
     *         or {@code REPattern.onlyConsonants}, or if the dictionary has an
     *         entry for {@code text} or for {@code text} followed by a dot
     */
    protected boolean isAbbrev(String text) {
        if (REPattern.letterDot.matcher(text).find()) {
            return true;
        }
        if (REPattern.nonInitialCapital.matcher(text).find()) {
            return true;
        }
        if (REPattern.onlyConsonants.matcher(text).find()) {
            return true;
        }
        return abbrevDict.containsKey(text) || abbrevDict.containsKey(text + ".");
    }

    /**
     * Expands the given tokens and replaces them with the expansion via
     * {@code replaceTokens}.
     *
     * @param tokens the tokens to expand; must be non-null and non-empty
     * @param text   not used by this implementation
     * @param type   not used by this implementation
     * @return the list of elements that replaced {@code tokens}
     * @throws NullPointerException     if {@code tokens} is {@code null}
     * @throws IllegalArgumentException if {@code tokens} is empty
     */
    @Override
    protected List<Element> expand(List<Element> tokens, String text, int type) {
        if (tokens == null) {
            throw new NullPointerException("Received null argument");
        }
        if (tokens.isEmpty()) {
            throw new IllegalArgumentException("Received empty list");
        }
        List<Element> expanded = expandAbbrev(tokens);
        replaceTokens(tokens, expanded);
        return expanded;
    }

    /**
     * Expands a non-empty token list, preferring the longest dictionary match
     * that starts at the first token.
     *
     * <p>The concatenated text of the candidate tokens is looked up as is, then
     * (only when the first token is the first in its sentence and starts with a
     * capital letter) with a lower-cased first character, then with a trailing
     * dot appended. If none matches, the last candidate token is dropped and the
     * lookup repeats. When no prefix matches, only the first token is expanded
     * by rule. Whatever remains is handled by a recursive call.
     *
     * @param tokens the tokens to expand; must contain at least one element
     * @return the expansion of all tokens, in order
     */
    private List<Element> expandAbbrev(List<Element> tokens) {
        List<Element> expanded = new ArrayList<Element>();
        List<Element> remaining = new ArrayList<Element>(tokens);
        List<Element> candidate = new ArrayList<Element>(remaining);

        Element leading = remaining.get(0);
        boolean sentenceInitialCapital = MaryDomUtils.isFirstOfItsKindIn(leading, "s")
                && REPattern.initialCapitalLetter.matcher(MaryDomUtils.tokenText(leading)).find();

        StringBuilder key = new StringBuilder();
        while (!candidate.isEmpty()) {
            key.setLength(0);
            for (Element token : candidate) {
                key.append(MaryDomUtils.tokenText(token));
            }
            logger.debug("Looking up abbreviation in dictionary: `" + key.toString() + "'");
            if (abbrevDict.containsKey(key.toString())) {
                break;
            }
            if (sentenceInitialCapital) {
                // The capital may only be due to the sentence start: retry in lower case.
                // The lower-cased form is deliberately kept for the dotted lookup below.
                key.setCharAt(0, Character.toLowerCase(key.charAt(0)));
                logger.debug("Looking up abbreviation in dictionary: `" + key.toString() + "'");
                if (abbrevDict.containsKey(key.toString())) {
                    break;
                }
            }
            key.append(".");
            logger.debug("Looking up abbreviation in dictionary: `" + key.toString() + "'");
            if (abbrevDict.containsKey(key.toString())) {
                break;
            }
            // No match for this prefix: shorten it by one token and try again.
            candidate.remove(candidate.size() - 1);
        }

        if (candidate.isEmpty()) {
            // Nothing in the dictionary: expand just the first token by rule.
            String text = MaryDomUtils.tokenText(leading);
            if (REPattern.onlyDigits.matcher(text).find()) {
                logger.debug("Expanding as integer: `" + text + "'");
                expanded.addAll(makeNewTokens(leading.getOwnerDocument(), number.expandInteger(text)));
            } else if (text.length() > 1) {
                logger.debug("Expanding one token by rule: `" + text + "'");
                slowDown(leading);
                expanded.addAll(ruleExpandAbbrev(leading));
            } else {
                expanded.addAll(makeNewTokens(leading.getOwnerDocument(), text));
            }
            remaining.remove(0);
        } else {
            expanded.addAll(dictionaryExpandAbbrev(candidate, key.toString()));
            remaining.removeAll(candidate);
            logger.debug("Have found abbreviation in dictionary: `" + key.toString() + "'");
        }

        if (!remaining.isEmpty()) {
            expanded.addAll(expandAbbrev(remaining));
        }

        // isDebugEnabled() also covers levels more verbose than DEBUG (TRACE, ALL).
        if (logger.isDebugEnabled()) {
            StringBuilder summary = new StringBuilder();
            for (Element element : expanded) {
                if (element.getTagName().equals("t")) {
                    summary.append(MaryDomUtils.tokenText(element));
                } else {
                    summary.append(element.getTagName());
                }
                summary.append(" ");
            }
            logger.debug("Expanded abbreviation: " + summary.toString());
        }
        return expanded;
    }

    /**
     * Builds the expansion for an abbreviation that is present in the dictionary.
     *
     * <p>If the first token is the first of its kind in its {@code div} and the
     * expansion starts with a lower-case letter, both the expansion and the
     * abbreviation get an upper-cased first character. If the dictionary entry
     * has a non-empty ending, the first {@code t} element of the result gets
     * the attributes {@code ending} and {@code sounds_like} (its former text)
     * and its text is set to the abbreviation.
     *
     * @param tokens the matched tokens; the first one supplies the owner document
     * @param abbrev the dictionary key that matched
     * @return the new elements, possibly empty
     */
    private List<Element> dictionaryExpandAbbrev(List<Element> tokens, String abbrev) {
        Document document = tokens.get(0).getOwnerDocument();
        List<Element> created = new ArrayList<Element>();
        String[] entry = abbrevDict.get(abbrev);
        String ending = entry[0];
        String expansion = entry[1];

        if (MaryDomUtils.isFirstOfItsKindIn(tokens.get(0), "div")
                && REPattern.initialLowercaseLetter.matcher(expansion).find()) {
            expansion = Character.toUpperCase(expansion.charAt(0)) + expansion.substring(1);
            abbrev = Character.toUpperCase(abbrev.charAt(0)) + abbrev.substring(1);
        }

        created.addAll(makeNewTokens(document, expansion, true, abbrev));
        if (created.isEmpty()) {
            return created;
        }

        if (ending != null && ending.length() > 0) {
            // Descend through first children until a <t> element is reached.
            Element target = created.get(0);
            while (target != null && !target.getTagName().equals("t")) {
                target = MaryDomUtils.getFirstChildElement(target);
            }
            if (target != null) {
                String spoken = MaryDomUtils.tokenText(target);
                target.setAttribute("ending", ending);
                target.setAttribute("sounds_like", spoken);
                MaryDomUtils.setTokenText(target, abbrev);
            }
        }
        return created;
    }

    /**
     * Expands a single token by rule and wraps the result in new tokens.
     *
     * @param element the token element to expand
     * @return the new elements produced by {@code makeNewTokens} for the
     *         rule-based expansion of the token's text
     */
    protected List<Element> ruleExpandAbbrev(Element element) {
        Document document = element.getOwnerDocument();
        String text = MaryDomUtils.tokenText(element);
        String expansion = ruleExpandAbbrev(text, false);
        return makeNewTokens(document, expansion, true, text, true);
    }

    /**
     * Expands an abbreviation string by rule.
     *
     * <p>Short strings (at most 5 characters without a dot, or at most 6
     * characters whose only dot is the last character) are spelled out. Longer
     * strings are returned unchanged unless they contain special characters, in
     * which case they are split at those characters and each part is expanded
     * recursively, the results being joined by single spaces.
     *
     * @param str the abbreviation text
     * @param z   whether special characters are kept: passed as the
     *            {@code returnDelims} flag when splitting, and when spelling out
     *            it decides whether special characters are expanded or dropped
     * @return the expanded text
     */
    public String ruleExpandAbbrev(String str, boolean z) {
        int firstDot = str.indexOf('.');
        boolean shortWithoutDot = firstDot == -1 && str.length() <= 5;
        boolean shortWithFinalDot = firstDot == str.length() - 1 && str.length() <= 6;
        if (shortWithoutDot || shortWithFinalDot) {
            return spellOutAbbrev(str, z);
        }
        if (!specialChar.reMatchingChars().matcher(str).find()) {
            return str;
        }
        StringBuilder joined = new StringBuilder();
        StringTokenizer parts = new StringTokenizer(str, specialChar.sMatchingCharsSimpleString, z);
        while (parts.hasMoreTokens()) {
            joined.append(ruleExpandAbbrev(parts.nextToken(), z));
            joined.append(" ");
        }
        return joined.toString().trim();
    }

    /**
     * Spells out a string character by character.
     *
     * <p>Each ordinary character is emitted followed by a space. A special
     * character is replaced by its expansion when {@code keepSpecial} is
     * {@code true} and dropped otherwise. An upper-case letter followed by an
     * {@code 's'} that is either the last character or directly followed by a
     * special character is emitted as the letter plus the marker
     * {@code "[*s]"}, consuming the {@code 's'}.
     *
     * @param text        the text to spell out; may be {@code null} or empty
     * @param keepSpecial whether special characters are expanded rather than dropped
     * @return the spelled-out text with surrounding whitespace trimmed, or
     *         {@code text} itself if it is {@code null} or empty
     */
    private String spellOutAbbrev(String text, boolean keepSpecial) {
        if (text == null || text.length() == 0) {
            return text;
        }
        StringBuilder spelled = new StringBuilder();
        int length = text.length();
        int pos = 0;
        while (pos < length) {
            String current = text.substring(pos, pos + 1);
            if (specialChar.matchSpecialChar(current)) {
                if (keepSpecial) {
                    spelled.append(specialChar.expandSpecialChar(current));
                    spelled.append(" ");
                }
            } else {
                // The character after next either does not exist or is a special character.
                boolean suffixPosition = pos + 2 == length
                        || (pos + 2 < length
                                && specialChar.matchSpecialChar(text.substring(pos + 2, pos + 3)));
                if (suffixPosition
                        && Character.isUpperCase(text.charAt(pos))
                        && text.charAt(pos + 1) == 's') {
                    spelled.append(current);
                    spelled.append("[*s]");
                    pos++; // skip the 's' that was folded into the marker
                } else {
                    spelled.append(current);
                    spelled.append(" ");
                }
            }
            pos++;
        }
        return spelled.toString().trim();
    }

    /**
     * Fills the abbreviation dictionary from the classpath resource
     * {@code abbrev.dat}, read as UTF-8.
     *
     * <p>Lines starting with {@code #} and lines matching
     * {@code REPattern.emptyLine} are skipped. Every other line is split at
     * {@code /} into abbreviation, ending and expansion; runs of whitespace in
     * the expansion are collapsed to a single space. Lines with fewer than three
     * fields, and abbreviations containing whitespace, are logged and ignored.
     *
     * @throws FileNotFoundException if the resource cannot be found
     * @throws IOException           if reading the resource fails
     */
    private static void loadAbbrevDict() throws FileNotFoundException, IOException {
        InputStream stream = AbbrevEP.class.getResourceAsStream("abbrev.dat");
        if (stream == null) {
            // getResourceAsStream signals a missing resource with null, not an exception.
            throw new FileNotFoundException("Resource abbrev.dat not found for " + AbbrevEP.class.getName());
        }
        try {
            BufferedReader reader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
            String line;
            while ((line = reader.readLine()) != null) {
                if (COMMENT_LINE.matcher(line).find() || REPattern.emptyLine.matcher(line).find()) {
                    continue;
                }
                StringTokenizer fields = new StringTokenizer(line, "/");
                if (fields.countTokens() < 3) {
                    // A malformed line must not abort class initialisation.
                    logger.warn("In abbrev.dat: Line \"" + line + "\" has fewer than three fields. Ignoring.");
                    continue;
                }
                String abbrev = fields.nextToken().trim();
                String ending = fields.nextToken().trim();
                String expansion = fields.nextToken().trim().replaceAll("\\s+", " ");
                if (WHITESPACE.matcher(abbrev).find()) {
                    logger.info("In abbrev.dat: Abbreviation \"" + abbrev + "\" contains whitespace. Ignoring.");
                } else {
                    abbrevDict.put(abbrev, new String[]{ending, expansion});
                }
            }
        } finally {
            stream.close();
        }
    }

    // Load the dictionary once; on failure the class stays usable with an empty dictionary.
    static {
        try {
            loadAbbrevDict();
        } catch (FileNotFoundException e) {
            logger.warn("Could not load abbreviation file", e);
        } catch (IOException e2) {
            logger.warn("Could not load abbreviation file", e2);
        }
    }
}
