package org.unicode.cldr.tool;

import com.ibm.icu.impl.Relation;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.impl.locale.LanguageTag;
import com.ibm.icu.impl.number.Padder;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.Collator;
import com.ibm.icu.text.NumberFormat;
import com.ibm.icu.text.Transliterator;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UnicodeSetIterator;
import com.ibm.icu.util.ULocale;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import org.unicode.cldr.draft.FileUtilities;
import org.unicode.cldr.icu.LDMLConstants;
import org.unicode.cldr.test.SubmissionLocales;
import org.unicode.cldr.util.CollatorHelper;
import org.unicode.cldr.util.Pair;

/* loaded from: input_file:org/unicode/cldr/tool/MakeTransliterator.class */
public class MakeTransliterator {
    /** Word singled out for extra diagnostic output while the tool runs. */
    private static final String TEST_STRING = "territories";
    /** Whether applied overrides are reported on standard output. */
    private static final boolean SHOW_OVERRIDES = true;
    /** Pronunciations whose recorded frequency is below this value are skipped. */
    private static final int MINIMUM_FREQUENCY = 9999;
    /** Cleanup transliterator applied to raw and override IPA strings; created in main. */
    private static Transliterator fixBadIpa;
    /** Every character seen in an accepted target (IPA) string. */
    private static UnicodeSet targetCharacters;
    /** Every character seen in an accepted source (English) string. */
    private static UnicodeSet sourceCharacters;
    /** Source strings containing anything outside this set are rejected by addSourceTarget. */
    private static UnicodeSet allowedSourceCharacters;
    /** Target strings containing anything outside this set are rejected by addSourceTarget. */
    private static UnicodeSet allowedTargetCharacters;
    /** Number of entries skipped so far. */
    private static int countSkipped;
    /** Sum of the frequencies of skipped entries. */
    private static long skippedFrequency;
    /** Frequency of the entry currently being processed; -1 until a "%n" field has been read. */
    private static long frequency;
    /** Sum of the frequencies of accepted entries. */
    private static long totalFrequency;
    /** Transliterator built from the base English-to-IPA rules only. */
    private static Transliterator coreBase;
    /** Words shorter than this always get a rule that is bounded on both sides. */
    static int forceSeparateIfShorter = 4;
    /** When non-null, main only prints the base transliteration of this string and exits. */
    private static final String CHECK_BASE = null;
    /** When non-null, main only prints the built en-IPA transliteration of this string and exits. */
    private static final String CHECK_BUILT = null;
    /** Selects how many fields a raw input line must exceed (2 when true, 1 otherwise). */
    static boolean isIPA = true;
    static boolean onlyToTarget = true;
    static NumberFormat nf = NumberFormat.getInstance(ULocale.ENGLISH);
    static Collator col = CollatorHelper.ROOT_COLLATOR;
    /** Directory holding the input rule files and receiving the generated ones (hard-coded path). */
    static String cldrDataDir = "C:\\cvsdata\\unicode\\cldr\\tools\\java\\org\\unicode\\cldr\\util\\data\\transforms\\";
    public static UnicodeSet vowels = new UnicodeSet("[aeiou æ ɑ ə ɛ ɪ ʊ â î ô]").freeze();
    public static UnicodeSet short_vowels = new UnicodeSet("[ɑ æ ə ɛ ɪ ʊ]").freeze();
    // Scratch sets reused by distance(); their contents are only meaningful during that call.
    static UnicodeSet targetChars = new UnicodeSet();
    static UnicodeSet targetCoreChars = new UnicodeSet();
    static UnicodeSet tempDiff = new UnicodeSet();
    /** Collapses the two-character sequences ɑʊ, ɑɪ and oɪ into single marker characters. */
    static Transliterator distinguishLongVowels = Transliterator.createFromRules("faa", "ɑʊ > â ;ɑɪ > î ;oɪ > ô ;", 0);
    /** Normalizes diphthongs and vowel+r sequences before the comparison in mostlyEqual(). */
    static final Transliterator skeletonize = Transliterator.createFromRules("faa", "ɑʊ > âʊ ;ɑɪ > âi ;oɪ > oi ;ɑr > âr ;ær > er ;ɛr > er ;ɪr > ir ;ʊr > ur ;", 0);
    /** Maps each ASCII letter to the pronunciation of its letter name. */
    static Transliterator spellout = Transliterator.createFromRules("foo", "a > e ;b > bi ;c > si ;d > di ;e > i ;f > ɛf ;g > dʒi ;h > etʃ ;i > ɑɪ ;j > dʒe ;k > ke ;l > ɛl ;m > ɛm ;n > ɛn ;o > o ;p > pi ;q > kwu ;r > ɑr ;s > ɛs ;t > ti ;u > ju ;v > vi ;w > dəbjə ;x > ɛks ;y > wɑɪ ;z > zi ;", 0);
    /**
     * Orders strings by length (shortest first), then by the root collator, and finally by
     * code unit order so that distinct strings never compare as equal.
     */
    static Comparator<String> MyComparator = new Comparator<String>() {
        @Override
        public int compare(String left, String right) {
            int byLength = Integer.compare(left.length(), right.length());
            if (byLength != 0) {
                return byLength;
            }
            int byCollation = MakeTransliterator.col.compare(left, right);
            return byCollation != 0 ? byCollation : left.compareTo(right);
        }
    };
    /** Upper bound consulted by getOverrides(); effectively unlimited by default. */
    static int LIMIT = Integer.MAX_VALUE;

    /**
     * Generates the English-to-IPA rule file.
     *
     * <p>Steps: load the IPA cleanup rules, the overrides and the base rules; read the raw
     * pronunciation list; for each word whose pronunciation the rules built so far do not
     * already produce (exactly or closely enough), emit an exception rule; finally write the
     * combined rules to {@code en-IPA.txt} and the added rules, grouped by negated frequency,
     * to {@code en-IPA_count.txt}. Skipped entries are logged to {@code SkippedIPA.txt}.
     *
     * @param strArr command-line arguments (unused)
     * @throws IOException if any input or output file cannot be read or written
     */
    public static void main(String[] strArr) throws IOException {
        setTranslitDebug(true);
        System.out.println(new Locale("fil"));
        System.out.println(new Locale("fil", LDMLConstants.US));
        String rawIpaFile = cldrDataDir + "internal_raw_IPA.txt";
        String rulesOutFile = cldrDataDir + "en-IPA.txt";
        String countsOutFile = cldrDataDir + "en-IPA_count.txt";
        // try-with-resources so the log is closed on the early returns and on failures too.
        try (PrintWriter skippedOut = FileUtilities.openUTF8Writer(SubmissionLocales.DEFAULT_EXTENDED_SUBMISSION, "C:\\DATA\\GEN\\SkippedIPA.txt")) {
            // fixBadIpa must exist before getOverrides(), which applies it to every override.
            fixBadIpa = Transliterator.createFromRules("foo", createFromFile(cldrDataDir + "internal_fixBadIpa.txt", null, null), 0);
            Map<String, String> overrides = getOverrides();
            String baseRules = createFromFile(cldrDataDir + "internal_baseEnglishToIpa.txt", null, null);
            coreBase = Transliterator.createFromRules("foo", baseRules, 0);
            if (CHECK_BASE != null) {
                setTranslitDebug(true);
                System.out.println(coreBase.transliterate(CHECK_BASE));
                return;
            }
            if (CHECK_BUILT != null) {
                Transliterator built = Transliterator.createFromRules("foo", createFromFile(cldrDataDir + "en-IPA.txt", null, null), 0);
                setTranslitDebug(true);
                System.out.println(built.transliterate(CHECK_BUILT));
                return;
            }
            String backwardRules = createFromFile(cldrDataDir + "internal_English-IPA-backwards.txt", null, null);
            checkCoreReversibility(skippedOut, baseRules, backwardRules);
            String coreRules = baseRules + backwardRules;
            System.out.println(coreRules);

            // word -> set of (pronunciation, frequency); MyComparator puts shorter words first.
            Relation<String, Pair<String, Long>> store =
                    Relation.of(new TreeMap<String, Set<Pair<String, Long>>>(MyComparator), TreeSet.class);
            targetCharacters = new UnicodeSet();
            sourceCharacters = new UnicodeSet();
            allowedSourceCharacters = new UnicodeSet("[[:Letter:]’]").freeze();
            allowedTargetCharacters = new UnicodeSet("[æ ɑ b dð e ə ɛ f-i ɪ j-n ŋ o p r s ʃ t u ʊ v w z ʒ θ]").freeze();
            countSkipped = 0;
            totalFrequency = 0L;
            skippedFrequency = 0L;

            loadRawPronunciations(rawIpaFile, overrides, skippedOut, store);

            // Overrides for words that never appeared in the raw list are added as new entries.
            for (Map.Entry<String, String> override : overrides.entrySet()) {
                String word = override.getKey();
                if (store.containsKey(word)) {
                    continue;
                }
                String pronunciation = override.getValue();
                if (pronunciation.length() != 0) {
                    System.out.println("New overrides:\t" + word + " → " + pronunciation);
                    addSourceTarget(skippedOut, word, pronunciation, "overrides", store);
                }
            }
            System.out.println("total count: " + nf.format(store.size()));
            System.out.println("skipped count: " + nf.format(countSkipped));
            System.out.println("total frequency-weighted: " + nf.format(totalFrequency));
            System.out.println("skipped frequency-weighted: " + nf.format(skippedFrequency));

            int countAdded = 0;
            int countTotal = 0;
            long frequencyAdded = 0;
            long frequencySkipped = 0;
            Transliterator current = Transliterator.createFromRules("foo", coreRules, 0);
            List<String> newRules = new ArrayList<String>();
            StringBuilder buffer = new StringBuilder();
            int lastSourceLength = 1;
            // negated frequency -> rule text, so that iteration yields the most frequent first.
            Relation<Long, String> countFailures =
                    Relation.of(new TreeMap<Long, Set<String>>(), TreeSet.class);
            for (String source : store.keySet()) {
                if (TEST_STRING != null && TEST_STRING.equals(source)) {
                    System.out.println(source + "\t" + store.getAll(source));
                }
                countTotal++;
                // Words arrive shortest first; on reaching a new length, rebuild the
                // transliterator so that it includes the rules added for shorter words.
                if (source.length() != lastSourceLength && source.length() >= forceSeparateIfShorter) {
                    System.out.println("Building transliterator for length " + lastSourceLength + " : " + newRules.size());
                    System.out.flush();
                    skippedOut.flush();
                    current = Transliterator.createFromRules("foo", buildRules(coreRules, newRules, buffer), 0);
                    lastSourceLength = source.length();
                }
                String coreResult = current.transliterate(source);
                String bestTarget = null;
                int bestDistance = 999;
                long lastFrequency = 0;
                boolean alreadyCovered = false;
                for (Pair<String, Long> candidate : store.getAll(source)) {
                    String pronunciation = candidate.getFirst();
                    if (pronunciation.length() == 0) {
                        throw new IllegalArgumentException(source + " → " + pronunciation);
                    }
                    lastFrequency = candidate.getSecond().longValue();
                    if (coreResult.equals(pronunciation)) {
                        skippedOut.println("# skipping " + source + " → " + pronunciation + " ;");
                        frequencySkipped += lastFrequency;
                        alreadyCovered = true;
                        break;
                    }
                    if (mostlyEqual(source, pronunciation, coreResult)) {
                        skippedOut.println("# skipping " + source + " → " + pronunciation + " ; # close enough to " + coreResult);
                        frequencySkipped += lastFrequency;
                        alreadyCovered = true;
                        break;
                    }
                    int candidateDistance = distance(source, pronunciation, coreResult);
                    if (bestDistance > candidateDistance) {
                        bestTarget = pronunciation;
                        bestDistance = candidateDistance;
                    }
                }
                if (alreadyCovered || bestTarget == null) {
                    continue;
                }
                // Bound the rule on both sides for short words, for pronunciations much longer
                // than the spelling, for spelled-out words, and for words ending in "e".
                boolean forceSeparate =
                        source.length() < forceSeparateIfShorter
                                || bestTarget.length() * 2 > source.length() * 3
                                || bestTarget.equals(spellout.transliterate(source))
                                || source.endsWith("e");
                String coreForward = coreBase.transliterate(source);
                String ruleSource = forceSeparate ? "$x{" + source + "}$x" : "$x{" + source;
                // NOTE(review): ruleSource always begins with "$x{", so this quoting branch
                // cannot trigger as written; kept to preserve the generated output.
                String quotedSource = ruleSource.startsWith("use") ? "'" + ruleSource + "'" : ruleSource;
                newRules.add(quotedSource + " → " + bestTarget + " ; # " + coreResult + (coreForward.equals(coreResult) ? SubmissionLocales.DEFAULT_EXTENDED_SUBMISSION : "\t\t" + coreForward) + "\n");
                skippedOut.println("# couldn't replace  " + ruleSource + " → " + bestTarget + " ; # " + coreResult);
                // lastFrequency is the frequency of the last candidate examined, as before.
                countFailures.put(-lastFrequency, ruleSource + " → " + bestTarget + " ; # " + coreResult);
                countAdded++;
                frequencyAdded += lastFrequency;
            }

            String finalRules = buildRules(coreRules, newRules, buffer);
            // Result is discarded; presumably this is a validity check of the generated rules.
            Transliterator.createFromRules("foo", finalRules, 0);
            try (PrintWriter rulesOut = FileUtilities.openUTF8Writer(SubmissionLocales.DEFAULT_EXTENDED_SUBMISSION, rulesOutFile)) {
                rulesOut.println(finalRules);
            }
            try (PrintWriter countsOut = FileUtilities.openUTF8Writer(SubmissionLocales.DEFAULT_EXTENDED_SUBMISSION, countsOutFile)) {
                for (Long negatedFrequency : countFailures.keySet()) {
                    for (String rule : countFailures.getAll(negatedFrequency)) {
                        countsOut.println(negatedFrequency + "\t" + rule);
                    }
                }
            }
            System.out.println("countTotal: " + nf.format(countTotal));
            System.out.println("countAdded: " + nf.format(countAdded));
            System.out.println("countSkipped: " + nf.format(countTotal - countAdded));
            System.out.println("frequencyTotal: " + nf.format(frequencyAdded + frequencySkipped));
            System.out.println("frequencyAdded: " + nf.format(frequencyAdded));
            System.out.println("frequencySkipped: " + nf.format(frequencySkipped));
        }
    }

    /**
     * Reads the raw pronunciation list and records every usable (word, pronunciation) pair.
     *
     * <p>Each line is split on tabs or commas after removing a leading BOM and any "#" comment.
     * Field 0 is the word; a field starting with "%" sets the current frequency; every other
     * field is a pronunciation. Lines without enough fields are ignored.
     *
     * @param rawIpaFile path of the raw pronunciation file
     * @param overrides word to replacement pronunciation; an empty value suppresses the word
     * @param skippedOut log receiving the entries that are skipped
     * @param store relation that accepted entries are added to
     * @throws IOException if the file cannot be read
     */
    private static void loadRawPronunciations(String rawIpaFile, Map<String, String> overrides, PrintWriter skippedOut, Relation<String, Pair<String, Long>> store) throws IOException {
        int minimumFields = isIPA ? 2 : 1;
        try (BufferedReader in = FileUtilities.openUTF8Reader(SubmissionLocales.DEFAULT_EXTENDED_SUBMISSION, rawIpaFile)) {
            String line;
            while ((line = in.readLine()) != null) {
                if (line.startsWith("\ufeff")) {
                    line = line.substring(1);
                }
                String originalLine = line;
                int commentStart = line.indexOf('#');
                if (commentStart >= 0) {
                    line = line.substring(0, commentStart);
                }
                String trimmed = line.trim();
                frequency = -1L;
                String[] fields = trimmed.split(" *[\\t,] *");
                if (fields.length <= minimumFields) {
                    continue;
                }
                String rawWord = fields[0];
                if (TEST_STRING != null && rawWord.equals(TEST_STRING)) {
                    System.out.println(trimmed);
                }
                String source = UCharacter.toLowerCase(ULocale.ENGLISH, rawWord.replace("'", "’"));
                if (source.endsWith(".")) {
                    source = source.substring(0, source.length() - 1);
                }
                if (source.contains(Padder.FALLBACK_PADDING_STRING) || source.contains(LanguageTag.SEP)) {
                    skippedOut.println(originalLine + "\tspace or hyphen");
                    countSkipped++;
                    skippedFrequency += frequency;
                    continue;
                }
                String override = overrides.get(source);
                String spelledOut = spellout.transliterate(source);
                for (int i = 1; i < fields.length; i++) {
                    String field = fields[i];
                    if (field.startsWith("%")) {
                        frequency = Long.parseLong(field.substring(1));
                    } else if (override != null) {
                        if (SHOW_OVERRIDES) {
                            System.out.println("Overriding\t" + source + " → ! " + field + " → " + override);
                        }
                        if (override.length() != 0) {
                            if (TEST_STRING != null && source.equals(TEST_STRING)) {
                                setTranslitDebug(true);
                            }
                            String fixedOverride = fixBadIpa.transliterate(override);
                            setTranslitDebug(false);
                            addSourceTarget(skippedOut, source, fixedOverride, originalLine, store);
                        }
                    } else if (frequency < MINIMUM_FREQUENCY) {
                        countSkipped++;
                    } else {
                        String pronunciation = UCharacter.toLowerCase(ULocale.ENGLISH, field).replace(Padder.FALLBACK_PADDING_STRING, SubmissionLocales.DEFAULT_EXTENDED_SUBMISSION);
                        if (pronunciation.startsWith(LanguageTag.SEP) || pronunciation.endsWith(LanguageTag.SEP)) {
                            continue;
                        }
                        String fixed = fixBadIpa.transliterate(pronunciation);
                        if (fixed.equals(spelledOut)) {
                            skippedOut.println(originalLine + "\tspellout");
                            countSkipped++;
                        } else {
                            if (!fixed.equals(pronunciation)) {
                                skippedOut.println("\t### fixed IPA:\t" + source + "\t" + fixed + "\twas: " + pronunciation);
                            }
                            addSourceTarget(skippedOut, source, fixed, originalLine, store);
                        }
                    }
                }
            }
        }
    }

    /**
     * Sets the static {@code DEBUG} field of {@link Transliterator} through reflection.
     * Any failure is reported with a stack trace and otherwise ignored.
     *
     * @param z the value to store in the field
     */
    private static void setTranslitDebug(boolean z) {
        final Class<Transliterator> owner = Transliterator.class;
        try {
            java.lang.reflect.Field debugFlag = owner.getField("DEBUG");
            debugFlag.setBoolean(owner, z);
        } catch (Exception failure) {
            failure.printStackTrace();
        }
    }

    /**
     * Records one source/target pair in {@code relation} if both strings use only allowed
     * characters; otherwise counts the entry as skipped.
     *
     * <p>The pair is stored with the current value of the static {@code frequency} field, which
     * is also what gets added to the running totals.
     *
     * @param printWriter log that rejected sources are written to
     * @param str source word
     * @param str2 target pronunciation
     * @param str3 text identifying where the pair came from, used as a message prefix
     * @param relation destination for accepted pairs
     */
    private static void addSourceTarget(PrintWriter printWriter, String str, String str2, String str3, Relation<String, Pair<String, Long>> relation) {
        if (str.equals("teh")) {
            System.out.println("debug");
        }

        if (!allowedSourceCharacters.containsAll(str)) {
            // Rejected sources go to the log file.
            UnicodeSet unexpected = new UnicodeSet().addAll(str).removeAll(allowedSourceCharacters);
            printWriter.println(str3 + "\t# Strange source values:\t" + str + "\t" + unexpected.toPattern(false));
            countSkipped++;
            skippedFrequency += frequency;
            return;
        }

        if (!allowedTargetCharacters.containsAll(str2)) {
            // Rejected targets go to standard output rather than the log file.
            UnicodeSet unexpected = new UnicodeSet().addAll(str2).removeAll(allowedTargetCharacters);
            System.out.println(str3 + "\t# Strange target values:\t" + str2 + "\t" + unexpected.toPattern(false));
            countSkipped++;
            skippedFrequency += frequency;
            return;
        }

        sourceCharacters.addAll(str);
        targetCharacters.addAll(str2);
        Pair<String, Long> entry = new Pair<>(str2, Long.valueOf(frequency));
        relation.put(str, entry);
        totalFrequency += frequency;
    }

    /**
     * Writes a round-trip table for a fixed list of sample words: each word, its result under
     * the first rule set, and that result passed through the second rule set built with
     * direction 1.
     *
     * @param printWriter destination of the table; flushed before returning
     * @param str rules compiled with direction 0
     * @param str2 rules compiled with direction 1
     */
    private static void checkCoreReversibility(PrintWriter printWriter, String str, String str2) {
        final String samples = "bat bait bet beet bit bite bot boat but bute bout boot book boy pat bat vat fat mat tat dat thew father nat sat zoo ash asia gate cat late rate hate yet rang chat jet";
        Transliterator toIpa = Transliterator.createFromRules("foo", str, 0);
        Transliterator fromIpa = Transliterator.createFromRules("foo2", str2, 1);
        String[] words = samples.split("\\s");
        for (int index = 0; index < words.length; index++) {
            String word = words[index];
            String ipa = toIpa.transliterate(word);
            String roundTrip = fromIpa.transliterate(ipa);
            printWriter.println(word + "\t " + ipa + "\t " + roundTrip);
        }
        printWriter.flush();
    }

    /**
     * Assembles a complete rule file: a fixed header, the given rules in reverse list order
     * (last added first), then the core rules.
     *
     * @param str core rules appended after the list entries
     * @param list rule lines, each expected to carry its own line terminator
     * @param sb scratch buffer; cleared first and left holding the returned text
     * @return the assembled rule text
     */
    private static String buildRules(String str, List<String> list, StringBuilder sb) {
        sb.setLength(0);
        sb.append("# Author: M Davis\n# Email: mark.davis@icu-project.org\n# Description: English to IPA\n:: lower(); \n$x = [:^letter:] ;\n");
        int remaining = list.size();
        while (remaining > 0) {
            remaining--;
            String rule = list.get(remaining);
            sb.append(rule);
        }
        sb.append(str);
        return sb.toString();
    }

    /**
     * Prints one line per iterated element of the set: the hex value of the iterator's
     * {@code codepoint}, the corresponding character in parentheses, and its Unicode name.
     *
     * @param unicodeSet the set to list
     */
    private static void showSet(UnicodeSet unicodeSet) {
        for (UnicodeSetIterator cursor = new UnicodeSetIterator(unicodeSet); cursor.next(); ) {
            String hex = Utility.hex(cursor.codepoint);
            String character = UTF16.valueOf(cursor.codepoint);
            String name = UCharacter.getName(cursor.codepoint);
            System.out.println(hex + "\t(" + character + ")\t" + name);
        }
    }

    /**
     * Scores how far a pronunciation is from a transliteration result; lower is closer.
     *
     * <p>Tiers: 0 for identical strings, 1 when {@code mostlyEqual} accepts them, 3 when both
     * use the same set of characters after collapsing long vowels, 5 when the sets match once
     * short vowels (or, failing that, all vowels) are ignored, and otherwise 7 plus the number
     * of characters found in only one of the two sets.
     *
     * <p>Uses the shared scratch sets {@code targetChars}, {@code targetCoreChars} and
     * {@code tempDiff}, overwriting their contents.
     *
     * @param str the source word, passed through to {@code mostlyEqual}
     * @param str2 candidate pronunciation
     * @param str3 result produced by the current rules
     * @return the distance score
     */
    private static int distance(String str, String str2, String str3) {
        if (str2.equals(str3)) {
            return 0;
        }
        if (mostlyEqual(str, str2, str3)) {
            return 1;
        }
        String collapsedPronunciation = distinguishLongVowels.transliterate(str2);
        String collapsedResult = distinguishLongVowels.transliterate(str3);
        targetChars.clear().addAll(collapsedPronunciation);
        targetCoreChars.clear().addAll(collapsedResult);
        if (targetChars.equals(targetCoreChars)) {
            return 3;
        }
        // Ignore short vowels first, then every vowel; both tiers score 5.
        UnicodeSet[] ignorable = {short_vowels, vowels};
        for (UnicodeSet ignored : ignorable) {
            targetChars.removeAll(ignored);
            targetCoreChars.removeAll(ignored);
            if (targetChars.equals(targetCoreChars)) {
                return 5;
            }
        }
        tempDiff.clear().addAll(targetChars).removeAll(targetCoreChars);
        int onlyInPronunciation = tempDiff.size();
        tempDiff.clear().addAll(targetCoreChars).removeAll(targetChars);
        int onlyInResult = tempDiff.size();
        return 7 + onlyInPronunciation + onlyInResult;
    }

    /**
     * Tells whether two pronunciations are close enough to be treated as the same.
     *
     * <p>Both strings must have the same length. After both are passed through
     * {@code skeletonize}, they are compared position by position; a mismatch is tolerated when
     * one side is ə (U+0259) and the other is in {@code short_vowels}, or when the two sides
     * are ɪ (U+026A) and ɛ (U+025B) in either order.
     *
     * <p>The previous code tested the ɪ/ɛ pairing twice in the same order, so ɛ versus ɪ was
     * rejected while ɪ versus ɛ was accepted; the check is now symmetric, matching the
     * ə/short-vowel handling.
     *
     * @param str the source word (not used by the comparison)
     * @param str2 candidate pronunciation
     * @param str3 result produced by the current rules
     * @return {@code true} if every position matches or differs only in a tolerated way
     */
    private static boolean mostlyEqual(String str, String str2, String str3) {
        if (str2.length() != str3.length()) {
            return false;
        }
        final char schwa = '\u0259';
        final char smallCapitalI = '\u026A';
        final char openE = '\u025B';
        String first = skeletonize.transliterate(str2);
        String second = skeletonize.transliterate(str3);
        // Guard the indexed loop in case skeletonizing ever changes the lengths differently.
        if (first.length() != second.length()) {
            return false;
        }
        for (int i = 0; i < first.length(); i++) {
            char a = first.charAt(i);
            char b = second.charAt(i);
            if (a == b) {
                continue;
            }
            if (a == schwa && short_vowels.contains(b)) {
                continue;
            }
            if (short_vowels.contains(a) && b == schwa) {
                continue;
            }
            if ((a == smallCapitalI && b == openE) || (a == openE && b == smallCapitalI)) {
                continue;
            }
            return false;
        }
        return true;
    }

    /**
     * Reads a UTF-8 text file into a single string, optionally transforming each line.
     *
     * <p>A byte order mark at the start of a line is removed. Each line is then passed through
     * {@code transliterator} and {@code transliterator2}, in that order, when they are non-null,
     * and appended followed by {@code "\n"}.
     *
     * @param str path of the file to read
     * @param transliterator first per-line transform, or {@code null} for none
     * @param transliterator2 second per-line transform, or {@code null} for none
     * @return the processed file content, every line terminated by a newline
     * @throws IOException if the file cannot be opened or read
     */
    public static String createFromFile(String str, Transliterator transliterator, Transliterator transliterator2) throws IOException {
        StringBuilder sb = new StringBuilder();
        // try-with-resources closes the reader even when reading or transliterating throws.
        try (BufferedReader reader = FileUtilities.openUTF8Reader(SubmissionLocales.DEFAULT_EXTENDED_SUBMISSION, str)) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.startsWith("\ufeff")) {
                    line = line.substring(1);
                }
                if (transliterator != null) {
                    line = transliterator.transliterate(line);
                }
                if (transliterator2 != null) {
                    line = transliterator2.transliterate(line);
                }
                sb.append(line);
                sb.append("\n");
            }
        }
        return sb.toString();
    }

    /**
     * Loads {@code internal_overrides.txt} from {@code cldrDataDir}.
     *
     * <p>Each non-blank line has the form {@code word → pronunciation}. The pronunciation is
     * passed through {@code fixBadIpa}, so that field must be initialized before this method is
     * called. A line without a pronunciation maps the word to the empty-string constant, and a
     * word that was already seen is reported on standard output and otherwise ignored.
     *
     * <p>The reader is now closed once, after the whole file has been read. Previously it was
     * closed after the first line, which made the next read fail.
     *
     * @return a sorted map from word to override pronunciation
     * @throws IOException if the file cannot be opened or read
     */
    public static Map<String, String> getOverrides() throws IOException {
        Map<String, String> result = new TreeMap<String, String>();
        try (BufferedReader reader = FileUtilities.openUTF8Reader(cldrDataDir, "internal_overrides.txt")) {
            String line;
            // As before, nothing is read unless LIMIT is positive.
            while (0 < LIMIT && (line = reader.readLine()) != null) {
                String trimmed = line.trim();
                if (trimmed.length() == 0) {
                    continue;
                }
                String[] parts = trimmed.split("\\s*→\\s*");
                String word = parts[0].trim();
                if (result.containsKey(word)) {
                    System.out.println("Overrides already contain: " + word);
                } else if (parts.length < 2) {
                    result.put(word, SubmissionLocales.DEFAULT_EXTENDED_SUBMISSION);
                } else {
                    result.put(word, fixBadIpa.transliterate(parts[1].trim()));
                }
            }
        }
        return result;
    }
}
