/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.trees.international.pennchinese;

import edu.stanford.nlp.io.EncodingPrintWriter;
import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.trees.BobChrisTreeNormalizer;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeFactory;
import edu.stanford.nlp.trees.TreeTransformer;
import edu.stanford.nlp.trees.international.pennchinese.ChineseTreebankLanguagePack;
import edu.stanford.nlp.util.Filter;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;

/**
 * A {@link BobChrisTreeNormalizer} for the Chinese treebank that additionally
 * removes empty ({@code -NONE-}) elements and patches a fixed list of known
 * annotation errors.
 *
 * <p>Every correction (and every suspicious tree that is left alone) is
 * reported on {@code EncodingPrintWriter.err} using the GB18030 encoding.
 * Optionally, {@code -TMP} functional tags are kept on labels and a
 * character-level tag extender is applied to the finished tree.
 */
public class CTBErrorCorrectingTreeNormalizer
extends BobChrisTreeNormalizer {
    private static final long serialVersionUID = -8203853817025401845L;

    /** Encoding used for all diagnostic output written by this class. */
    private static final String ENCODING = "GB18030";

    /** Loaded reflectively, and only when character-level tags are requested. */
    private static final String TAG_EXTENDER_CLASS = "edu.stanford.nlp.trees.international.pennchinese.CharacterLevelTagExtender";

    private static final Pattern NP_TMP_PATTERN = Pattern.compile("NP.*-TMP.*");
    private static final Pattern PP_TMP_PATTERN = Pattern.compile("PP.*-TMP.*");
    private static final Pattern TMP_PATTERN = Pattern.compile(".*-TMP.*");

    /** Words wrongly tagged PU that are retagged NN. */
    private static final Pattern PU_AS_NN_WORD_PATTERN = Pattern.compile("tw|\u534a\u7a74\u5f0f");

    /** Character-level tag extender, or {@code null} when character tags are off. */
    private final TreeTransformer tagExtender;
    private final boolean splitNPTMP;
    private final boolean splitPPTMP;
    private final boolean splitXPTMP;
    private final Filter<Tree> chineseEmptyFilter = new ChineseEmptyFilter();

    /**
     * Creates a normalizer that keeps no {@code -TMP} tags and does not apply
     * character-level tags.
     */
    public CTBErrorCorrectingTreeNormalizer() {
        this(false, false, false, false);
    }

    /**
     * Creates a normalizer.
     *
     * @param splitNPTMP keep {@code -TMP} on labels matching {@code NP.*-TMP.*}
     * @param splitPPTMP keep {@code -TMP} on labels matching {@code PP.*-TMP.*}
     * @param splitXPTMP keep {@code -TMP} on any label containing {@code -TMP}
     * @param charTags   if true, a {@code CharacterLevelTagExtender} is
     *                   instantiated reflectively and applied at the end of
     *                   {@link #normalizeWholeTree(Tree, TreeFactory)}
     * @throws RuntimeException wrapping any failure to load or instantiate the
     *                          tag extender
     */
    public CTBErrorCorrectingTreeNormalizer(boolean splitNPTMP, boolean splitPPTMP, boolean splitXPTMP, boolean charTags) {
        this.splitNPTMP = splitNPTMP;
        this.splitPPTMP = splitPPTMP;
        this.splitXPTMP = splitXPTMP;
        this.tagExtender = charTags ? newTagExtender() : null;
    }

    /**
     * Instantiates the character-level tag extender through its no-argument
     * constructor.
     */
    private static TreeTransformer newTagExtender() {
        try {
            // getDeclaredConstructor().newInstance() is the documented replacement
            // for the deprecated Class.newInstance().
            return (TreeTransformer)Class.forName(TAG_EXTENDER_CLASS).getDeclaredConstructor().newInstance();
        }
        catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Reduces a node label to its basic category, re-appending {@code -TMP}
     * when the corresponding split option is enabled.
     *
     * @param label the raw label; may be {@code null}
     * @return {@code "ROOT"} for a {@code null} label, otherwise the basic
     *         category of {@code label}, followed by {@code "-TMP"} if the raw
     *         label carried a temporal tag that this instance is configured to
     *         keep
     */
    protected String cleanUpLabel(String label) {
        if (label == null) {
            return "ROOT";
        }
        // The decision is taken on the label as passed in, because
        // basicCategory() is applied afterwards (presumably stripping -TMP).
        boolean keepTmp = (this.splitXPTMP && TMP_PATTERN.matcher(label).matches())
                || (this.splitPPTMP && PP_TMP_PATTERN.matcher(label).matches())
                || (this.splitNPTMP && NP_TMP_PATTERN.matcher(label).matches());
        String category = this.tlp.basicCategory(label);
        return keepTmp ? category + "-TMP" : category;
    }

    /**
     * Normalizes a whole tree: prunes empty elements, splices out nodes matched
     * by the inherited {@code aOverAFilter}, checks the shape of the initial
     * rewrite, applies the per-node corrections, and finally runs the tag
     * extender if one was configured.
     *
     * @param tree the tree to normalize
     * @param tf   factory used while pruning and for the FRAG wrapper node
     * @return the tree produced by pruning and splicing, corrected in place, or
     *         the tag extender's output when character-level tags are enabled
     */
    public Tree normalizeWholeTree(Tree tree, TreeFactory tf) {
        // NOTE(review): if prune() can return null (e.g. when every node is
        // filtered out) this line throws - confirm against Tree.prune.
        Tree newTree = tree.prune(this.chineseEmptyFilter, tf).spliceOut(this.aOverAFilter);
        checkInitialRewrite(tree, newTree, tf);
        for (Tree subtree : newTree) {
            dropStrayCpUnderRoot(subtree, newTree);
            // Evaluated after the ROOT repair, which may have changed the children.
            if (subtree.isPreTerminal()) {
                correctPreterminalTag(subtree, newTree);
            } else if (!subtree.isLeaf()) {
                // Leaves are words, not phrasal tags; a word spelled "NN" or
                // "MSP" must not be rewritten.
                correctPhrasalTag(subtree);
            }
        }
        if (this.tagExtender != null) {
            newTree = this.tagExtender.transformTree(newTree);
        }
        return newTree;
    }

    /**
     * Reports a top node that does not have exactly one child, and wraps a
     * single non-phrasal child in a FRAG node.
     */
    private static void checkInitialRewrite(Tree original, Tree newTree, TreeFactory tf) {
        Tree[] kids = newTree.children();
        if (kids.length > 1) {
            report("Possible error: non-unary initial rewrite: " + newTree.localTree());
        } else if (kids.length == 1) {
            Tree child = kids[0];
            if (!child.isPhrasal()) {
                report("Correcting error: treebank tree is not phrasal; wrapping in FRAG: " + child);
                Tree added = tf.newTreeNode("FRAG", Arrays.asList(kids));
                newTree.setChild(0, added);
            }
        } else {
            report("Error: tree with no children: " + original);
        }
    }

    /**
     * Repairs a ROOT node whose first child is a bare leaf {@code CP} by
     * replacing its children with a sub-range of the original children.
     */
    private static void dropStrayCpUnderRoot(Tree subtree, Tree wholeTree) {
        // A childless node has no first child to inspect; this also covers the
        // "tree with no children" case reported earlier.
        if (subtree.isLeaf() || !"ROOT".equals(subtree.value())) {
            return;
        }
        Tree first = subtree.firstChild();
        if (!first.isLeaf() || !"CP".equals(first.value())) {
            return;
        }
        report("Correcting error: seriously messed up tree in CTB6: " + wholeTree);
        List<Tree> children = subtree.getChildrenAsList();
        // NOTE(review): this range drops the LAST child as well as the stray
        // leading "CP" leaf, and throws if there is only one child - confirm
        // that this is what the broken CTB6 tree requires.
        children = children.subList(1, children.size() - 1);
        subtree.setChildren(children);
    }

    /**
     * Fixes known wrong part-of-speech tags on a preterminal: {@code NP} used
     * as a tag, and a few specific words tagged {@code PU}.
     */
    private static void correctPreterminalTag(Tree preterminal, Tree wholeTree) {
        String tag = preterminal.value();
        String word = preterminal.firstChild().value();
        if ("NP".equals(tag)) {
            if (ChineseTreebankLanguagePack.chineseDouHaoAcceptFilter().accept(word)) {
                report("Correcting error: NP preterminal over douhao; preterminal changed to PU: " + preterminal);
                preterminal.setValue("PU");
                return;
            }
            // Both remaining cases retag to NN; only the report differs.
            Tree parent = preterminal.parent(wholeTree);
            if (parent != null && "NP".equals(parent.value())) {
                report("Correcting error: NP preterminal w/ NP parent; preterminal changed to NN: " + parent);
            } else {
                report("Correcting error: NP preterminal w/o NP parent, changing preterminal to NN: " + parent);
            }
            preterminal.setValue("NN");
        } else if ("PU".equals(tag)) {
            if ("\u4ed6".equals(word)) {
                report("Correcting error: \"\u4ed6\" under PU tag; tag changed to PN: " + preterminal);
                preterminal.setValue("PN");
            } else if (word != null && PU_AS_NN_WORD_PATTERN.matcher(word).matches()) {
                report("Correcting error: \"" + word + "\" under PU tag; tag changed to NN: " + preterminal);
                preterminal.setValue("NN");
            } else if ("33".equals(word)) {
                report("Correcting error: \"33\" under PU tag; tag changed to CD: " + preterminal);
                preterminal.setValue("CD");
            }
        }
    }

    /**
     * Relabels a non-preterminal, non-leaf node that carries a part-of-speech
     * tag: {@code NN} becomes {@code NP} and {@code MSP} becomes {@code VP}.
     */
    private static void correctPhrasalTag(Tree phrase) {
        String category = phrase.value();
        if ("NN".equals(category)) {
            report("Correcting error: NN phrasal tag changed to NP: " + phrase);
            phrase.setValue("NP");
        } else if ("MSP".equals(category)) {
            report("Correcting error: MSP phrasal tag changed to VP: " + phrase);
            phrase.setValue("VP");
        }
    }

    /** Writes one diagnostic line to the error stream in {@link #ENCODING}. */
    private static void report(String message) {
        EncodingPrintWriter.err.println(message, ENCODING);
    }

    /*
     * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
     */
    /**
     * Filter that rejects empty elements: nodes whose label starts with
     * {@code -NONE-} and whose only child is a leaf. All other nodes are
     * accepted.
     */
    private static class ChineseEmptyFilter
    implements Filter<Tree> {
        private static final long serialVersionUID = 8914098359495987617L;

        /** Labels treated as empty-element markers. */
        private static final Pattern NONE_LABEL_PATTERN = Pattern.compile("-NONE-.*");

        private ChineseEmptyFilter() {
        }

        /**
         * @param t the node to test
         * @return {@code false} if {@code t} is an empty element that should
         *         be removed, {@code true} otherwise
         */
        @Override
        public boolean accept(Tree t) {
            Label l = t.label();
            String value = l == null ? null : l.value();
            if (value == null || !NONE_LABEL_PATTERN.matcher(value).matches()) {
                return true;
            }
            Tree[] kids = t.children();
            if (t.isLeaf() || kids.length != 1 || !kids[0].isLeaf()) {
                return true;
            }
            // A label such as "-NONE-x" is still removed, but is reported as errant.
            if (!"-NONE-".equals(value)) {
                report("Deleting errant node " + value + " as if -NONE-: " + t);
            }
            return false;
        }
    }
}

