/*
 * Decompiled with CFR 0.152.
 */
package fitlibrary.spider.utility;

import fitlibrary.exception.FitLibraryException;
import java.io.StringReader;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.xerces.dom.DocumentImpl;
import org.cyberneko.html.parsers.DOMFragmentParser;
import org.w3c.dom.DocumentFragment;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;

/**
 * Static helpers for normalising HTML text: replacing line-break tags,
 * collapsing whitespace, lower-casing tag names and stripping markup.
 *
 * <p>All methods are stateless. None of them accepts {@code null}; passing
 * {@code null} results in a {@link NullPointerException}.
 */
public class HtmlTextUtility {
    private static final String UNICODE_NON_BREAKING_SPACE = "\u00a0";

    /** A {@code br} element in any letter case, with optional whitespace and optional self-closing slash. */
    private static final Pattern BR_TAG = Pattern.compile("<br\\s*/?>", Pattern.CASE_INSENSITIVE);

    /** A line feed, optionally preceded by a carriage return. */
    private static final Pattern LINE_BREAK = Pattern.compile("\\r?\\n");

    /** Two or more consecutive whitespace characters of any kind. */
    private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s{2,}");

    /** Two or more consecutive space characters (U+0020 only). */
    private static final Pattern SPACE_RUN = Pattern.compile(" {2,}");

    /** An attribute-less opening, closing or self-closing tag whose name is upper-case letters and digits. */
    private static final Pattern UPPER_CASE_TAG = Pattern.compile("</?[A-Z0-9]+/?>");

    /**
     * Replaces every {@code br} tag with a single space.
     *
     * @param s the text to process
     * @return {@code s} with each {@code br} tag replaced by one space
     */
    public static String brToSpace(String s) {
        return HtmlTextUtility.replaceBr(s, " ");
    }

    /**
     * Replaces every {@code br} tag with the given replacement text.
     *
     * @param stringWithBrTag the text to process
     * @param replacement literal text inserted in place of each tag
     * @return the text with all {@code br} tags replaced
     */
    private static String replaceBr(String stringWithBrTag, String replacement) {
        // quoteReplacement keeps '$' and '\' in the replacement literal.
        return BR_TAG.matcher(stringWithBrTag).replaceAll(Matcher.quoteReplacement(replacement));
    }

    /**
     * Removes every line break ({@code \n} or {@code \r\n}) without inserting
     * anything in its place. A carriage return that is not followed by a line
     * feed is kept.
     *
     * @param s the text to process
     * @return {@code s} without line breaks
     */
    public static String crLfRemoved(String s) {
        return LINE_BREAK.matcher(s).replaceAll("");
    }

    /**
     * Replaces both the {@code &nbsp;} entity and the U+00A0 character with
     * an ordinary space.
     *
     * @param s the text to process
     * @return {@code s} with non-breaking spaces converted to spaces
     */
    public static String nonBreakingSpaceToSpace(String s) {
        return s.replace("&nbsp;", " ").replace(UNICODE_NON_BREAKING_SPACE, " ");
    }

    /**
     * Collapses each run of two or more whitespace characters (spaces, tabs,
     * line breaks, ...) into one space. A single whitespace character that
     * stands alone is left untouched, whatever it is.
     *
     * @param s the text to process
     * @return {@code s} with whitespace runs collapsed
     */
    public static String spacesToSingleSpace(String s) {
        return WHITESPACE_RUN.matcher(s).replaceAll(" ");
    }

    /**
     * Replaces every tab character with a space.
     *
     * @param s the text to process
     * @return {@code s} with tabs converted to spaces
     */
    public static String tabToSpace(String s) {
        return s.replace('\t', ' ');
    }

    /**
     * Lower-cases attribute-less tags whose name is written in upper case,
     * e.g. {@code <B>}, {@code </H1>} or {@code <BR/>}. Tags that carry
     * attributes or contain lower-case letters are not matched and are
     * returned unchanged.
     *
     * @param html the markup to process
     * @return {@code html} with the matching tags lower-cased
     */
    public static String lowerCaseTags(String html) {
        Matcher tags = UPPER_CASE_TAG.matcher(html);
        StringBuilder out = new StringBuilder(html.length());
        int copiedUpTo = 0;
        while (tags.find()) {
            out.append(html, copiedUpTo, tags.start());
            // Locale.ROOT: tag names are ASCII, so the result must not depend on
            // the default locale (e.g. Turkish maps 'I' to a dotless 'i').
            out.append(tags.group().toLowerCase(Locale.ROOT));
            copiedUpTo = tags.end();
        }
        out.append(html, copiedUpTo, html.length());
        return out.toString();
    }

    /**
     * Parses the argument as an HTML fragment and returns the concatenated
     * content of the fragment's top-level text nodes. Text that sits inside a
     * nested element is therefore dropped together with that element.
     *
     * @param stringWithInnerHtml the HTML fragment to process
     * @return the top-level text of the fragment, or an empty string if there is none
     * @throws FitLibraryException wrapping any exception raised while configuring
     *         the parser or parsing the fragment
     */
    public static String removeInnerHtml(String stringWithInnerHtml) {
        DOMFragmentParser parser = new DOMFragmentParser();
        try {
            parser.setFeature("http://cyberneko.org/html/features/document-fragment", true);
            DocumentImpl document = new DocumentImpl();
            DocumentFragment fragment = document.createDocumentFragment();
            parser.parse(new InputSource(new StringReader(stringWithInnerHtml)), fragment);
            StringBuilder content = new StringBuilder();
            NodeList topLevelNodes = fragment.getChildNodes();
            for (int i = 0; i < topLevelNodes.getLength(); i++) {
                Node child = topLevelNodes.item(i);
                if (child.getNodeType() == Node.TEXT_NODE) {
                    content.append(child.getTextContent());
                }
            }
            return content.toString();
        }
        catch (Exception e) {
            // Kept broad on purpose: callers only ever see FitLibraryException.
            throw new FitLibraryException(e);
        }
    }

    /**
     * Collapses runs of spaces into a single space and then removes
     * everything between a {@code <} and the next {@code >}, inclusive.
     *
     * <p>Spaces are collapsed before the tags are removed, so two spaces that
     * only become adjacent once a tag between them is gone are both kept.
     * Processing stops at the first {@code <} that has no closing {@code >};
     * the text from there on is returned as it is.
     *
     * @param text the text to process
     * @return {@code text} with space runs collapsed and tags stripped
     */
    public static String tagless(String text) {
        String s = SPACE_RUN.matcher(text).replaceAll(" ");
        StringBuilder out = new StringBuilder(s.length());
        int from = 0;
        int open;
        int close;
        // One left-to-right pass; copies the text between tags instead of
        // rebuilding the whole string for every tag removed.
        while ((open = s.indexOf('<', from)) >= 0 && (close = s.indexOf('>', open)) >= 0) {
            out.append(s, from, open);
            from = close + 1;
        }
        out.append(s, from, s.length());
        return out.toString();
    }
}

