/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.parsefilter.naivebayes;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.HashMap;
import java.util.HashSet;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class Train {
    /**
     * Removes the first occurrence of {@code tomatch} from {@code line}.
     *
     * @param tomatch the literal text to look for (not a regular expression)
     * @param line the text to search
     * @return {@code line} with the first occurrence of {@code tomatch} cut
     *         out, or {@code line} itself when {@code tomatch} does not occur
     */
    public static String replacefirstoccuranceof(String tomatch, String line) {
        final int start = line.indexOf(tomatch);
        if (start >= 0) {
            // Cut the matched span out and keep everything around it.
            return new StringBuilder(line)
                    .delete(start, start + tomatch.length())
                    .toString();
        }
        return line;
    }

    /**
     * Records one more occurrence of {@code key} in {@code dict}.
     *
     * <p>An empty key is ignored and leaves the map untouched. A key that is
     * not yet present is inserted with a count of 1; otherwise its stored
     * count is incremented by one.
     *
     * @param dict the word-to-count map to update in place
     * @param key the word to count
     */
    public static void updateHashMap(HashMap<String, Integer> dict, String key) {
        if (key.isEmpty()) {
            return;
        }
        int occurrences = dict.containsKey(key) ? dict.get(key) + 1 : 1;
        dict.put(key, occurrences);
    }

    /**
     * Serialises a word-count map as a single line of comma-separated
     * {@code word:count} pairs, e.g. {@code "spam:3,offer:1"}.
     *
     * <p>Pairs appear in the iteration order of {@code dict.keySet()}. There
     * is no trailing comma.
     *
     * @param dict the word-to-count map to serialise
     * @return the joined pairs, or an empty string when {@code dict} is empty
     */
    public static String flattenHashMap(HashMap<String, Integer> dict) {
        StringBuilder flattened = new StringBuilder();
        for (String key : dict.keySet()) {
            // Write the separator before every pair except the first, so an
            // empty map yields "" instead of failing on a trailing-comma trim.
            if (flattened.length() > 0) {
                flattened.append(',');
            }
            flattened.append(key).append(':').append(String.valueOf(dict.get(key)));
        }
        return flattened.toString();
    }

    /**
     * Reads a labelled training file and writes the resulting word statistics
     * to the file {@code naivebayes-model} on the configured Hadoop
     * {@link FileSystem}, overwriting any existing file.
     *
     * <p>Input: one example per line in the form {@code label<TAB>text}. A
     * label of {@code "0"} is counted as class 0; any other label (including
     * a line with no tab at all) is counted as class 1. Blank lines are
     * skipped. The text is stripped of everything except ASCII letters and
     * spaces, lower-cased and split on single spaces.
     *
     * <p>Output, one value per line: vocabulary size, then for class
     * {@code 0} and class {@code 1} in turn: the class label, number of
     * examples, number of tokens, and the flattened word-frequency map.
     *
     * @param filepath name of the training file, resolved through
     *        {@link Configuration#getConfResourceAsReader(String)}
     * @throws IOException if the training file cannot be located or read, or
     *         the model file cannot be written
     */
    public static void start(String filepath) throws IOException {
        int numof_ir = 0;
        int numof_r = 0;
        int numwords_ir = 0;
        int numwords_r = 0;
        HashSet<String> uniquewords = new HashSet<String>();
        HashMap<String, Integer> wordfreq_ir = new HashMap<String, Integer>();
        HashMap<String, Integer> wordfreq_r = new HashMap<String, Integer>();

        Configuration configuration = new Configuration();
        java.io.Reader resource = configuration.getConfResourceAsReader(filepath);
        if (resource == null) {
            // Fail with a descriptive error instead of an opaque
            // NullPointerException from the BufferedReader constructor.
            throw new IOException("Training file not found in the configuration resources: " + filepath);
        }

        try (BufferedReader bufferedReader = new BufferedReader(resource)) {
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                if (line.trim().isEmpty()) {
                    // A blank line carries no example; do not count it as a document.
                    continue;
                }
                // The label is everything before the first tab. A line without
                // a tab is used as both label and text.
                int tab = line.indexOf('\t');
                String target = tab < 0 ? line : line.substring(0, tab);
                String text = tab < 0 ? line : line.substring(tab + 1);
                String[] linearray = text.replaceAll("[^a-zA-Z ]", "").toLowerCase().split(" ");

                // NOTE(review): empty tokens (e.g. from consecutive spaces) still
                // count towards the word totals and the vocabulary although
                // updateHashMap ignores them; kept so model statistics do not
                // change silently -- confirm whether they should be excluded.
                HashMap<String, Integer> wordfreq;
                if (target.equals("0")) {
                    ++numof_ir;
                    numwords_ir += linearray.length;
                    wordfreq = wordfreq_ir;
                } else {
                    ++numof_r;
                    numwords_r += linearray.length;
                    wordfreq = wordfreq_r;
                }
                for (String word : linearray) {
                    uniquewords.add(word);
                    Train.updateHashMap(wordfreq, word);
                }
            }
        }

        // Build the complete model text before touching the output file, so a
        // failure while serialising cannot leave a truncated model behind.
        StringBuilder model = new StringBuilder();
        model.append(uniquewords.size()).append("\n");
        model.append("0\n");
        model.append(numof_ir).append("\n");
        model.append(numwords_ir).append("\n");
        model.append(Train.flattenHashMap(wordfreq_ir)).append("\n");
        model.append("1\n");
        model.append(numof_r).append("\n");
        model.append(numwords_r).append("\n");
        model.append(Train.flattenHashMap(wordfreq_r)).append("\n");

        // FileSystem.get returns an instance owned by Hadoop; only the stream
        // created from it is closed here.
        FileSystem fs = FileSystem.get(configuration);
        Path path = new Path("naivebayes-model");
        try (BufferedWriter writer =
                new BufferedWriter(new OutputStreamWriter(fs.create(path, true), "UTF-8"))) {
            writer.write(model.toString());
        }
    }
}

