package edu.umd.cloud9.collection.trecweb;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Logger;

/* loaded from: input_file:edu/umd/cloud9/collection/trecweb/RepackGov2Documents.class */
public class RepackGov2Documents extends Configured implements Tool {
    /** Logger used by {@link #run(String[])} to report the tool name, paths and compression settings. */
    private static final Logger sLogger = Logger.getLogger(RepackGov2Documents.class);

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:edu/umd/cloud9/collection/trecweb/RepackGov2Documents$Documents.class */
    /**
     * Counter keys reported by the mapper through {@code Reporter.incrCounter}.
     */
    public enum Documents {
        /** Incremented by one for every record the mapper handles. */
        Count;

        /**
         * Returns all constants of this enum in declaration order.
         *
         * <p>The result is a freshly allocated array on every call, so callers may
         * modify it without affecting later calls.
         *
         * @return a new array containing every {@code Documents} constant
         */
        public static Documents[] valuesCustom() {
            return values().clone();
        }
    }

    /* loaded from: input_file:edu/umd/cloud9/collection/trecweb/RepackGov2Documents$MyMapper.class */
    private static class MyMapper extends MapReduceBase implements Mapper<LongWritable, TrecWebDocument, LongWritable, TrecWebDocument> {
        private MyMapper() {
        }

        public void map(LongWritable longWritable, TrecWebDocument trecWebDocument, OutputCollector<LongWritable, TrecWebDocument> outputCollector, Reporter reporter) throws IOException {
            reporter.incrCounter(Documents.Count, 1L);
            outputCollector.collect(longWritable, trecWebDocument);
        }

        public /* bridge */ /* synthetic */ void map(Object obj, Object obj2, OutputCollector outputCollector, Reporter reporter) throws IOException {
            map((LongWritable) obj, (TrecWebDocument) obj2, (OutputCollector<LongWritable, TrecWebDocument>) outputCollector, reporter);
        }
    }

    /**
     * Private constructor: within this file the tool is only instantiated by
     * {@link #main(String[])}, which hands the instance to {@code ToolRunner}.
     */
    private RepackGov2Documents() {
    }

    /**
     * Writes the expected argument list, followed by the generic Hadoop command
     * usage, to standard output.
     *
     * @return always {@code -1}, so callers can return it directly as an error code
     */
    private static int printUsage() {
        final java.io.PrintStream stdout = System.out;
        stdout.println("usage: [base-path] [output-path] (block|record|none)");
        ToolRunner.printGenericCommandUsage(stdout);
        return -1;
    }

    /**
     * Configures and runs a map-only job that rewrites the collection as
     * SequenceFiles of ({@code LongWritable}, {@code TrecWebDocument}) pairs.
     *
     * <p>Exactly three arguments are expected:
     * <ol>
     *   <li>base path; the sub-directories {@code GX000} .. {@code GX272} below it are used as input</li>
     *   <li>output path; deleted recursively before the job is started</li>
     *   <li>compression type: {@code block}, {@code record} or {@code none}</li>
     * </ol>
     *
     * <p>Invalid arguments are reported by returning {@code -1} rather than by
     * terminating the JVM, so the method is safe to call from an embedding
     * program; {@link #main(String[])} turns the return value into the exit status.
     *
     * @param strArr the tool arguments described above
     * @return {@code 0} once the job has run, {@code -1} if the argument count is
     *         wrong or the compression type is not recognised
     * @throws Exception if deleting the output path or running the job fails
     */
    @Override
    public int run(String[] strArr) throws Exception {
        if (strArr.length != 3) {
            return printUsage();
        }

        final String basePath = strArr[0];
        final String outputPath = strArr[1];
        final String compressionType = strArr[2];

        final boolean useBlock = compressionType.equals("block");
        final boolean useRecord = compressionType.equals("record");
        final boolean useNone = compressionType.equals("none");
        if (!useBlock && !useRecord && !useNone) {
            System.err.println("Error: \"" + compressionType + "\" unknown compression type!");
            // Return instead of System.exit(-1): the caller decides what to do with the failure.
            return -1;
        }

        final int lastSegment = 272;          // input directories run from GX000 to GX272
        final int blockSizeBytes = 1000000;   // value written to io.seqfile.compress.blocksize

        // Start from the Configuration injected via Configured#setConf so that
        // generic options are honoured; fall back to a fresh JobConf if none was set.
        final Configuration injected = getConf();
        final JobConf jobConf = injected == null
                ? new JobConf(RepackGov2Documents.class)
                : new JobConf(injected, RepackGov2Documents.class);
        jobConf.setJobName("RepackGov2Documents");

        sLogger.info("Tool name: RepackGov2Documents");
        sLogger.info(" - base path: " + basePath);
        sLogger.info(" - output path: " + outputPath);
        sLogger.info(" - compression type: " + compressionType);
        if (useBlock) {
            sLogger.info(" - block size: " + blockSizeBytes);
        }

        jobConf.setNumMapTasks(10);
        jobConf.setNumReduceTasks(0);

        for (int i = 0; i <= lastSegment; i++) {
            // Locale.ROOT keeps the zero-padded number in ASCII digits on every platform.
            final String segment = String.format(java.util.Locale.ROOT, "GX%03d", i);
            FileInputFormat.addInputPath(jobConf, new Path(basePath + "/" + segment));
        }

        SequenceFileOutputFormat.setOutputPath(jobConf, new Path(outputPath));
        if (useNone) {
            SequenceFileOutputFormat.setCompressOutput(jobConf, false);
        } else {
            SequenceFileOutputFormat.setCompressOutput(jobConf, true);
            if (useRecord) {
                SequenceFileOutputFormat.setOutputCompressionType(jobConf, SequenceFile.CompressionType.RECORD);
            } else {
                SequenceFileOutputFormat.setOutputCompressionType(jobConf, SequenceFile.CompressionType.BLOCK);
                jobConf.setInt("io.seqfile.compress.blocksize", blockSizeBytes);
            }
        }

        jobConf.setInputFormat(TrecWebDocumentInputFormat.class);
        jobConf.setOutputFormat(SequenceFileOutputFormat.class);
        jobConf.setOutputKeyClass(LongWritable.class);
        jobConf.setOutputValueClass(TrecWebDocument.class);
        jobConf.setMapperClass(MyMapper.class);

        // Remove any previous output so the job does not fail on an existing directory.
        FileSystem.get(jobConf).delete(new Path(outputPath), true);
        JobClient.runJob(jobConf);
        return 0;
    }

    /**
     * Command-line entry point: runs this tool through {@code ToolRunner} with a
     * fresh {@code Configuration} and exits the JVM with the tool's return code.
     *
     * @param strArr command-line arguments, passed to {@code ToolRunner.run}
     * @throws Exception if the tool run fails
     */
    public static void main(String[] strArr) throws Exception {
        final Configuration conf = new Configuration();
        final Tool tool = new RepackGov2Documents();
        final int exitCode = ToolRunner.run(conf, tool, strArr);
        System.exit(exitCode);
    }
}
