Example code using the Hadoop MapReduce APIs directly, from my April 2011 Atlanta Java Users Group presentation. The job reads CSV records of the form color,weight,width and computes the average width and weight for each color.
package org.ajug;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
public class MnM {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = new Job(conf, "ajug");
        // Ship the jar containing this class to the cluster; without this,
        // tasks can fail with ClassNotFoundException.
        job.setJarByClass(MnM.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Data.class);

        job.setMapperClass(MnMMapper.class);
        job.setReducerClass(MnMReducer.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        // args[0] is the input path; args[1] is the output path,
        // which must not already exist.
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
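
The driver above wires everything up by hand. As a point of comparison (not from the presentation), the same job can be written against Hadoop's Tool interface and launched through ToolRunner, which makes generic options such as -D name=value work from the command line. The class name MnMTool is invented for this sketch:

package org.ajug;

import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class MnMTool extends Configured implements Tool {

    // getConf() returns the Configuration that ToolRunner populated
    // from any generic command-line options.
    public int run(String[] args) throws Exception {
        Job job = new Job(getConf(), "ajug");
        job.setJarByClass(MnMTool.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Data.class);
        job.setMapperClass(MnMMapper.class);
        job.setReducerClass(MnMReducer.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new MnMTool(), args));
    }
}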
================================================
package org.ajug;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
public class Data implements WritableComparable<Data> {

    private DoubleWritable width = new DoubleWritable();
    private DoubleWritable weight = new DoubleWritable();

    public void set(double a_width, double a_weight) {
        width.set(a_width);
        weight.set(a_weight);
    }

    public double getWidth() {
        return width.get();
    }

    public double getWeight() {
        return weight.get();
    }

    // Serialization used by Hadoop when shuffling values between map and reduce.
    public void write(DataOutput out) throws IOException {
        width.write(out);
        weight.write(out);
    }

    public void readFields(DataInput in) throws IOException {
        width.readFields(in);
        weight.readFields(in);
    }

    public int hashCode() {
        return width.hashCode() * 163 + weight.hashCode();
    }

    // Order by width, then by weight.
    public int compareTo(Data tp) {
        int cmp = width.compareTo(tp.width);
        if (cmp != 0) {
            return cmp;
        }
        return weight.compareTo(tp.weight);
    }

    // TextOutputFormat calls toString() to render the value in the output file.
    public String toString() {
        return width + "\t" + weight;
    }
}
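
Since Data implements its own serialization, it is worth sanity-checking that write() and readFields() round-trip cleanly. This harness was not part of the talk; DataRoundTrip is a made-up name, and it uses only JDK stream classes:

package org.ajug;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;

public class DataRoundTrip {
    public static void main(String[] args) throws Exception {
        Data original = new Data();
        original.set(1.5, 2.3);

        // Serialize the same way Hadoop does between map and reduce.
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        original.write(new DataOutputStream(bytes));

        // Deserialize into a fresh instance.
        Data copy = new Data();
        copy.readFields(new DataInputStream(
                new ByteArrayInputStream(bytes.toByteArray())));

        System.out.println(copy);                     // prints "1.5	2.3"
        System.out.println(original.compareTo(copy)); // prints 0
    }
}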
================================================

package org.ajug;

import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;

import java.io.IOException;
public class MnMMapper extends Mapper<LongWritable, Text, Text, Data> {
    // Reused across calls to map() to cut down on object allocation.
    private Text color = new Text();
    private Data data = new Data();

    // Input lines are CSV records of the form color,weight,width
    // (for example "red,0.75,1.5").
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        String[] fields = line.split(",");
        if (fields.length > 2) {
            color.set(fields[0]);
            double weight = Double.parseDouble(fields[1]);
            double width = Double.parseDouble(fields[2]);
            data.set(width, weight);
            context.write(color, data);
        }
    }
}

================================================

package org.ajug;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;

import java.io.IOException;
public class MnMReducer extends Reducer<Text, Data, Text, Data> {
    // Reused across calls to reduce().
    private Data data = new Data();

    public void reduce(Text key, Iterable<Data> values, Context context)
            throws IOException, InterruptedException {
        double weights = 0;
        double widths = 0;
        int count = 0;

        // Hadoop reuses the value object it hands back, so read the fields
        // out of each Data immediately rather than holding references.
        for (Data value : values) {
            count++;
            weights += value.getWeight();
            widths += value.getWidth();
        }

        // Emit the average width and weight for this color.
        data.set(widths / count, weights / count);
        context.write(key, data);
    }
}
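
To make the data flow concrete, here is a small made-up run, with values picked so the averages are exact in floating point. Given this input:

red,0.75,1.5
blue,0.5,1.0
red,1.25,2.5

the mapper emits (color, Data(width, weight)) pairs, the reducer averages them per color, and TextOutputFormat writes one tab-separated line per color: the key, the average width, and the average weight.

blue	1.0	0.5
red	2.0	1.0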