AJUG April 2011 Raw Hadoop example

Example code from my April 2011 Atlanta Java Users Group presentation, using the Hadoop MapReduce APIs directly.

package org.ajug;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

// Driver: wires the mapper, reducer, and I/O formats together and submits the job.
public class MnM {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        Job job = new Job(conf, "ajug");
        job.setJarByClass(MnM.class);  // locates the jar that ships these classes to the cluster

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Data.class);

        job.setMapperClass(MnMMapper.class);
        job.setReducerClass(MnMReducer.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.waitForCompletion(true);
    }
}
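To run the example, the classes would typically be compiled into a jar and submitted with the standard hadoop launcher, along these lines (the jar name is hypothetical):

    hadoop jar ajug-mnm.jar org.ajug.MnM <input dir> <output dir>

The input directory should contain text files with one color,weight,width record per line (see MnMMapper below), and the output directory must not already exist, since FileOutputFormat refuses to overwrite an existing path.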

================================================

package org.ajug;

import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

// Custom value type: a (width, weight) pair that Hadoop can serialize,
// deserialize, and compare between the map and reduce phases.
public class Data implements WritableComparable<Data> {

    private DoubleWritable width = new DoubleWritable();
    private DoubleWritable weight = new DoubleWritable();

    void set(double a_width, double a_weight) {
        width.set(a_width);
        weight.set(a_weight);
    }

    public double getWidth() {
        return width.get();
    }

    public double getWeight() {
        return weight.get();
    }

    public void write(DataOutput out) throws IOException {
        width.write(out);
        weight.write(out);
    }

    public void readFields(DataInput in) throws IOException {
        width.readFields(in);
        weight.readFields(in);
    }

    public int hashCode() {
        return width.hashCode() * 163 + weight.hashCode();
    }

    public int compareTo(Data tp) {
        int cmp = width.compareTo(tp.width);
        if (cmp != 0) {
            return cmp;
        }
        return weight.compareTo(tp.weight);
    }

    public String toString() {
        return "" + width + "\t" + weight;
    }
}
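As a quick sanity check of the Writable contract implemented above, the following standalone snippet (not part of the original presentation; the class name and values are illustrative) serializes a Data instance to a byte array and reads it back, which is essentially what Hadoop does when shuffling values between map and reduce.

package org.ajug;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;

// Hypothetical helper, not part of the AJUG example: round-trips a Data
// instance through its write()/readFields() methods.
public class DataRoundTrip {

    public static void main(String[] args) throws Exception {
        Data original = new Data();
        original.set(1.5, 2.25);  // width, weight (sample values)

        // Serialize the instance the way Hadoop would.
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        original.write(new DataOutputStream(bytes));

        // Deserialize into a fresh instance.
        Data copy = new Data();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));

        System.out.println(original + " -> " + copy);                  // tab-separated width and weight
        System.out.println("compareTo = " + original.compareTo(copy)); // 0 when the fields match
    }
}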

================================================

package org.ajug;

import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;

import java.io.IOException;

// Mapper: parses CSV lines of the form color,weight,width and emits
// (color, Data(width, weight)) pairs.
public class MnMMapper extends Mapper<LongWritable, Text, Text, Data> {

    private Text color = new Text();
    private Data data = new Data();

    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        String[] fields = line.split(",");
        if (fields.length > 2) {
            color.set(fields[0]);
            double weight = Double.parseDouble(fields[1]);
            double width = Double.parseDouble(fields[2]);
            data.set(width, weight);
            context.write(color, data);
        }
    }
}
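For an input record such as blue,2.3,1.7 (values illustrative), the mapper emits the key blue with a Data value holding width 1.7 and weight 2.3; lines with fewer than three comma-separated fields are silently skipped.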


================================================

package org.ajug;

import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;

import java.io.IOException;

// Reducer: averages the width and weight of every Data value seen for a color.
public class MnMReducer extends Reducer<Text, Data, Text, Data> {

    private Data data = new Data();

    public void reduce(Text key, Iterable<Data> values, Context context)
            throws IOException, InterruptedException {
        double weights = 0;
        double widths = 0;
        int count = 0;

        for (Data value : values) {
            count++;
            weights += value.getWeight();
            widths += value.getWidth();
        }

        data.set(widths / count, weights / count);
        context.write(key, data);
    }
}
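Because the job uses TextOutputFormat, each reducer output line is the key followed by Data.toString(), i.e. the color, the average width, and the average weight, separated by tabs. Note that Hadoop reuses the same Data instance while iterating over the values in reduce(), which is why the loop copies each value's fields into running sums rather than holding references to the Data objects themselves.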