AJUG April 2011 Raw Hadoop example

Example code from my April 2011 Atlanta Java Users Group presentation, using the Hadoop MapReduce APIs directly.

package org.ajug;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

// Driver: wires the mapper, reducer, and I/O formats together and submits the job.
public class MnM {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        Job job = new Job(conf, "ajug");
        job.setJarByClass(MnM.class);  // locates the jar that ships these classes to the cluster

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Data.class);

        job.setMapperClass(MnMMapper.class);
        job.setReducerClass(MnMReducer.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.waitForCompletion(true);
    }
}
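To run the example, the classes would typically be compiled into a jar and submitted with the standard hadoop launcher, along these lines (the jar name is hypothetical):

    hadoop jar ajug-mnm.jar org.ajug.MnM <input dir> <output dir>

The input directory should contain text files with one color,weight,width record per line (see MnMMapper below), and the output directory must not already exist, since FileOutputFormat refuses to overwrite an existing path.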

================================================

package org.ajug;

import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

// Custom value type: a (width, weight) pair that Hadoop can serialize,
// deserialize, and compare between the map and reduce phases.
public class Data implements WritableComparable<Data> {

    private DoubleWritable width = new DoubleWritable();
    private DoubleWritable weight = new DoubleWritable();

    void set(double a_width, double a_weight) {
        width.set(a_width);
        weight.set(a_weight);
    }

    public double getWidth() {
        return width.get();
    }

    public double getWeight() {
        return weight.get();
    }

    public void write(DataOutput out) throws IOException {
        width.write(out);
        weight.write(out);
    }

    public void readFields(DataInput in) throws IOException {
        width.readFields(in);
        weight.readFields(in);
    }

    public int hashCode() {
        return width.hashCode() * 163 + weight.hashCode();
    }

    public int compareTo(Data tp) {
        int cmp = width.compareTo(tp.width);
        if (cmp != 0) {
            return cmp;
        }
        return weight.compareTo(tp.weight);
    }

    public String toString() {
        return "" + width + "\t" + weight;
    }
}
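As a quick sanity check of the Writable contract implemented above, the following standalone snippet (not part of the original presentation; the class name and values are illustrative) serializes a Data instance to a byte array and reads it back, which is essentially what Hadoop does when shuffling values between map and reduce.

package org.ajug;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;

// Hypothetical helper, not part of the AJUG example: round-trips a Data
// instance through its write()/readFields() methods.
public class DataRoundTrip {

    public static void main(String[] args) throws Exception {
        Data original = new Data();
        original.set(1.5, 2.25);  // width, weight (sample values)

        // Serialize the instance the way Hadoop would.
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        original.write(new DataOutputStream(bytes));

        // Deserialize into a fresh instance.
        Data copy = new Data();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));

        System.out.println(original + " -> " + copy);                  // tab-separated width and weight
        System.out.println("compareTo = " + original.compareTo(copy)); // 0 when the fields match
    }
}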

================================================

package org.ajug;

import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;

import java.io.IOException;

// Mapper: parses CSV lines of the form color,weight,width and emits
// (color, Data(width, weight)) pairs.
public class MnMMapper extends Mapper<LongWritable, Text, Text, Data> {

    private Text color = new Text();
    private Data data = new Data();

    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        String[] fields = line.split(",");
        if (fields.length > 2) {
            color.set(fields[0]);
            double weight = Double.parseDouble(fields[1]);
            double width = Double.parseDouble(fields[2]);
            data.set(width, weight);
            context.write(color, data);
        }
    }
}
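For an input record such as blue,2.3,1.7 (values illustrative), the mapper emits the key blue with a Data value holding width 1.7 and weight 2.3; lines with fewer than three comma-separated fields are silently skipped.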


================================================

package org.ajug;

import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;

import java.io.IOException;

// Reducer: averages the width and weight of every Data value seen for a color.
public class MnMReducer extends Reducer<Text, Data, Text, Data> {

    private Data data = new Data();

    public void reduce(Text key, Iterable<Data> values, Context context)
            throws IOException, InterruptedException {
        double weights = 0;
        double widths = 0;
        int count = 0;

        for (Data value : values) {
            count++;
            weights += value.getWeight();
            widths += value.getWidth();
        }

        data.set(widths / count, weights / count);
        context.write(key, data);
    }
}
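Because the job uses TextOutputFormat, each reducer output line is the key followed by Data.toString(), i.e. the color, the average width, and the average weight, separated by tabs. Note that Hadoop reuses the same Data instance while iterating over the values in reduce(), which is why the loop copies each value's fields into running sums rather than holding references to the Data objects themselves.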