| Forum Home > Bigdata Learnings( Hadoop, HBase,Hive and other bigdata technologies) > Hadoop: Map Reduce Example: Calculate mean in Map Reduce | ||
|---|---|---|
|
Site Owner Posts: 83 |
The following MapReduce job calculates the mean of all the values in one column of a file stored on HDFS.
package com.impetus;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class MeanDriver { public static class MeanMapper extends Mapper<Object, Text, Text, IntWritable> { Text word = new Text("key"); IntWritable val = new IntWritable(); public void map(Object key, Text value, Context context) throws InterruptedException, IOException { String columns[] = value.toString().split("\t"); val.set(Integer.parseInt(columns[0])); context.write(word, val); } } public static class MeanReducer extends Reducer<Text, IntWritable, NullWritable, IntWritable> { NullWritable out = NullWritable.get(); IntWritable result = new IntWritable(); int count = 0; public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException { int sum = 0; for (IntWritable val : values) { sum += val.get(); count += 1; } sum = sum / count; result.set(sum); context.write(out, result); } } public static void main(String args[]) throws IOException, InterruptedException, ClassNotFoundException { Configuration conf = new Configuration(); if (args.length != 2) { System.err.println("Usage: Wordcount <in> <out>"); System.exit(2); } Job job = new Job(conf, "calculate mean"); job.setJarByClass(MeanDriver.class); job.setMapperClass(MeanMapper.class); job.setReducerClass(MeanReducer.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); } }
Note: This program takes two arguments. The first argument is the HDFS path of the input file, and the second is the path of the output directory (which must not already exist).
| |
| ||