TECH SOLUTIONS

Click here to edit subtitle

Forums

Post Reply
Forum Home > Bigdata Learnings( Hadoop, HBase,Hive and other bigdata technologies) > Hadoop: Map Reduce Example: Calculate mean in Map Reduce

Sourav Gulati
Site Owner
Posts: 83

The following MapReduce job calculates the mean of all the values in one column of a file placed on HDFS.

 

package com.impetus;

 

import java.io.IOException;

 

import org.apache.hadoop.conf.Configuration;

 

import org.apache.hadoop.fs.Path;

 

import org.apache.hadoop.io.IntWritable;

 

import org.apache.hadoop.io.NullWritable;

 

import org.apache.hadoop.io.Text;

 

import org.apache.hadoop.mapreduce.Job;

 

import org.apache.hadoop.mapreduce.Mapper;

 

import org.apache.hadoop.mapreduce.Reducer;

 

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

 

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

 

public class MeanDriver {

    public static class MeanMapper extends

            Mapper<Object, Text, Text, IntWritable> {

        Text word = new Text("key");

        IntWritable val = new IntWritable();

        public void map(Object key, Text value, Context context)

                throws InterruptedException, IOException {

            String columns[] = value.toString().split("\t");

            val.set(Integer.parseInt(columns[0]));

            context.write(word, val);

        }

    }

    public static class MeanReducer extends

            Reducer<Text, IntWritable, NullWritable, IntWritable> {

        NullWritable out = NullWritable.get();

        IntWritable result = new IntWritable();

        int count = 0;

        public void reduce(Text key, Iterable<IntWritable> values,

                Context context) throws IOException, InterruptedException {

            int sum = 0;

            for (IntWritable val : values) {

                sum += val.get();

                count += 1;

            }

            sum = sum / count;

            result.set(sum);

            context.write(out, result);

        }

    }

    public static void main(String args[]) throws IOException,

            InterruptedException, ClassNotFoundException {

        Configuration conf = new Configuration();

        if (args.length != 2) {

            System.err.println("Usage: Wordcount <in> <out>");

            System.exit(2);

        }

        Job job = new Job(conf, "calculate mean");

        job.setJarByClass(MeanDriver.class);

        job.setMapperClass(MeanMapper.class);

        job.setReducerClass(MeanReducer.class);

        job.setMapOutputKeyClass(Text.class);

        job.setMapOutputValueClass(IntWritable.class);

        job.setOutputKeyClass(NullWritable.class);

        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));

        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);

    }

}

 

 



Note: Two arguments must be provided when running this program. The first argument is the path of the input file on HDFS, and the second is the output path (a location that must not already exist, where the job writes its result).



Click here for Other topics of BigData Technologies

 

January 18, 2013 at 7:12 AM Flag Quote & Reply

You must login to post.