package com.bjsxt.mr;
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Word count
 * @author tingyu
 * @date 2016-02-29 00:44
 */
/**
 * KEYIN:   byte offset at which the current line starts in the input (LongWritable)
 * VALUEIN: the line of text (Text)
 * KEYOUT:  a single word (Text)
 * VALUEOUT: the count for that word, always 1 at the map stage (IntWritable)
 */
public class WcMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    /*
     * map() is called once for each line of the input split; key is the byte
     * offset at which that line starts.
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        StringTokenizer st = new StringTokenizer(line); // splits on whitespace by default
        while (st.hasMoreTokens()) {
            String word = st.nextToken();
            context.write(new Text(word), new IntWritable(1)); // map output: (word, 1)
        }
    }
}
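The mapper can be checked in isolation before touching the cluster. The sketch below is not part of the original code; it assumes the MRUnit test library (org.apache.hadoop.mrunit) is on the classpath, and the class name WcMapperCheck is only illustrative. It feeds one line to WcMapper and verifies the (word, 1) pairs it emits.

package com.bjsxt.mr;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;

public class WcMapperCheck {
    public static void main(String[] args) throws IOException {
        // Feed one line to the mapper and assert the (word, 1) pairs it emits, in order
        MapDriver.newMapDriver(new WcMapper())
                .withInput(new LongWritable(0), new Text("hello hadoop hello"))
                .withOutput(new Text("hello"), new IntWritable(1))
                .withOutput(new Text("hadoop"), new IntWritable(1))
                .withOutput(new Text("hello"), new IntWritable(1))
                .runTest();
    }
}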
package com.bjsxt.mr;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
/**
 * Word count
 * @author tingyu
 * @date 2016-02-29 00:44
 */
/*
 * KEYIN:   the mapper's output key (the word, Text)
 * VALUEIN: the mapper's output value (IntWritable)
 * KEYOUT:  the word (Text)
 * VALUEOUT: the total count for that word (IntWritable)
 */
public class WcReduce extends Reducer<Text, IntWritable, Text, IntWritable> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable val : values) {
            sum += val.get();
        }
        context.write(key, new IntWritable(sum)); // emit (word, total count)
    }
}
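A matching sketch for the reducer, again assuming MRUnit is available (WcReduceCheck is an illustrative name): all the 1s grouped under one key are summed into a single total.

package com.bjsxt.mr;
import java.io.IOException;
import java.util.Arrays;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;

public class WcReduceCheck {
    public static void main(String[] args) throws IOException {
        // Two 1s grouped under "hello" should be reduced to ("hello", 2)
        ReduceDriver.newReduceDriver(new WcReduce())
                .withInput(new Text("hello"), Arrays.asList(new IntWritable(1), new IntWritable(1)))
                .withOutput(new Text("hello"), new IntWritable(2))
                .runTest();
    }
}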
package com.bjsxt.mr;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Word count
 * @author tingyu
 * @date 2016-02-29 00:44
 */
public class JobRun {
    public static void main(String[] args) {
        Configuration config = new Configuration();
        // Must match the jobtracker address in hadoop-1.2/conf/mapred-site.xml
        config.set("mapred.job.tracker", "192.168.0.200:9001");
        config.set("fs.default.name", "hdfs://192.168.0.200:9000");
        // If submitting from local Eclipse fails, point the job at a pre-built jar:
        //config.set("mapred.jar", "C:\\Users\\tingyu\\Desktop\\hadoop\\wordCount.jar");
        try {
            Job job = new Job(config, "word count");
            job.setJarByClass(JobRun.class);
            job.setMapperClass(WcMapper.class);
            job.setReducerClass(WcReduce.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            // Number of reduce tasks (default is 1)
            //job.setNumReduceTasks(2);
            FileInputFormat.addInputPath(job, new Path("/opt/input/wc"));
            // The output directory must not already exist when the job starts
            FileOutputFormat.setOutputPath(job, new Path("/opt/output/wc"));
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
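Besides submitting from Eclipse, the three classes can be exported as a jar (for example the wordCount.jar mentioned above) and run directly on the cluster with hadoop jar wordCount.jar com.bjsxt.mr.JobRun. With a single reducer, the result can then be read with hadoop fs -cat /opt/output/wc/part-r-00000. If /opt/output/wc is left over from an earlier run, remove it first (hadoop fs -rmr /opt/output/wc), since the job fails when the output directory already exists.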