#####HDFS指令
查看HDFS目录
bin/hdfs dfs -ls hdfs://vm01:9000
#####HBase操作指令
启动HBase
bin/start-hbase.sh
HBase lib下面的Hadoop相关的jar不是2.3.0,需要替换成2.3.0的jar包
rm lib/hadoop*.jar
MAC OS X下
find $HADOOP_HOME/share/hadoop/ -name "hadoop*.jar" | grep -v "test" | grep -v "sources.jar" | xargs -I{} cp {} $HBASE_HOME/lib
Linux下
find $HADOOP_HOME/share/hadoop/ -name "hadoop*.jar" | grep -v "test" | grep -v "sources.jar" | xargs -i cp {} $HBASE_HOME/lib
create 'table1', 'cf1'
#####Hive操作
$ tar -xzvf hive-x.y.z.tar.gz
$ cd hive-x.y.z
$ export HIVE_HOME=
$ export PATH=$HIVE_HOME/bin:$PATH
-- External table over comma-delimited text files on the vm01 HDFS cluster.
-- Columns: day/grp/mac/dur/role; partitioned by d (date string) and g (group).
-- Explicit INPUTFORMAT/OUTPUTFORMAT pair is equivalent to STORED AS TEXTFILE.
CREATE EXTERNAL TABLE IF NOT EXISTS history_role_ext (
    day  STRING,
    grp  STRING,
    mac  STRING,
    dur  DOUBLE,
    role STRING
)
PARTITIONED BY (
    d STRING,
    g STRING
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
STORED AS
    INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION 'hdfs://vm01:9000/hive/warehouse/history_role_ext';

-- Load a local CSV file into one static partition (OVERWRITE replaces it).
LOAD DATA LOCAL INPATH '/root/input.txt'
OVERWRITE INTO TABLE history_role_ext PARTITION (d='20140513', g='all');
-- Same schema as history_role_ext, declared with the shorthand
-- STORED AS TEXTFILE instead of explicit input/output format classes.
CREATE EXTERNAL TABLE IF NOT EXISTS history_role_external (
    day  STRING,
    grp  STRING,
    mac  STRING,
    dur  DOUBLE,
    role STRING
)
PARTITIONED BY (
    d STRING,
    g STRING
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
STORED AS TEXTFILE
LOCATION 'hdfs://vm01:9000/hive/warehouse/history_role_external';

-- Load a local CSV file into one static partition (OVERWRITE replaces it).
LOAD DATA LOCAL INPATH '/root/input.txt'
OVERWRITE INTO TABLE history_role_external PARTITION (d='20140513', g='all');
#####Hadoop操作
配置Single Node方法 http://www.alexjf.net/blog/distributed-systems/hadoop-yarn-installation-definitive-guide
http://codesfusion.blogspot.jp/2013/10/hadoop-wordcount-with-new-map-reduce-api.html
Start HDFS daemons
$HADOOP_PREFIX/bin/hdfs namenode -format
Start the namenode daemon
$HADOOP_PREFIX/sbin/hadoop-daemon.sh start namenode
Start the datanode daemon
$HADOOP_PREFIX/sbin/hadoop-daemon.sh start datanode
Start YARN daemons
Start the resourcemanager daemon
$HADOOP_PREFIX/sbin/yarn-daemon.sh start resourcemanager
Start the nodemanager daemon
$HADOOP_PREFIX/sbin/yarn-daemon.sh start nodemanager
http://wenku.baidu.com/view/d282172055270722192ef7ba.html
#####Git操作
http://www.infoq.com/cn/news/2011/03/git-adventures-branch-merge
Hive性能调优
1、数据存TextFile,查询性能比较慢,使用ORCFile,速度快
2、加载数据,先加载TextFile到table1,再从table1加载到table2, table2采用ORCFile格式存储
设置reduce job个数, set mapreduce.job.reduces=6;
MapReduce
1、出现Type错误,大部分错误原因是 job.setOutputValueClass(LongWritable.class); 设置不正确。这里的OutputValueClass不是Reduce的Output,而是Map的Output。
-- Raw role-history table (TEXTFILE) on the vm01 cluster.
-- Columns: day/grp/mac/st/et/dur/role/fac; partitioned by d (date string).
CREATE EXTERNAL TABLE IF NOT EXISTS history_role (
    day  STRING,
    grp  INT,
    mac  STRING,
    st   STRING,
    et   STRING,
    dur  INT,
    role INT,
    fac  STRING
)
PARTITIONED BY (
    d STRING
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
STORED AS TEXTFILE
LOCATION 'hdfs://vm01:9000/hive/warehouse/history_role';
-- Single-node variant of history_role: identical schema, HDFS location on
-- localhost. NOTE(review): same table name as the vm01 variant above — only
-- one can exist per metastore; pick the one matching your cluster.
CREATE EXTERNAL TABLE IF NOT EXISTS history_role (
    day  STRING,
    grp  INT,
    mac  STRING,
    st   STRING,
    et   STRING,
    dur  INT,
    role INT,
    fac  STRING
)
PARTITIONED BY (
    d STRING
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
STORED AS TEXTFILE
LOCATION 'hdfs://localhost/hive/warehouse/history_role';
-- ORC variant of history_role: bucketed/sorted for faster queries (see the
-- tuning notes above). NOTE(review): reuses the history_role table name — a
-- metastore can hold only one; rename (e.g. history_role_orc) before use.
CREATE EXTERNAL TABLE IF NOT EXISTS history_role (
    day  STRING,
    grp  INT,
    mac  STRING,
    st   STRING,
    et   STRING,
    dur  INT,
    role INT,
    fac  STRING
)
PARTITIONED BY (
    d STRING
)
CLUSTERED BY (day)
SORTED BY (grp)
INTO 8 BUCKETS
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS ORC
TBLPROPERTIES (
    'orc.compress'         = 'NONE',
    'orc.row.index.stride' = '10000',
    'orc.stripe.size'      = '10240000'
);
-- Session settings for dynamic-partition inserts into the bucketed ORC table.
-- Allow every partition column to be resolved dynamically.
SET hive.exec.dynamic.partition.mode=nonstrict;
-- One reducer per bucket (the table is declared INTO 8 BUCKETS).
SET mapreduce.job.reduces=8;
-- Route rows to the correct bucket and sort within each bucket on write.
SET hive.enforce.bucketing=true;
SET hive.enforce.sorting=true;
------ Load data from the TEXTFILE staging table into history_role (dynamic partition).
-- NOTE(review): the history_role DDL above declares 8 data columns
-- (day/grp/mac/st/et/dur/role/fac) but this SELECT supplies a different list —
-- confirm which schema this insert actually targets before running.
FROM history_role_text
INSERT OVERWRITE TABLE history_role
PARTITION (d)
SELECT day, grp, mac, dur, role, day, d AS d
WHERE day = '20140515';

------ Load data from a local path into a static partition.
LOAD DATA LOCAL INPATH '/root/part-00000'
OVERWRITE INTO TABLE history_role1 PARTITION (d='20140515');

-- (shell) verify the partition files landed on HDFS:
--   hdfs dfs -ls hdfs://vm01:9000/hive/warehouse/history_role

LOAD DATA LOCAL INPATH '/usr/java/data/role/2014-06-13/part-00000'
OVERWRITE INTO TABLE history_role PARTITION (d='2014-06-13');
-- NOTE(review): this is MySQL LOAD DATA INFILE syntax, not HiveQL — run it
-- against MySQL only (Hive does not accept ENCLOSED BY or a column list here).
LOAD DATA INFILE '/root/role.txt' INTO TABLE historyrole
FIELDS TERMINATED BY ','
OPTIONALLY ENCLOSED BY '"'
ESCAPED BY ''
LINES TERMINATED BY '\n'
(day, grp, mac, st, et, dur, role, fac);