#####HDFS指令
查看HDFS目录
bin/hdfs dfs -ls hdfs://vm01:9000
#####HBase操作指令
启动HBase
bin/start-hbase.sh
HBase lib下面的Hadoop相关的jar不是2.3.0,需要替换成2.3.0的jar包
rm lib/hadoop*.jar
MAC OS X下
find $HADOOP_HOME/share/hadoop/ -name "hadoop*.jar" | grep -v "test" | grep -v "sources.jar" | xargs -I{} cp {} $HBASE_HOME/lib
Linux下
find $HADOOP_HOME/share/hadoop/ -name "hadoop*.jar" | grep -v "test" | grep -v "sources.jar" | xargs -i cp {} $HBASE_HOME/lib
create 'table1', 'cf1'
#####Hive操作
$ tar -xzvf hive-x.y.z.tar.gz
$ cd hive-x.y.z
$ export HIVE_HOME=
$ export PATH=$HIVE_HOME/bin:$PATH
-- External table over comma-delimited text files on the vm01 HDFS cluster.
-- Columns: day/grp/mac/dur/role; partitioned by d (date string) and g (group).
-- Explicit INPUTFORMAT/OUTPUTFORMAT pair is equivalent to STORED AS TEXTFILE.
CREATE EXTERNAL TABLE IF NOT EXISTS history_role_ext (
    day  STRING,
    grp  STRING,
    mac  STRING,
    dur  DOUBLE,
    role STRING
)
PARTITIONED BY (
    d STRING,
    g STRING
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
STORED AS
    INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION 'hdfs://vm01:9000/hive/warehouse/history_role_ext';

-- Load a local CSV file into one static partition (OVERWRITE replaces it).
LOAD DATA LOCAL INPATH '/root/input.txt'
OVERWRITE INTO TABLE history_role_ext PARTITION (d='20140513', g='all');
-- Same schema as history_role_ext, declared with the shorthand
-- STORED AS TEXTFILE instead of explicit input/output format classes.
CREATE EXTERNAL TABLE IF NOT EXISTS history_role_external (
    day  STRING,
    grp  STRING,
    mac  STRING,
    dur  DOUBLE,
    role STRING
)
PARTITIONED BY (
    d STRING,
    g STRING
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
STORED AS TEXTFILE
LOCATION 'hdfs://vm01:9000/hive/warehouse/history_role_external';

-- Load a local CSV file into one static partition (OVERWRITE replaces it).
LOAD DATA LOCAL INPATH '/root/input.txt'
OVERWRITE INTO TABLE history_role_external PARTITION (d='20140513', g='all');
#####Hadoop操作
配置Single Node方法 http://www.alexjf.net/blog/distributed-systems/hadoop-yarn-installation-definitive-guide
http://codesfusion.blogspot.jp/2013/10/hadoop-wordcount-with-new-map-reduce-api.html
Start HDFS daemons
$HADOOP_PREFIX/bin/hdfs namenode -format
Start the namenode daemon
$HADOOP_PREFIX/sbin/hadoop-daemon.sh start namenode
Start the datanode daemon
$HADOOP_PREFIX/sbin/hadoop-daemon.sh start datanode
Start YARN daemons
Start the resourcemanager daemon
$HADOOP_PREFIX/sbin/yarn-daemon.sh start resourcemanager
Start the nodemanager daemon
$HADOOP_PREFIX/sbin/yarn-daemon.sh start nodemanager
http://wenku.baidu.com/view/d282172055270722192ef7ba.html
#####Git操作
http://www.infoq.com/cn/news/2011/03/git-adventures-branch-merge
Hive性能调优
1、数据存TextFile,查询性能比较慢,使用ORCFile,速度快
2、加载数据,先加载TextFile到table1,再从table1加载到table2, table2采用ORCFile格式存储
设置reduce job个数, set mapreduce.job.reduces=6;
MapReduce
1、出现Type错误,大部分错误原因是 job.setOutputValueClass(LongWritable.class); 设置不正确。这里的OutputValueClass不是Reduce的Output,而是Map的Output。
-- Raw role-history table (TEXTFILE) on the vm01 cluster.
-- Columns: day/grp/mac/st/et/dur/role/fac; partitioned by d (date string).
CREATE EXTERNAL TABLE IF NOT EXISTS history_role (
    day  STRING,
    grp  INT,
    mac  STRING,
    st   STRING,
    et   STRING,
    dur  INT,
    role INT,
    fac  STRING
)
PARTITIONED BY (
    d STRING
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
STORED AS TEXTFILE
LOCATION 'hdfs://vm01:9000/hive/warehouse/history_role';
-- Single-node variant of history_role: identical schema, HDFS location on
-- localhost. NOTE(review): same table name as the vm01 variant above — only
-- one can exist per metastore; pick the one matching your cluster.
CREATE EXTERNAL TABLE IF NOT EXISTS history_role (
    day  STRING,
    grp  INT,
    mac  STRING,
    st   STRING,
    et   STRING,
    dur  INT,
    role INT,
    fac  STRING
)
PARTITIONED BY (
    d STRING
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
STORED AS TEXTFILE
LOCATION 'hdfs://localhost/hive/warehouse/history_role';
-- ORC variant of history_role: bucketed/sorted for faster queries (see the
-- tuning notes above). NOTE(review): reuses the history_role table name — a
-- metastore can hold only one; rename (e.g. history_role_orc) before use.
CREATE EXTERNAL TABLE IF NOT EXISTS history_role (
    day  STRING,
    grp  INT,
    mac  STRING,
    st   STRING,
    et   STRING,
    dur  INT,
    role INT,
    fac  STRING
)
PARTITIONED BY (
    d STRING
)
CLUSTERED BY (day)
SORTED BY (grp)
INTO 8 BUCKETS
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS ORC
TBLPROPERTIES (
    'orc.compress'         = 'NONE',
    'orc.row.index.stride' = '10000',
    'orc.stripe.size'      = '10240000'
);
-- Session settings for dynamic-partition inserts into the bucketed ORC table.
-- Allow every partition column to be resolved dynamically.
SET hive.exec.dynamic.partition.mode=nonstrict;
-- One reducer per bucket (the table is declared INTO 8 BUCKETS).
SET mapreduce.job.reduces=8;
-- Route rows to the correct bucket and sort within each bucket on write.
SET hive.enforce.bucketing=true;
SET hive.enforce.sorting=true;
------ Load data from the TEXTFILE staging table into history_role (dynamic partition).
-- NOTE(review): the history_role DDL above declares 8 data columns
-- (day/grp/mac/st/et/dur/role/fac) but this SELECT supplies a different list —
-- confirm which schema this insert actually targets before running.
FROM history_role_text
INSERT OVERWRITE TABLE history_role
PARTITION (d)
SELECT day, grp, mac, dur, role, day, d AS d
WHERE day = '20140515';

------ Load data from a local path into a static partition.
LOAD DATA LOCAL INPATH '/root/part-00000'
OVERWRITE INTO TABLE history_role1 PARTITION (d='20140515');

-- (shell) verify the partition files landed on HDFS:
--   hdfs dfs -ls hdfs://vm01:9000/hive/warehouse/history_role

LOAD DATA LOCAL INPATH '/usr/java/data/role/2014-06-13/part-00000'
OVERWRITE INTO TABLE history_role PARTITION (d='2014-06-13');
-- NOTE(review): this is MySQL LOAD DATA INFILE syntax, not HiveQL — run it
-- against MySQL only (Hive does not accept ENCLOSED BY or a column list here).
LOAD DATA INFILE '/root/role.txt' INTO TABLE historyrole
FIELDS TERMINATED BY ','
OPTIONALLY ENCLOSED BY '"'
ESCAPED BY ''
LINES TERMINATED BY '\n'
(day, grp, mac, st, et, dur, role, fac);