Common HDFS shell commands
1. Show the contents of a directory
# show the contents of a directory
hadoop fs -ls <path>
# show the directory contents recursively
hadoop fs -ls -R <path>
# show the contents of the HDFS root directory
hadoop fs -ls /
2. Create a directory
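The corresponding commands, using the same hadoop fs client as in the previous item:
# create a directory
hadoop fs -mkdir <path>
# create a directory together with any missing parent directories
hadoop fs -mkdir -p <path>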

Hadoop HA cluster configuration
core-site.xml
<configuration>
<!-- Combine the two NameNode addresses into a single nameservice, opencluster -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://opencluster</value>
</property>
<!-- Base directory for files that Hadoop generates at runtime -->
<property>
<name>hadoop.tmp.dir</name>
<value>/bigdata/zzwl/data/ha/tmp</value>
</property>
<!-- ZooKeeper quorum used by ZKFC for automatic failover -->
<property>
<name>ha.zookeeper.quorum</name>
<value>bigdata01:2181,bigdata02:2181,bigdata03:2181</value>
</property>
<!-- Raise the IPC retry count so clients do not hit ConnectException while the JournalNode service is still coming up -->
<property>
<name>ipc.client.connect.max.retries</name>
<value>100</value>
<description>Indicates the number of retries a client will make to establish a server connection.</description>
</property>
</configuration>
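Once this file is distributed, two quick checks can confirm it is being picked up (assuming the Hadoop binaries are on the PATH and netcat is available):
# the client should resolve the default filesystem to the HA nameservice
hdfs getconf -confKey fs.defaultFS     # expected output: hdfs://opencluster
# every ZooKeeper server listed in ha.zookeeper.quorum should answer "imok"
echo ruok | nc bigdata01 2181
echo ruok | nc bigdata02 2181
echo ruok | nc bigdata03 2181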
hadoop-env.sh
export JAVA_HOME=/usr/local/java
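A quick sanity check that this JAVA_HOME is valid on a node (assuming the hadoop command is already on the PATH):
/usr/local/java/bin/java -version   # should print the JDK version
hadoop version                      # confirms Hadoop itself can locate Java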
hdfs-site.xml
<configuration>
<!-- Number of HDFS block replicas (the default is 3) -->
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<!-- Logical name (nameservice ID) of the fully distributed cluster -->
<property>
<name>dfs.nameservices</name>
<value>opencluster</value>
</property>
<!-- The NameNodes that belong to this nameservice -->
<property>
<name>dfs.ha.namenodes.opencluster</name>
<value>nn1,nn2</value>
</property>
<!-- RPC address of nn1 -->
<property>
<name>dfs.namenode.rpc-address.opencluster.nn1</name>
<value>bigdata01:8020</value>
</property>
<!-- RPC address of nn2 -->
<property>
<name>dfs.namenode.rpc-address.opencluster.nn2</name>
<value>bigdata02:8020</value>
</property>
<!-- HTTP address of nn1 -->
<property>
<name>dfs.namenode.http-address.opencluster.nn1</name>
<value>bigdata01:50070</value>
</property>
<!-- HTTP address of nn2 -->
<property>
<name>dfs.namenode.http-address.opencluster.nn2</name>
<value>bigdata02:50070</value>
</property>
<!-- Where the NameNode shared edit log is stored on the JournalNodes -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://bigdata01:8485;bigdata02:8485;bigdata03:8485/opencluster</value>
</property>
<!-- Fencing method, so that only one NameNode serves clients at any moment -->
<property>
<name>dfs.ha.fencing.methods</name>
<value>shell(/bin/true)</value>
</property>
<!-- Passwordless SSH key used when fencing over SSH -->
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/root/.ssh/id_rsa</value>
</property>
<!-- Local storage directory for the JournalNode -->
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/bigdata/zzwl/data/ha/jn</value>
</property>
<!-- Disable HDFS permission checking -->
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>
<!-- Failover proxy provider: how clients locate the currently active NameNode -->
<property>
<name>dfs.client.failover.proxy.provider.opencluster</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<!-- Enable automatic failover -->
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:/bigdata/zzwl/data/storage/hdfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:/bigdata/zzwl/data/storage/hdfs/data</value>
</property>
<property>
<name>dfs.blocksize</name>
<value>512000000</value> <!-- HDFS block size raised to 512 MB for large files (default: 128 MB in Hadoop 2.x, 64 MB in 1.x) -->
</property>
<property>
<name>dfs.namenode.handler.count</name>
<value>100</value>
</property>
</configuration>
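With core-site.xml and hdfs-site.xml in place on all three nodes, a typical first-start sequence for a QJM-based HA cluster looks roughly like the following (Hadoop 2.x script names assumed; adjust to your installation):
# 1. start a JournalNode on bigdata01, bigdata02 and bigdata03
hadoop-daemon.sh start journalnode
# 2. on bigdata01 (nn1): format and start the first NameNode
hdfs namenode -format
hadoop-daemon.sh start namenode
# 3. on bigdata02 (nn2): copy nn1's metadata, then start the standby NameNode
hdfs namenode -bootstrapStandby
hadoop-daemon.sh start namenode
# 4. initialise the HA state in ZooKeeper (run once, from either NameNode host)
hdfs zkfc -formatZK
# 5. start the remaining daemons (DataNodes and ZKFCs) from one node
start-dfs.sh
# 6. verify that exactly one NameNode reports "active"
hdfs haadmin -getServiceState nn1
hdfs haadmin -getServiceState nn2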
slaves
bigdata01
bigdata02
bigdata03
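The configuration files above must be identical on every host listed here. One way to push them out from bigdata01, sketched under the assumption of passwordless SSH and the same $HADOOP_HOME path on every node:
for host in bigdata02 bigdata03; do
    rsync -av "$HADOOP_HOME/etc/hadoop/" "${host}:$HADOOP_HOME/etc/hadoop/"
done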
yarn-env.sh
JAVA=$JAVA_HOME/bin/java
JAVA_HEAP_MAX=-Xmx1000m
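After YARN is started, the 1000 MB heap cap set above should appear on the daemon command lines; a rough check (jps ships with the JDK):
start-yarn.sh
jps                                                        # ResourceManager / NodeManager should be listed
ps -ef | grep ResourceManager | grep -o -- '-Xmx[0-9]*m'   # expected: -Xmx1000m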

CentOS 7 system initialization script
#!/bin/bash
# author: yundd
#this script is only for CentOS 7.x
#check the OS
platform=$(uname -i)
if [ "$platform" != "x86_64" ]; then
    echo "this script is only for 64-bit operating systems!"
    exit 1
fi
echo "the platform is ok"
cat << EOF
your system is CentOS 7 x86_64
EOF
# add public DNS servers
cat >> /etc/resolv.conf << EOF
nameserver 8.8.8.8
nameserver 8.8.4.4
EOF
# switch the yum repos to the Aliyun (China) mirrors
#yum install wget telnet -y
#mv /etc/yum.repos.d/CentOS-Base.repo /etc/yum.repos.d/CentOS-Base.repo.backup
#wget -O /etc/yum.repos.d/CentOS-Base.repo http://mirrors.aliyun.com/repo/Centos-7.repo
# add the Aliyun EPEL repo
#wget -O /etc/yum.repos.d/epel.repo http://mirrors.aliyun.com/repo/epel-7.repo
# rpm -ivh http://dl.Fedoraproject.org/pub/epel/7/x86_64/e/epel-release-7-8.noarch.rpm
# rebuild the yum cache
#yum clean all
#yum makecache
# cron jobs to periodically flush and drop the page cache
#echo "* 4 * * * /bin/sync;/bin/sync;/bin/sync>/proc/sys/vm/drop_caches > /dev/null 2>&1" >> /var/spool/cron/root
#echo "* 4 * * * /bin/echo 3>/proc/sys/vm/drop_caches > /dev/null 2>&1" >> /var/spool/cron/root
#systemctl restart crond.service
# install vim and set the timezone to Asia/Shanghai
yum -y install vim
ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
# set the system locale
cat > /etc/locale.conf <<EOF
LANG="zh_CN.UTF-8"
EOF
source /etc/locale.conf
# file descriptor and stack limits (applied to the current shell; the rc.local heredoc is left commented out)
#cat >>/etc/rc.local<<EOF
# open files
ulimit -HSn 65535
# stack size
ulimit -s 65535
#EOF
# persist the maximum open-file limit via rc.local and limits.conf
echo "ulimit -SHn 102400" >> /etc/rc.local
cat >> /etc/security/limits.conf << EOF
* soft nofile 65535
* hard nofile 65535
EOF
source /etc/profile
# disable SELinux
sed -i 's/SELINUX=enforcing/SELINUX=disabled/' /etc/selinux/config
setenforce 0
# stop and disable the firewall
systemctl disable firewalld.service
systemctl stop firewalld.service
#set ssh
sed -i 's/^GSSAPIAuthentication yes$/GSSAPIAuthentication no/' /etc/ssh/sshd_config
sed -i 's/#UseDNS yes/UseDNS no/' /etc/ssh/sshd_config
systemctl restart sshd.service
# kernel parameter tuning
cat >> /etc/sysctl.conf << EOF
vm.overcommit_memory = 1
net.ipv4.ip_local_port_range = 1024 65536
net.ipv4.tcp_mem = 94500000 915000000 927000000
net.ipv4.tcp_syncookies=1
net.ipv4.tcp_tw_reuse = 1
net.ipv4.tcp_tw_recycle = 1
net.ipv4.tcp_fin_timeout=120
net.ipv4.tcp_max_tw_buckets = 6000
net.ipv4.tcp_timestamps = 0
net.ipv4.tcp_synack_retries = 1
net.ipv4.tcp_syn_retries = 1
net.ipv4.tcp_abort_on_overflow = 0
net.core.rmem_max = 16777216
net.core.wmem_max = 16777216
net.core.netdev_max_backlog = 262144
net.core.somaxconn = 262144
net.ipv4.tcp_max_orphans = 3276800
net.ipv4.tcp_max_syn_backlog = 262144
net.core.wmem_default = 8388608
net.core.rmem_default = 8388608
net.ipv4.netfilter.ip_conntrack_max = 2097152
net.nf_conntrack_max = 655360
net.netfilter.nf_conntrack_tcp_timeout_established = 1200
EOF
/sbin/sysctl -p
# optionally update all packages
# yum -y update
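To use the script, save it to a file (the name below is just an example), run it as root on each new node, and then spot-check a few of the settings:
chmod +x centos7_init.sh
./centos7_init.sh
getenforce                    # Permissive now, Disabled after the next reboot
ulimit -n                     # open-file limit picked up by new login sessions
sysctl net.core.somaxconn     # should print 262144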