# Spark Installation
# What This Guide Covers
- Installing Spark on CentOS 7
- Verifying the Spark installation
# Software Installation
# Notes
- Spark depends on a Java environment
- The Hive on Spark engine depends on a Hadoop environment (a quick version check follows this list)
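Before starting, the dependencies can be checked on the target machine. This is only a sketch; the JDK, Hadoop, and Scala versions shown are the ones referenced later in this guide and may differ on your server.
java -version      # expect a 1.8.x JDK (jdk1.8.0_333 is used below)
hadoop version     # expect the Hadoop install used below (3.3.4)
scala -version     # expect Scala 2.12.x (2.12.15 is used below)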
# Installation Steps
# Upload spark-2.4.8-bin-without-hadoop-scala-2.12.tgz to /root/tools on the server
Extract the Spark installation package:
cd /root/tools
tar -xvzf spark-2.4.8-bin-without-hadoop-scala-2.12.tgz
mv spark-2.4.8-bin-without-hadoop-scala-2.12 spark-2.4.8-pure
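The conf/ directory of a freshly extracted Spark tarball normally contains only *.template files. If spark-defaults.conf and spark-env.sh do not exist yet, one way to create them (a minimal sketch) is to copy the shipped templates before editing:
cd /root/tools/spark-2.4.8-pure/conf
cp spark-defaults.conf.template spark-defaults.conf   # start from the shipped template
cp spark-env.sh.template spark-env.sh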
# Configure spark-defaults.conf, appending the following settings at the end of the file
hadoop fs -mkdir /spark-logs    # create the /spark-logs directory in HDFS first
cd /root/tools/spark-2.4.8-pure/conf
vi spark-defaults.conf
spark.eventLog.enabled true
spark.eventLog.dir hdfs://hadoopmaster:9000/spark-logs
spark.history.fs.logDirectory hdfs://hadoopmaster:9000/spark-logs
spark.serializer org.apache.spark.serializer.KryoSerializer
spark.driver.memory 5g
spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"
spark.history.ui.port 18080
spark.history.fs.update.interval 10s
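Note that the spark.executor.extraJavaOptions line above keeps the placeholder values (-Dkey=value, -Dnumbers=...) from the shipped template; adjust or drop them as needed. Once a job has run with event logging enabled, the log directory can be checked (assuming the HDFS path configured above):
hadoop fs -ls /spark-logs    # should list one entry per completed application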
# Configure spark-env.sh, appending the following settings at the end of the file
cd /root/tools/spark-2.4.8-pure/conf
vi spark-env.sh
export SPARK_HOME=/root/tools/spark-2.4.8-pure
export SCALA_HOME=/root/tools/scala-2.12.15
export JAVA_HOME=/root/tools/jdk1.8.0_333
export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$SCALA_HOME/bin
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export YARN_CONF_DIR=$HADOOP_HOME/etc/hadoop
export SPARK_LOCAL_DIRS=/root/tools/spark-2.4.8-pure
export SPARK_LIBRARY_PATH=.:$JAVA_HOME/lib:$JAVA_HOME/jre/lib:$HADOOP_HOME/lib/native
#export SPARK_MASTER_PORT=7077
export SPARK_MASTER_HOST=hadoopmaster
export SPARK_HISTORY_OPTS="-Dspark.history.ui.port=18080 -Dspark.history.retainedApplications=3 -Dspark.history.fs.logDirectory=hdfs://hadoopmaster:9000/spark-logs"
# HADOOP_HOME is normally inherited from hive-env.sh; it is set here only so that Spark can run on its own
export HADOOP_HOME=/root/tools/hadoop-3.3.4
# Neither the pure (without-hadoop) Spark build nor Hive on Spark will run without SPARK_DIST_CLASSPATH
export SPARK_DIST_CLASSPATH=$(${HADOOP_HOME}/bin/hdfs classpath)
# YARN_CONF_DIR is only needed for the Spark-on-YARN integration and is unrelated to Hive on Spark
export YARN_CONF_DIR=${HADOOP_HOME}/etc/hadoop
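To confirm that the hdfs classpath expansion used for SPARK_DIST_CLASSPATH resolves correctly, it can be run by hand (assuming the HADOOP_HOME path above):
/root/tools/hadoop-3.3.4/bin/hdfs classpath    # should print the Hadoop config directory and jar paths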
"spark-env.sh" 87L, 5297C written
# Configure Spark environment variables
vi /etc/profile
#spark
export SPARK_HOME=/root/tools/spark-2.4.8-pure
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export LD_LIBRARY_PATH=$HADOOP_HOME/lib/native:$LD_LIBRARY_PATH
export PATH=$SPARK_HOME/bin:$PATH
After saving, run source /etc/profile to make the environment settings take effect.
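With the profile sourced, the Spark client scripts should be on the PATH; a quick sanity check:
echo $SPARK_HOME          # expect /root/tools/spark-2.4.8-pure
spark-submit --version    # should print the Spark 2.4.8 version banner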
# Upload the jars to an HDFS directory
cd /root/tools/spark-2.4.8-pure/jars
mv orc-core-1.5.5-nohive.jar orc-core-1.5.5-nohive.jar.bak    # set the -nohive ORC jar aside so it is not uploaded
hadoop fs -mkdir /spark2-jars
hadoop fs -put *.jar /spark2-jars
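A Hive on Spark setup typically points at a jar directory like this so that YARN containers do not need a local Spark install. The exact wiring depends on your Hive configuration; as an assumption-labelled sketch, the corresponding Spark-on-YARN property would look like:
spark.yarn.jars hdfs://hadoopmaster:9000/spark2-jars/*    # assumed entry in spark-defaults.conf (or the equivalent property in hive-site.xml); adapt to your setup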
# Test Spark
cd /root/tools/spark-2.4.8-pure/bin
./run-example SparkPi 10
Find the result in the printed log: Pi is roughly 3.1434191434191433, which confirms that Spark is deployed successfully.
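The same SparkPi example can also be submitted through spark-submit to exercise the YARN integration; a minimal sketch, assuming YARN is running and that the examples jar of this build is named spark-examples_2.12-2.4.8.jar:
cd /root/tools/spark-2.4.8-pure
bin/spark-submit --class org.apache.spark.examples.SparkPi --master yarn --deploy-mode client examples/jars/spark-examples_2.12-2.4.8.jar 10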
# Start and Stop
cd /root/tools/spark-2.4.8-pure/sbin
./start-all.sh             # start the standalone master and workers
./stop-all.sh              # stop them
./start-history-server.sh  # start the history server
./stop-history-server.sh   # stop the history server
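After starting, the daemons can be checked with jps, and the web UIs should be reachable (assuming the default standalone ports and the SPARK_MASTER_HOST / spark.history.ui.port values configured above):
jps    # expect Master and Worker processes, plus HistoryServer once the history server is started
Master web UI: http://hadoopmaster:8080 ; History server UI: http://hadoopmaster:18080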
# Other
- None