一次流式处理的submit

时间:2022-03-27 03:03:03

一次流式处理的submit

需要考虑的点很多:

背压、限流、JVM 优化、出错后的重试等

#!/bin/bash
#
# Submit the "streamingSYN" Spark Streaming job to YARN in cluster mode.
#
# Usage: run with no arguments; every setting is one of the variables below.
# Requires `spark-submit` on PATH. spark-submit is the last command executed,
# so its exit status is the exit status of this script.

# Abort on a misspelled/unset variable instead of silently passing an empty
# option value to spark-submit.
set -u

# Resources requested from YARN.
num_executors=1
executor_memory=1g
driver_memory=1g
executor_cores=1
realtime_queue=root

# backpressure
receiver_max_rate=100
receiver_initial_rate=30

my_job_name="streamingSYN"
# This has to be a standalone assignment. Written as a prefix on the
# spark-submit line (`main_class=... spark-submit ... ${main_class}`), the
# shell expands ${main_class} before the assignment takes effect, so --class
# would receive no value and swallow the next option instead.
main_class="com.df.QZ.HeartOrderChart"

#######################################
# Assemble the spark-submit command line and run it.
# Globals:   num_executors, executor_memory, driver_memory, executor_cores,
#            realtime_queue, receiver_max_rate, receiver_initial_rate,
#            my_job_name, main_class (all read)
# Arguments: none
# Returns:   exit status of spark-submit
#######################################
submit_job() {
  # An array keeps every option a single word regardless of its content and
  # lets individual options carry comments.
  local -a submit_args=(
    --master yarn
    --deploy-mode cluster
    --name "${my_job_name}"
    --class "${main_class}"
    --driver-memory "${driver_memory}"
    --num-executors "${num_executors}"
    --executor-cores "${executor_cores}"
    --executor-memory "${executor_memory}"
    --queue "${realtime_queue}"

    # Logging configuration for driver and executors.
    # NOTE(review): --driver-java-options further down targets the same
    # spark.driver.extraJavaOptions property, so this driver-side log4j
    # setting is presumably overridden by it -- confirm, and merge the two
    # option strings if the driver should use log4j-yarn.properties as well.
    --conf "spark.driver.extraJavaOptions=-Dlog4j.configuration=log4j-yarn.properties"
    --conf "spark.executor.extraJavaOptions=-Dlog4j.configuration=log4j-yarn.properties"

    --conf "spark.serializer=org.apache.spark.serializer.KryoSerializer"
    --conf "spark.locality.wait=10"
    --conf "spark.task.maxFailures=8"
    --conf "spark.ui.killEnabled=false"
    --conf "spark.logConf=true"

    # Streaming: write-ahead log, backpressure and rate limits.
    --conf "spark.streaming.blockInterval=200"
    --conf "spark.streaming.receiver.writeAheadLog.enable=true"
    --conf "spark.streaming.backpressure.enabled=true"
    --conf "spark.streaming.backpressure.pid.minRate=10"
    --conf "spark.streaming.receiver.maxRate=${receiver_max_rate}"
    --conf "spark.streaming.kafka.maxRatePerPartition=${receiver_max_rate}"
    --conf "spark.streaming.backpressure.initialRate=${receiver_initial_rate}"

    # YARN memory overhead and retry limits; the executor-failure budget
    # scales with the number of executors (8 per executor).
    --conf "spark.yarn.driver.memoryOverhead=512"
    --conf "spark.yarn.executor.memoryOverhead=1024"
    --conf "spark.yarn.maxAppAttempts=4"
    --conf "spark.yarn.am.attemptFailuresValidityInterval=1h"
    --conf "spark.yarn.max.executor.failures=$((8 * num_executors))"
    --conf "spark.yarn.executor.failuresValidityInterval=1h"

    # JVM / GC flags for the driver, passed as ONE argument.
    --driver-java-options "-XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled -XX:ParallelCMSThreads=4 -XX:+CMSParallelRemarkEnabled -XX:+UseCMSCompactAtFullCollection -XX:CMSInitiatingOccupancyFraction=70 -XX:CMSFullGCsBeforeCompaction=2 -XX:-UseCompressedOops -XX:+PrintHeapAtGC"

    # Application jar, followed by the positional arguments handed to the
    # main class. Their meaning is defined by the application; the :9092 and
    # :2181 host lists look like Kafka brokers and a ZooKeeper quorum --
    # TODO confirm against HeartOrderChart.
    "hdfs://df1:9000/Thermodynamic-1.0-SNAPSHOT.jar"
    1
    "df1:9092,df2:9092,df3:9092"
    driverinfo
    cm1
    "df1:2181,df2:2181,df3:2181"
  )

  spark-submit "${submit_args[@]}"
}

submit_job