# AI and Predictive Analytics in Datacenter Environments
# http://dcai.bsc.es

## Installing Spark (Standalone Mode)

# - SPARK 2.4 (for hadoop2.7)

# Download Spark
wget https://bit.ly/2Gooizm -O spark-2.4.4-bin-hadoop2.7.tgz
tar xvzf spark-2.4.4-bin-hadoop2.7.tgz
ln -s spark-2.4.4-bin-hadoop2.7 spark

# Then set up the environment
# -> Edit ~/.bashrc and add the lines
export SPARK_HOME=/home/vagrant/spark
export PATH=$PATH:$SPARK_HOME/bin

# Finally, reload the environment
source ~/.bashrc

## Starting Spark

# e.g. (standalone = local, with some configuration for master and worker):
spark-shell --master local[*] --driver-memory 2G --executor-memory 768M --executor-cores 2

# Setting up a cluster:
# - start a node as Master
# - start N nodes as Workers
#
# e.g.
master   > spark-class "org.apache.spark.deploy.master.Master" --host ${HOSTNAME_MASTER} ${SPARK_MASTER_ARGS}
worker(s)> spark-class "org.apache.spark.deploy.worker.Worker" -d ${SPARK_TEMP} spark://${HOSTNAME_MASTER}:7077 ${SPARK_WORKER_ARGS}
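
# Alternatively (a minimal sketch, assuming the stock Spark 2.4 layout and default ports),
# the helper scripts under $SPARK_HOME/sbin start the same master/worker daemons:
master   > $SPARK_HOME/sbin/start-master.sh
worker(s)> $SPARK_HOME/sbin/start-slave.sh spark://${HOSTNAME_MASTER}:7077
# Once started, the master web UI is reachable at http://${HOSTNAME_MASTER}:8080 by default.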
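
# To verify the cluster is up (a sketch; the examples jar path below assumes the stock
# spark-2.4.4-bin-hadoop2.7 distribution), submit the bundled SparkPi example job:
spark-submit --class org.apache.spark.examples.SparkPi \
  --master spark://${HOSTNAME_MASTER}:7077 \
  $SPARK_HOME/examples/jars/spark-examples_2.11-2.4.4.jar 100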