# AI and Predictive Analytics in Datacenter Environments
# http://dcai.bsc.es

## Installing Spark (Standalone Mode)

# - SPARK 2.4 (for hadoop2.7)

# Download Spark
wget https://bit.ly/2Gooizm -O spark-2.4.4-bin-hadoop2.7.tgz
tar xvzf spark-2.4.4-bin-hadoop2.7.tgz
ln -s spark-2.4.4-bin-hadoop2.7 spark

# Then set up the environment
# -> Edit ~/.bashrc and add the lines
export SPARK_HOME=/home/vagrant/spark
export PATH=$PATH:$SPARK_HOME/bin

# Finally, reload the environment
source ~/.bashrc

## Starting Spark

# e.g. (standalone = local, with some configuration for master and worker):
spark-shell --master local[*] --driver-memory 2G --executor-memory 768M --executor-cores 2

# Setting up a cluster:
# - start a node as Master
# - start N nodes as Workers
#
# e.g.
master   > spark-class "org.apache.spark.deploy.master.Master" --host ${HOSTNAME_MASTER} ${SPARK_MASTER_ARGS}
worker(s)> spark-class "org.apache.spark.deploy.worker.Worker" -d ${SPARK_TEMP} spark://${HOSTNAME_MASTER}:7077 ${SPARK_WORKER_ARGS}
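
# Alternatively (a minimal sketch, assuming the stock Spark 2.4 layout and default ports),
# the helper scripts under $SPARK_HOME/sbin start the same master/worker daemons:
master   > $SPARK_HOME/sbin/start-master.sh
worker(s)> $SPARK_HOME/sbin/start-slave.sh spark://${HOSTNAME_MASTER}:7077
# Once started, the master web UI is reachable at http://${HOSTNAME_MASTER}:8080 by default.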
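
# To verify the cluster is up (a sketch; the examples jar path below assumes the stock
# spark-2.4.4-bin-hadoop2.7 distribution), submit the bundled SparkPi example job:
spark-submit --class org.apache.spark.examples.SparkPi \
  --master spark://${HOSTNAME_MASTER}:7077 \
  $SPARK_HOME/examples/jars/spark-examples_2.11-2.4.4.jar 100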