mirror of https://github.com/infosecn1nja/HELK.git
05312018
Logstash
- Added a local pipeline to the build to allow custom local configurations
- Updated sysmon config to fix https://github.com/Cyb3rWard0g/HELK/issues/63
- Removed the port exposed in the local logstash Dockerfile; it will be pushed to the official docker image in the next update
- Removed the logstash init file (no longer used)

Zeppelin (not available yet)
- Initial draft dockerfile
- Created a spark-defaults file for the future zeppelin dockerfile

Install Script
- Increased the minimum memory size required

keyword-vs-text-changes
parent bb321d985a
commit f3a0e251ea
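The local pipeline change below bind-mounts ./helk-logstash/pipeline into the container, so custom Logstash configs can sit next to the shipped ones. A minimal sketch of dropping in a local config, assuming compose is run from the repo root; the file name and filter contents are hypothetical examples, not part of this commit:

# Hypothetical custom config placed in the newly mounted pipeline directory
mkdir -p helk-logstash/pipeline
cat <<'EOF' > helk-logstash/pipeline/99-custom-output.conf
output {
  # Echo a copy of every event to stdout for local debugging
  stdout { codec => rubydebug }
}
EOF
# Restart the service so Logstash picks up the new config
docker-compose restart helk-logstash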
@@ -20,6 +20,8 @@ services:
   helk-logstash:
     image: cyb3rward0g/helk-logstash:6.2.4
     container_name: helk-logstash
+    volumes:
+      - ./helk-logstash/pipeline:/usr/share/logstash/pipeline
     environment:
       - "LS_JAVA_OPTS=-Xms2g -Xmx2g"
     restart: always
@@ -15,6 +15,4 @@ LABEL description="Dockerfile base for the HELK Logstash."
 # *********** Adding HELK scripts, config files and pipeline configs to container ***************
 ADD logstash.yml /usr/share/logstash/config/logstash.yml
 ADD pipeline /usr/share/logstash/pipeline
-ADD output_templates /usr/share/logstash/output_templates
-
-EXPOSE 5044
+ADD output_templates /usr/share/logstash/output_templates
@@ -1,202 +0,0 @@
-#!/bin/sh
-# Init script for logstash
-# Maintained by Roberto Rodriguez @Cyb3rWard0g
-# Reference:
-# https://github.com/elastic/logstash/blob/master/distribution/rpm/src/main/packaging/init.d/logstash
-# https://github.com/spujadas/elk-docker/blob/master/logstash-init
-
-### BEGIN INIT INFO
-# Provides:          logstash
-# Required-Start:    $remote_fs $syslog
-# Required-Stop:     $remote_fs $syslog
-# Default-Start:     2 3 4 5
-# Default-Stop:      0 1 6
-# Short-Description:
-# Description:       Starts Logstash as a daemon.
-### END INIT INFO
-
-PATH=/sbin:/usr/sbin:/bin:/usr/bin
-NAME=logstash
-DEFAULT=/etc/default/$NAME
-export PATH
-
-if [ $(id -u) -ne 0 ]; then
-   echo "You need root privileges to run this script"
-   exit 1
-fi
-
-. /lib/lsb/init-functions
-
-if [ -r /etc/default/rcS ]; then
-   . /etc/default/rcS
-fi
-
-# The following variables can be overwritten in $DEFAULT
-
-JAVACMD=/usr/bin/java
-LS_HOME=/usr/share/logstash
-LS_SETTINGS_DIR=/etc/logstash
-LS_CONF_PATH=/etc/logstash/pipeline
-LS_LOGS_PATH=/var/log/logstash
-LS_LOGS_FILE=${LS_LOGS_PATH}/${NAME}-plain.log
-#LS_JAVA_OPTS=""
-LS_PIDFILE=/var/run/logstash.pid
-LS_USER=logstash
-LS_GROUP=logstash
-LS_GC_LOG_FILE=/var/log/logstash/gc.log
-LS_OPEN_FILES=16384
-LS_NICE=19
-SERVICE_NAME="logstash"
-SERVICE_DESCRIPTION="logstash"
-
-# End of variables that can be overwritten in $DEFAULT
-
-# overwrite settings from default file
-if [ -f "$DEFAULT" ]; then
-   . "$DEFAULT"
-fi
-
-# Define other required variables
-LS_EXEC=$LS_HOME/bin/logstash
-#LS_EXEC_OPTS="--path.settings ${LS_SETTINGS_DIR} --path.config ${LS_CONF_PATH} --path.logs ${LS_LOGS_PATH}"
-LS_EXEC_OPTS="--path.settings ${LS_SETTINGS_DIR}"
-
-export LS_JAVA_OPTS
-export LS_HOME
-export LS_SETTINGS_DIR
-
-if [ ! -x "$LS_EXEC" ]; then
-   echo "The logstash startup script does not exists or it is not executable, tried: $LS_EXEC"
-   exit 1
-fi
-
-touch ${LS_LOGS_FILE}
-chown ${LS_USER}:${LS_GROUP} ${LS_LOGS_FILE}
-
-checkJava() {
-   if [ -x "$JAVACMD" ]; then
-      JAVA="$JAVACMD"
-   else
-      JAVA=`which java`
-   fi
-
-   if [ ! -x "$JAVA" ]; then
-      echo "Could not find any executable java binary. Please install java in your PATH or set JAVACMD"
-      exit 1
-   fi
-}
-
-start() {
-   checkJava
-
-   echo "Starting $NAME"
-
-   if [ -n "$LS_PIDFILE" ] && [ ! -e "$LS_PIDFILE" ]; then
-      touch "$LS_PIDFILE" && chown logstash:logstash "$LS_PIDFILE"
-   fi
-
-   if [ -n "$LS_OPEN_FILES" ]; then
-      ulimit -n $LS_OPEN_FILES
-   fi
-
-   # Start Service
-   nice -n$LS_NICE chroot --userspec $LS_USER:$LS_GROUP / sh -c "
-      cd $LS_HOME
-      ulimit -n ${LS_OPEN_FILES}
-      exec $LS_EXEC $LS_EXEC_OPTS
-   " &
-
-   # Generate the pidfile from here. If we instead made the forked process
-   # generate it there will be a race condition between the pidfile writing
-   # and a process possibly asking for status.
-   echo $! > $LS_PIDFILE
-
-   echo "$NAME started."
-   return 0
-}
-
-stop() {
-   # Try a few times to kill TERM the program
-   if status; then
-      pid=$(cat "$LS_PIDFILE")
-      echo "Killing $NAME (pid $pid) with SIGTERM"
-      kill -TERM $pid
-      # Wait for it to exit.
-      for i in 1 2 3 4 5; do
-         echo "Waiting for $NAME (pid $pid) to die..."
-         status || break
-         sleep 1
-      done
-      if status; then
-         echo "$NAME stop failed; still running."
-      else
-         echo "$NAME stopped."
-         rm -f $LS_PIDFILE
-      fi
-   fi
-}
-
-status() {
-   if [ -f "$LS_PIDFILE" ] ; then
-      pid=$(cat "$LS_PIDFILE")
-      if kill -0 $pid > /dev/null 2> /dev/null; then
-         # process by this pid is running.
-         # It may not be our pid, but that's what you get with just pidfiles.
-         # TODO(sissel): Check if this process seems to be the same as the one we
-         # expect. It'd be nice to use flock here, but flock uses fork, not exec,
-         # so it makes it quite awkward to use in this case.
-         return 0
-      else
-         return 2 # program is dead but pid file exists
-      fi
-   else
-      return 3 # program is not running
-   fi
-}
-
-force_stop() {
-   if status; then
-      stop
-      status && kill -KILL $(cat "$LS_PIDFILE")
-      rm -f $LS_PIDFILE
-   fi
-}
-
-case "$1" in
-   start)
-      status
-      code=$?
-      if [ $code -eq 0 ]; then
-         echo "$NAME is already running"
-      else
-         start
-         code=$?
-      fi
-      exit $code
-      ;;
-   stop) stop ;;
-   force-stop) force_stop ;;
-   status)
-      status
-      code=$?
-      if [ $code -eq 0 ]; then
-         echo "$NAME is running"
-      else
-         echo "$NAME is not running"
-      fi
-      exit $code
-      ;;
-   restart) stop && start ;;
-   *)
-      echo "Usage: $SCRIPTNAME {start|stop|force-stop|status|restart}" >&2
-      exit 3
-      ;;
-esac
-
-exit $?
@@ -298,7 +298,8 @@ filter {
       remove_field => "[event_data][UtcTime]"
       remove_field => "[event_data][CreationUtcTime]"
       remove_field => "[event_data][PreviousCreationUtcTime]"
-      rename => { "[event_data][User]" => "user"}
+      remove_field => "[user]"
+      rename => { "[event_data][User]" => "user_account"}
     }
   }
 }
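This hunk stops renaming [event_data][User] to user and renames it to user_account instead, working around the keyword-vs-text mapping clash tracked in https://github.com/Cyb3rWard0g/HELK/issues/63. One way to confirm how the new field ends up mapped after re-indexing, sketched with an assumed HELK index pattern; adjust it to your deployment:

# Sketch: inspect the mapping of the renamed field (index pattern is an assumption)
curl -s 'http://localhost:9200/logs-endpoint-winevent-sysmon-*/_mapping/field/user_account?pretty'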
@@ -10,86 +10,80 @@ LABEL description="Dockerfile base for the HELK Zeppelin."
 
 ENV DEBIAN_FRONTEND noninteractive
 
 USER root
+# *********** Installing Prerequisites ***************
 # `Z_VERSION` will be updated by `dev/change_zeppelin_version.sh`
-ENV Z_VERSION="0.7.3"
+ENV Z_VERSION="0.8.0"
 ENV LOG_TAG="[ZEPPELIN_${Z_VERSION}]:" \
-    Z_HOME="/zeppelin" \
-    LANG=en_US.UTF-8 \
-    LC_ALL=en_US.UTF-8
-
-RUN echo "$LOG_TAG update and install basic packages" && \
-    apt-get -y update && \
-    apt-get install -y locales && \
-    locale-gen $LANG && \
-    apt-get install -y software-properties-common && \
-    apt -y autoclean && \
-    apt -y dist-upgrade && \
-    apt-get install -y build-essential
+    Z_HOME="/zeppelin"
+ENV Z_GID=710
+ENV Z_UID=710
+ENV Z_USER=zelk
 
 ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
 RUN echo "$LOG_TAG Install java8" && \
     apt-get -y update && \
-    apt-get install -y openjdk-8-jdk && \
+    apt-get install -y git openjdk-8-jdk \
+    libfontconfig git build-essential chrpath \
+    libssl-dev libxft-dev libfreetype6 libfreetype6-dev \
+    libfontconfig1 libfontconfig1-dev python3-pip && \
     rm -rf /var/lib/apt/lists/*
 
-# should install conda first before numpy, matplotlib since pip and python will be installed by conda
-RUN echo "$LOG_TAG Install miniconda2 related packages" && \
-    apt-get -y update && \
-    apt-get install -y bzip2 ca-certificates \
-    libglib2.0-0 libxext6 libsm6 libxrender1 \
-    git mercurial subversion && \
-    echo 'export PATH=/opt/conda/bin:$PATH' > /etc/profile.d/conda.sh && \
-    wget --quiet https://repo.continuum.io/miniconda/Miniconda2-4.3.11-Linux-x86_64.sh -O ~/miniconda.sh && \
-    /bin/bash ~/miniconda.sh -b -p /opt/conda && \
-    rm ~/miniconda.sh
-ENV PATH /opt/conda/bin:$PATH
+# *********** Upgrading PIP ***************
+RUN pip3 install --upgrade pip
+
+# *********** Create User *****************
+RUN groupadd -r zelk -g ${Z_GID} \
+    && useradd -m -s /bin/bash -N -u $Z_UID $Z_USER \
+    && chmod g+w /etc/passwd /etc/group \
+    && chown -R zelk:zelk /usr/local/ /tmp /usr/bin ${SPARK_HOME}
 
 RUN echo "$LOG_TAG Install python related packages" && \
     apt-get -y update && \
-    apt-get install -y python-dev python-pip && \
     apt-get install -y gfortran && \
     # numerical/algebra packages
     apt-get install -y libblas-dev libatlas-dev liblapack-dev && \
     # font, image for matplotlib
     apt-get install -y libpng-dev libfreetype6-dev libxft-dev && \
     # for tkinter
     apt-get install -y python-tk libxml2-dev libxslt-dev zlib1g-dev && \
-    pip install numpy && \
-    pip install matplotlib
+    pip3 install numpy && \
+    pip3 install matplotlib
 
 RUN echo "$LOG_TAG Install R related packages" && \
     echo "deb http://cran.rstudio.com/bin/linux/ubuntu xenial/" | tee -a /etc/apt/sources.list && \
     gpg --keyserver keyserver.ubuntu.com --recv-key E084DAB9 && \
     gpg -a --export E084DAB9 | apt-key add - && \
     apt-get -y update && \
     apt-get -y install r-base r-base-dev && \
     R -e "install.packages('knitr', repos='http://cran.us.r-project.org')" && \
     R -e "install.packages('ggplot2', repos='http://cran.us.r-project.org')" && \
     R -e "install.packages('googleVis', repos='http://cran.us.r-project.org')" && \
     R -e "install.packages('data.table', repos='http://cran.us.r-project.org')" && \
     # for devtools, Rcpp
     apt-get -y install libcurl4-gnutls-dev libssl-dev && \
     R -e "install.packages('devtools', repos='http://cran.us.r-project.org')" && \
     R -e "install.packages('Rcpp', repos='http://cran.us.r-project.org')" && \
     Rscript -e "library('devtools'); library('Rcpp'); install_github('ramnathv/rCharts')"
-# ************** Install PhantomJS ****************
+USER $Z_UID
 
 # ************** Install Maven *********************
 ENV MAVEN_VERSION 3.5.3
-RUN wget wget -qO- http://www.us.apache.org/dist/maven/maven-3/${MAVEN_VERSION}/binaries/apache-maven-${MAVEN_VERSION}-bin.tar.gz | sudo tar xvz -C /usr/local && \
+RUN wget -qO- http://www.us.apache.org/dist/maven/maven-3/${MAVEN_VERSION}/binaries/apache-maven-${MAVEN_VERSION}-bin.tar.gz | tar xvz -C /usr/local && \
     ln -s /usr/local/apache-maven-${MAVEN_VERSION}/bin/mvn /usr/bin/mvn
 
-RUN echo "$LOG_TAG Download Zeppelin binary" && \
-    wget -O /tmp/zeppelin-${Z_VERSION}-bin-all.tgz http://archive.apache.org/dist/zeppelin/zeppelin-${Z_VERSION}/zeppelin-${Z_VERSION}-bin-all.tgz && \
-    tar -zxvf /tmp/zeppelin-${Z_VERSION}-bin-all.tgz && \
-    rm -rf /tmp/zeppelin-${Z_VERSION}-bin-all.tgz && \
-    mv /zeppelin-${Z_VERSION}-bin-all ${Z_HOME}
+USER root
+# ************** Install Zeppelin *********************
+RUN echo "$LOG_TAG Download Zeppelin" && \
+    cd /tmp && git clone --branch v0.8.0-rc2 https://github.com/apache/zeppelin.git && \
+    mv /tmp/zeppelin ${Z_HOME}
+
+RUN chown -R zelk:zelk ${Z_HOME}
+
+USER $Z_UID
+
+RUN cd $Z_HOME && \
+    mvn clean package -DskipTests -X
+
+# *********** Install CAPS ***************
+RUN cd ${Z_HOME} && \
+    wget https://github.com/opencypher/cypher-for-apache-spark/releases/download/1.0.0-beta7/spark-cypher-1.0.0-beta7-cluster.jar
+
+ADD spark-defaults.conf ${SPARK_HOME}/conf/
+
+USER root
 
 RUN echo "$LOG_TAG Cleanup" && \
     apt-get autoclean && \
     apt-get clean
 
-EXPOSE 8080
+EXPOSE $ZEPPELIN_PORT
 
 WORKDIR ${Z_HOME}
 CMD ["bin/zeppelin.sh"]
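Per the commit message the Zeppelin image is an initial draft and not published yet, so trying it means building locally. A rough sketch, assuming the Dockerfile lives in a helk-zeppelin/ directory; the tag and port mapping are illustrative, not part of this commit:

# Build the draft image and run it, exposing the Zeppelin UI
docker build -t helk-zeppelin:draft helk-zeppelin/
docker run -d --name helk-zeppelin -p 8080:8080 helk-zeppelin:draft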
@@ -0,0 +1,38 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Default system properties included when running spark-submit.
+# This is useful for setting default environmental settings.
+
+# Example:
+# spark.master                     spark://master:7077
+# spark.eventLog.enabled           true
+# spark.eventLog.dir               hdfs://namenode:8021/directory
+# spark.serializer                 org.apache.spark.serializer.KryoSerializer
+# spark.driver.memory              5g
+# spark.executor.extraJavaOptions  -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"
+
+# HELK References:
+# https://spark.apache.org/docs/latest/configuration.html
+# https://graphframes.github.io/quick-start.html
+# https://spark-packages.org/package/graphframes/graphframes
+# https://spark.apache.org/docs/latest/sql-programming-guide.html#pyspark-usage-guide-for-pandas-with-apache-arrow
+
+#spark.master                       spark://helk-spark-master:7077
+spark.jars                          /zeppelin/spark-cypher-1.0.0-beta7-cluster.jar
+spark.jars.packages                 graphframes:graphframes:0.5.0-spark2.1-s_2.11,org.apache.spark:spark-sql-kafka-0-10_2.11:2.3.0,databricks:spark-sklearn:0.2.3
+spark.sql.execution.arrow.enabled   true
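These defaults preload the CAPS jar plus the graphframes, Kafka SQL, and spark-sklearn packages for every Spark session Zeppelin starts. The same dependencies could also be supplied ad hoc through spark-submit flags instead of spark-defaults.conf; a sketch, where my_job.py is a hypothetical placeholder:

# Sketch: equivalent ad-hoc submission without spark-defaults.conf
spark-submit \
  --jars /zeppelin/spark-cypher-1.0.0-beta7-cluster.jar \
  --packages graphframes:graphframes:0.5.0-spark2.1-s_2.11,org.apache.spark:spark-sql-kafka-0-10_2.11:2.3.0,databricks:spark-sklearn:0.2.3 \
  --conf spark.sql.execution.arrow.enabled=true \
  my_job.py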
@@ -0,0 +1,90 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# export JAVA_HOME=
+# export MASTER=                       # Spark master url. eg. spark://master_addr:7077. Leave empty if you want to use local mode.
+# export ZEPPELIN_JAVA_OPTS            # Additional jvm options. for example, export ZEPPELIN_JAVA_OPTS="-Dspark.executor.memory=8g -Dspark.cores.max=16"
+# export ZEPPELIN_MEM                  # Zeppelin jvm mem options Default -Xms1024m -Xmx1024m -XX:MaxPermSize=512m
+# export ZEPPELIN_INTP_MEM             # zeppelin interpreter process jvm mem options. Default -Xms1024m -Xmx1024m -XX:MaxPermSize=512m
+# export ZEPPELIN_INTP_JAVA_OPTS       # zeppelin interpreter process jvm options.
+# export ZEPPELIN_SSL_PORT             # ssl port (used when ssl environment variable is set to true)
+
+# export ZEPPELIN_LOG_DIR              # Where log files are stored. PWD by default.
+# export ZEPPELIN_PID_DIR              # The pid files are stored. ${ZEPPELIN_HOME}/run by default.
+# export ZEPPELIN_WAR_TEMPDIR          # The location of jetty temporary directory.
+# export ZEPPELIN_NOTEBOOK_DIR         # Where notebook saved
+# export ZEPPELIN_NOTEBOOK_HOMESCREEN  # Id of notebook to be displayed in homescreen. ex) 2A94M5J1Z
+# export ZEPPELIN_NOTEBOOK_HOMESCREEN_HIDE # hide homescreen notebook from list when this value set to "true". default "false"
+# export ZEPPELIN_NOTEBOOK_S3_BUCKET   # Bucket where notebook saved
+# export ZEPPELIN_NOTEBOOK_S3_ENDPOINT # Endpoint of the bucket
+# export ZEPPELIN_NOTEBOOK_S3_USER     # User in bucket where notebook saved. For example bucket/user/notebook/2A94M5J1Z/note.json
+# export ZEPPELIN_NOTEBOOK_S3_KMS_KEY_ID # AWS KMS key ID
+# export ZEPPELIN_NOTEBOOK_S3_KMS_KEY_REGION # AWS KMS key region
+# export ZEPPELIN_IDENT_STRING         # A string representing this instance of zeppelin. $USER by default.
+# export ZEPPELIN_NICENESS             # The scheduling priority for daemons. Defaults to 0.
+# export ZEPPELIN_INTERPRETER_LOCALREPO # Local repository for interpreter's additional dependency loading
+# export ZEPPELIN_INTERPRETER_DEP_MVNREPO # Remote principal repository for interpreter's additional dependency loading
+# export ZEPPELIN_HELIUM_NPM_REGISTRY  # Remote Npm registry for Helium dependency loader
+# export ZEPPELIN_NOTEBOOK_STORAGE     # Refers to pluggable notebook storage class, can have two classes simultaneously with a sync between them (e.g. local and remote).
+# export ZEPPELIN_NOTEBOOK_ONE_WAY_SYNC # If there are multiple notebook storages, should we treat the first one as the only source of truth?
+# export ZEPPELIN_NOTEBOOK_PUBLIC      # Make notebook public by default when created, private otherwise
+
+#### Spark interpreter configuration ####
+
+## Use provided spark installation ##
+## defining SPARK_HOME makes Zeppelin run spark interpreter process using spark-submit
+##
+# export SPARK_HOME                    # (required) When it is defined, load it instead of Zeppelin embedded Spark libraries
+# export SPARK_SUBMIT_OPTIONS          # (optional) extra options to pass to spark submit. eg) "--driver-memory 512M --executor-memory 1G".
+# export SPARK_APP_NAME                # (optional) The name of spark application.
+
+## Use embedded spark binaries ##
+## without SPARK_HOME defined, Zeppelin still able to run spark interpreter process using embedded spark binaries.
+## however, it is not encouraged when you can define SPARK_HOME
+##
+# Options read in YARN client mode
+# export HADOOP_CONF_DIR               # yarn-site.xml is located in configuration directory in HADOOP_CONF_DIR.
+# Pyspark (supported with Spark 1.2.1 and above)
+# To configure pyspark, you need to set spark distribution's path to 'spark.home' property in Interpreter setting screen in Zeppelin GUI
+# export PYSPARK_PYTHON                # path to the python command. must be the same path on the driver(Zeppelin) and all workers.
+# export PYTHONPATH
+
+## Spark interpreter options ##
+##
+# export ZEPPELIN_SPARK_USEHIVECONTEXT # Use HiveContext instead of SQLContext if set true. true by default.
+# export ZEPPELIN_SPARK_CONCURRENTSQL  # Execute multiple SQL concurrently if set true. false by default.
+# export ZEPPELIN_SPARK_IMPORTIMPLICIT # Import implicits, UDF collection, and sql if set true. true by default.
+# export ZEPPELIN_SPARK_MAXRESULT      # Max number of Spark SQL result to display. 1000 by default.
+# export ZEPPELIN_WEBSOCKET_MAX_TEXT_MESSAGE_SIZE # Size in characters of the maximum text message to be received by websocket. Defaults to 1024000
+
+
+#### HBase interpreter configuration ####
+
+## To connect to HBase running on a cluster, either HBASE_HOME or HBASE_CONF_DIR must be set
+
+# export HBASE_HOME=                   # (require) Under which HBase scripts and configuration should be
+# export HBASE_CONF_DIR=               # (optional) Alternatively, configuration directory can be set to point to the directory that has hbase-site.xml
+
+#### ZeppelinHub connection configuration ####
+# export ZEPPELINHUB_API_ADDRESS       # Refers to the address of the ZeppelinHub service in use
+# export ZEPPELINHUB_API_TOKEN         # Refers to the Zeppelin instance token of the user
+# export ZEPPELINHUB_USER_KEY          # Optional, when using Zeppelin with authentication.
+
+#### Zeppelin impersonation configuration
+# export ZEPPELIN_IMPERSONATE_CMD      # Optional, when user want to run interpreter as end web user. eg) 'sudo -H -u ${ZEPPELIN_IMPERSONATE_USER} bash -c '
+# export ZEPPELIN_IMPERSONATE_SPARK_PROXY_USER # Optional, by default is true; can be set to false if you don't want to use --proxy-user option with Spark interpreter when impersonation enabled
@@ -28,8 +28,8 @@ check_min_requirements(){
     AVAILABLE_MEMORY=$(free -hm | awk 'NR==2{printf "%.f\t\t", $4 }')
     ES_MEMORY=$(free -hm | awk 'NR==2{printf "%.f", $4/2 }')
     AVAILABLE_DISK=$(df -h | awk '$NF=="/"{printf "%.f\t\t", $4}')
 
-    if [ "${AVAILABLE_MEMORY}" -ge "10" ] && [ "${AVAILABLE_DISK}" -ge "30" ]; then
+    if [ "${AVAILABLE_MEMORY}" -ge "12" ] && [ "${AVAILABLE_DISK}" -ge "30" ]; then
        echo "[HELK-INSTALLATION-INFO] Available Memory: $AVAILABLE_MEMORY"
        echo "[HELK-INSTALLATION-INFO] Available Disk: $AVAILABLE_DISK"
    else
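The bumped threshold still compares the rounded output of free -hm, whose units depend on what free chooses to print, so the check can misread on some hosts. A more explicit variant, sketched under the assumption of procps-ng free(1) and GNU df; this is an alternative sketch, not what the install script does:

# Sketch: integer-safe minimum requirements check (assumes procps-ng free and GNU df)
AVAILABLE_MEMORY=$(free -g | awk 'NR==2{print $7}')  # "available" column, whole GiB
AVAILABLE_DISK=$(df -BG --output=avail / | awk 'NR==2{gsub("G",""); print $1}')
if [ "${AVAILABLE_MEMORY}" -ge 12 ] && [ "${AVAILABLE_DISK}" -ge 30 ]; then
    echo "[HELK-INSTALLATION-INFO] Available Memory: ${AVAILABLE_MEMORY}G"
    echo "[HELK-INSTALLATION-INFO] Available Disk: ${AVAILABLE_DISK}G"
else
    echo "[HELK-INSTALLATION-INFO] Minimum requirements not met (12G RAM, 30G disk)"
fi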