From a17d8341bd8461a6cbc1fa5b5d9a351ee0316a1f Mon Sep 17 00:00:00 2001
From: Roberto Rodriguez
Date: Fri, 6 Jul 2018 23:11:41 -0400
Subject: [PATCH] HELK v0.1.1-alpha07062018

Docker Compose
++ Updated Spark images to 2.3.1

ELK Stack
++ Docker images updated to 6.3.1

helk-jupyter
++ Preparing Jupyter for JupyterHub
++ Spark base image now comes with a sparkuser user
++ Updated es-hadoop package to 6.3.1

helk-logstash
++ Updated Sysmon parser to transform RuleName, the new data field introduced in Sysmon v8.0

helk-spark-base
++ Images updated to 2.3.1

helk_install
++ Fixed https://github.com/Cyb3rWard0g/HELK/issues/81
++ Updated banner to show the right version
---
 docker-compose.yml                           |  8 +-
 helk-elasticsearch/Dockerfile                |  2 +-
 helk-elasticsearch/elasticsearch_jvm.options | 99 -------------------
 helk-jupyter/Dockerfile                      | 69 ++++++-------
 helk-jupyter/spark/spark-defaults.conf       |  2 +-
 helk-kibana/Dockerfile                       |  2 +-
 helk-logstash/Dockerfile                     |  2 +-
 .../pipeline/11-winevent-sysmon-filter.conf  |  9 ++
 helk-spark-base/Dockerfile                   | 18 +++-
 helk-spark-master/Dockerfile                 |  7 +-
 .../scripts/spark-master-entrypoint.sh       |  4 +-
 helk-spark-worker/Dockerfile                 |  7 +-
 .../scripts/spark-worker-entrypoint.sh       |  4 +-
 helk_install.sh                              |  8 +-
 14 files changed, 76 insertions(+), 165 deletions(-)
 delete mode 100644 helk-elasticsearch/elasticsearch_jvm.options

diff --git a/docker-compose.yml b/docker-compose.yml
index a1f09e8..fed4f52 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -61,7 +61,7 @@ services:
         aliases:
           - helk_nginx.hunt.local
   helk-spark-master:
-    image: cyb3rward0g/helk-spark-master:2.3.0
+    image: cyb3rward0g/helk-spark-master:2.3.1
     container_name: helk-spark-master
     ports:
       - "8080:8080"
@@ -74,7 +74,7 @@
         aliases:
           - helk_spark_master.hunt.local
   helk-spark-worker:
-    image: cyb3rward0g/helk-spark-worker:2.3.0
+    image: cyb3rward0g/helk-spark-worker:2.3.1
     container_name: helk-spark-worker
     environment:
       - SPARK_WORKER_MEMORY=1g
@@ -89,7 +89,7 @@
         aliases:
           - helk_spark_worker.hunt.local
   helk-spark-worker2:
-    image: cyb3rward0g/helk-spark-worker:2.3.0
+    image: cyb3rward0g/helk-spark-worker:2.3.1
     container_name: helk-spark-worker2
     environment:
       - SPARK_WORKER_MEMORY=1g
@@ -104,7 +104,7 @@
         aliases:
           - helk_spark_worker2.hunt.local
   helk-jupyter:
-    image: cyb3rward0g/helk-jupyter:0.32.1
+    build: helk-jupyter/
     container_name: helk-jupyter
     ports:
       - "8880:8880"
diff --git a/helk-elasticsearch/Dockerfile b/helk-elasticsearch/Dockerfile
index b9734ef..ee6cfed 100644
--- a/helk-elasticsearch/Dockerfile
+++ b/helk-elasticsearch/Dockerfile
@@ -9,6 +9,6 @@
 
 # *********** ELK Version ***************
-FROM docker.elastic.co/elasticsearch/elasticsearch:6.3.0
+FROM docker.elastic.co/elasticsearch/elasticsearch:6.3.1
 LABEL maintainer="Roberto Rodriguez @Cyb3rWard0g"
 LABEL description="Dockerfile base for the HELK Elasticsearch."
\ No newline at end of file
diff --git a/helk-elasticsearch/elasticsearch_jvm.options b/helk-elasticsearch/elasticsearch_jvm.options
deleted file mode 100644
index 7abe542..0000000
--- a/helk-elasticsearch/elasticsearch_jvm.options
+++ /dev/null
@@ -1,99 +0,0 @@
-## JVM configuration
-
-################################################################
-## IMPORTANT: JVM heap size
-################################################################
-##
-## You should always set the min and max JVM heap
-## size to the same value. For example, to set
-## the heap to 4 GB, set:
-##
-## -Xms4g
-## -Xmx4g
-##
-## See https://www.elastic.co/guide/en/elasticsearch/reference/current/heap-size.html
-## for more information
-##
-################################################################
-
-# Xms represents the initial size of total heap space
-# Xmx represents the maximum size of total heap space
-
--Xms1g
--Xmx1g
-
-################################################################
-## Expert settings
-################################################################
-##
-## All settings below this section are considered
-## expert settings. Don't tamper with them unless
-## you understand what you are doing
-##
-################################################################
-
-## GC configuration
--XX:+UseConcMarkSweepGC
--XX:CMSInitiatingOccupancyFraction=75
--XX:+UseCMSInitiatingOccupancyOnly
-
-## optimizations
-
-# pre-touch memory pages used by the JVM during initialization
--XX:+AlwaysPreTouch
-
-## basic
-
-# explicitly set the stack size
--Xss1m
-
-# set to headless, just in case
--Djava.awt.headless=true
-
-# ensure UTF-8 encoding by default (e.g. filenames)
--Dfile.encoding=UTF-8
-
-# use our provided JNA always versus the system one
--Djna.nosys=true
-
-# turn off a JDK optimization that throws away stack traces for common
-# exceptions because stack traces are important for debugging
--XX:-OmitStackTraceInFastThrow
-
-# flags to configure Netty
--Dio.netty.noUnsafe=true
--Dio.netty.noKeySetOptimization=true
--Dio.netty.recycler.maxCapacityPerThread=0
-
-# log4j 2
--Dlog4j.shutdownHookEnabled=false
--Dlog4j2.disable.jmx=true
-
--Djava.io.tmpdir=${ES_TMPDIR}
-
-## heap dumps
-
-# generate a heap dump when an allocation from the Java heap fails
-# heap dumps are created in the working directory of the JVM
--XX:+HeapDumpOnOutOfMemoryError
-
-# specify an alternative path for heap dumps
-# ensure the directory exists and has sufficient space
-#-XX:HeapDumpPath=/heap/dump/path
-
-## JDK 8 GC logging
-
-8:-XX:+PrintGCDetails
-8:-XX:+PrintGCDateStamps
-8:-XX:+PrintTenuringDistribution
-8:-XX:+PrintGCApplicationStoppedTime
-8:-Xloggc:logs/gc.log
-8:-XX:+UseGCLogFileRotation
-8:-XX:NumberOfGCLogFiles=32
-8:-XX:GCLogFileSize=64m
-
-# JDK 9+ GC logging
-9-:-Xlog:gc*,gc+age=trace,safepoint:file=logs/gc.log:utctime,pid,tags:filecount=32,filesize=64m
-# due to internationalization enhancements in JDK 9 Elasticsearch need to set the provider to COMPAT otherwise
-# time/date parsing will break in an incompatible way for some date patterns and locals
-9-:-Djava.locale.providers=COMPAT
\ No newline at end of file
diff --git a/helk-jupyter/Dockerfile b/helk-jupyter/Dockerfile
index c911b43..d2125b3 100644
--- a/helk-jupyter/Dockerfile
+++ b/helk-jupyter/Dockerfile
@@ -3,17 +3,17 @@
 # Author: Roberto Rodriguez (@Cyb3rWard0g)
 # License: BSD 3-Clause
 
-FROM cyb3rward0g/helk-spark-base:2.3.0
+FROM cyb3rward0g/helk-spark-base:2.3.1
 LABEL maintainer="Roberto Rodriguez @Cyb3rWard0g"
 LABEL description="Dockerfile base for HELK Jupyter."
 
 ENV DEBIAN_FRONTEND noninteractive
 
+USER root
+
 # *********** Installing Prerequisites ***************
 # -qq : No output except for errors
-RUN echo "[HELK-DOCKER-INSTALLATION-INFO] Updating Ubuntu base image.." \
-  && apt-get update -qq \
-  && echo "[HELK-DOCKER-INSTALLATION-INFO] Extracting templates from packages.." \
+RUN echo "[HELK-DOCKER-INSTALLATION-INFO] Extracting templates from packages.." \
   && apt-get install -qqy \
   python3-pip \
   python-tk \
@@ -25,46 +25,38 @@ RUN apt-get -qy clean \
 
 # *********** Upgrading PIP ***************
 RUN pip3 install --upgrade pip
 
+# *********** Installing Jupyter Hub Prerequisites
+RUN curl -sL https://deb.nodesource.com/setup_8.x | sudo -E bash -
+RUN apt-get install -y nodejs
+
 # *********** Installing HELK python packages ***************
 RUN pip3 install pandas \
   jupyter \
-  jupyterlab
+  jupyterlab \
+  jupyterhub
 
-RUN pip3 install scipy \
-  scikit-learn \
-  nltk \
-  enum34 \
-  matplotlib==2.1.2 \
-  seaborn \
-  datasketch \
-  keras \
-  pyflux \
-  imbalanced-learn \
-  lime \
-  bokeh \
-  networkx==2.0 \
-  numpy==1.14.0 \
-  nxviz \
-  hiveplot \
-  pyarrow
+RUN npm install -g configurable-http-proxy
+RUN jupyter labextension install @jupyterlab/hub-extension
 
 # *********** Creating the right directories ***************
-RUN bash -c 'mkdir -pv /opt/helk/{notebooks,es-hadoop}'
+RUN bash -c 'mkdir -pv /opt/helk/{es-hadoop,jupyter}'
+
+# *********** Setting Jupyterhub***********************
+ENV JUPYTER_DIR=/opt/helk/jupyter
 
 # *********** Adding HELK scripts and files to Container ***************
-ADD scripts/jupyter-entrypoint.sh /opt/helk/
-RUN chmod +x /opt/helk/jupyter-entrypoint.sh
-ADD notebooks/ /opt/helk/notebooks/
+ADD scripts/jupyter-entrypoint.sh ${JUPYTER_DIR}
+ADD notebooks ${JUPYTER_DIR}/notebooks
 
-# *********** Install ES-Hadoop ***************
-ENV ESHADOOP_VERSION=6.2.4
+# *********** Download ES-Hadoop ***************
+ENV ESHADOOP_VERSION=6.3.1
 RUN wget https://artifacts.elastic.co/downloads/elasticsearch-hadoop/elasticsearch-hadoop-${ESHADOOP_VERSION}.zip -P /opt/helk/es-hadoop/ \
-  && unzip /opt/helk/es-hadoop/*.zip -d /opt/helk/es-hadoop/ \
+  && unzip -j /opt/helk/es-hadoop/*.zip -d /opt/helk/es-hadoop/ \
   && rm /opt/helk/es-hadoop/*.zip
 
-# *********** Configure Spark ***************
-ENV JUPYTER_LOGS_PATH=/var/log/jupyter
-ENV JUPYTER_CONSOLE_LOG=/var/log/jupyter/jupyter.log
+# *********** Configure Jupyterhub ***************
+ENV JUPYTER_LOGS_PATH=${JUPYTER_DIR}/log
+ENV JUPYTER_CONSOLE_LOG=${JUPYTER_LOGS_PATH}/jupyter.log
 ENV JUPYTER_EXEC=$SPARK_HOME/bin/pyspark
 ENV JUPYTER_LOGS=">> $JUPYTER_CONSOLE_LOG 2>&1"
 
@@ -72,17 +64,18 @@ RUN mkdir -v $JUPYTER_LOGS_PATH
 ADD spark/log4j.properties ${SPARK_HOME}/conf/
 ADD spark/spark-defaults.conf ${SPARK_HOME}/conf/
 
+# *********** Update Jupyter PySpark Kernel *************
+#ADD kernels/pyspark_kernel.json /usr/local/share/jupyter/kernels/python3/kernel.json
+
 # ************* Adding SPARK environment variables *************
 ENV PATH=$SPARK_HOME/bin:$PATH
 ENV PYSPARK_PYTHON=/usr/bin/python3
 ENV PYSPARK_DRIVER_PYTHON=/usr/local/bin/jupyter
-ENV PYSPARK_DRIVER_PYTHON_OPTS="lab --no-browser --ip=* --port=8880 --allow-root"
+ENV PYTHONPATH $SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.7-src.zip
+ENV PYSPARK_DRIVER_PYTHON_OPTS="lab --no-browser --ip=* --port=8880 --allow-root --notebook-dir=/opt/helk/jupyter/notebooks"
 
-# *********** Update Jupyter PySpark Kernel *************
-ADD kernels/pyspark_kernel.json /usr/local/share/jupyter/kernels/python3/kernel.json
-
-# *********** RUN HELK ***************
 EXPOSE 4040 8880
 
-WORKDIR "/opt/helk/"
+# *********** RUN HELK ***************
+WORKDIR ${JUPYTER_DIR}
 ENTRYPOINT ["./jupyter-entrypoint.sh"]
\ No newline at end of file
diff --git a/helk-jupyter/spark/spark-defaults.conf b/helk-jupyter/spark/spark-defaults.conf
index a0db81e..95c2de1 100644
--- a/helk-jupyter/spark/spark-defaults.conf
+++ b/helk-jupyter/spark/spark-defaults.conf
@@ -33,6 +33,6 @@
 # https://spark.apache.org/docs/latest/sql-programming-guide.html#pyspark-usage-guide-for-pandas-with-apache-arrow
 
 spark.master spark://helk-spark-master:7077
-spark.jars /opt/helk/es-hadoop/elasticsearch-hadoop-6.2.4/dist/elasticsearch-hadoop-6.2.4.jar
+spark.jars /opt/helk/es-hadoop/elasticsearch-hadoop-6.3.1.jar
 spark.jars.packages graphframes:graphframes:0.5.0-spark2.1-s_2.11,org.apache.spark:spark-sql-kafka-0-10_2.11:2.3.0,databricks:spark-sklearn:0.2.3
 spark.sql.execution.arrow.enabled true
\ No newline at end of file
diff --git a/helk-kibana/Dockerfile b/helk-kibana/Dockerfile
index de1fa84..ec77df1 100644
--- a/helk-kibana/Dockerfile
+++ b/helk-kibana/Dockerfile
@@ -8,7 +8,7 @@
 # https://cyberwardog.blogspot.com/2017/02/setting-up-pentesting-i-mean-threat_98.html
 # https://github.com/spujadas/elk-docker/blob/master/Dockerfile
 
-FROM docker.elastic.co/kibana/kibana:6.3.0
+FROM docker.elastic.co/kibana/kibana:6.3.1
 LABEL maintainer="Roberto Rodriguez @Cyb3rWard0g"
 LABEL description="Dockerfile base for the HELK Kibana."
 
diff --git a/helk-logstash/Dockerfile b/helk-logstash/Dockerfile
index 721a6e9..e2252a0 100644
--- a/helk-logstash/Dockerfile
+++ b/helk-logstash/Dockerfile
@@ -8,6 +8,6 @@
 # https://cyberwardog.blogspot.com/2017/02/setting-up-pentesting-i-mean-threat_98.html
 # https://github.com/spujadas/elk-docker/blob/master/Dockerfile
 
-FROM docker.elastic.co/logstash/logstash:6.3.0
+FROM docker.elastic.co/logstash/logstash:6.3.1
 LABEL maintainer="Roberto Rodriguez @Cyb3rWard0g"
 LABEL description="Dockerfile base for the HELK Logstash."
\ No newline at end of file
diff --git a/helk-logstash/pipeline/11-winevent-sysmon-filter.conf b/helk-logstash/pipeline/11-winevent-sysmon-filter.conf
index f103bf8..f18edb8 100644
--- a/helk-logstash/pipeline/11-winevent-sysmon-filter.conf
+++ b/helk-logstash/pipeline/11-winevent-sysmon-filter.conf
@@ -19,6 +19,15 @@ filter {
       gsub => ["provider_guid","[{}]",""]
     }
   }
+  if [event_data][RuleName] {
+    kv {
+      source => "[event_data][RuleName]"
+      field_split => ","
+      value_split => "="
+      prefix => "mitre_"
+      transform_key => "lowercase"
+    }
+  }
   if [event_data][Image] {
     if [event_data][Image] =~ /^(\w*$)|^(\w*\..*$)/ {
       mutate {
diff --git a/helk-spark-base/Dockerfile b/helk-spark-base/Dockerfile
index 28149b8..080f26d 100644
--- a/helk-spark-base/Dockerfile
+++ b/helk-spark-base/Dockerfile
@@ -24,10 +24,20 @@ RUN apt-get -qy clean \
 RUN bash -c 'mkdir -pv /opt/helk/spark'
 
 # *********** Install Spark ***************
-ENV SPARK_VERSION=2.3.0
+ENV SPARK_VERSION=2.3.1
 ENV APACHE_HADOOP_VERSION=2.7
-
-RUN wget -qO- http://download.nextag.com/apache/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${APACHE_HADOOP_VERSION}.tgz | sudo tar xvz -C /opt/helk/spark/
+RUN wget -qO- http://mirror.reverse.net/pub/apache/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${APACHE_HADOOP_VERSION}.tgz | sudo tar xvz -C /opt/helk/spark --strip-components=1
 
 # Adding SPARK environment variables
-ENV SPARK_HOME=/opt/helk/spark/spark-${SPARK_VERSION}-bin-hadoop${APACHE_HADOOP_VERSION}
\ No newline at end of file
+ENV SPARK_HOME=/opt/helk/spark
+ENV SPARK_LOGS=$SPARK_HOME/logs
+
+RUN mkdir -p $SPARK_LOGS
+
+# Adding SPARK User
+ENV SPARK_GID=710
+ENV SPARK_UID=710
+ENV SPARK_USER=sparkuser
+RUN groupadd -g ${SPARK_GID} ${SPARK_USER} \
+  && useradd -u ${SPARK_UID} -g ${SPARK_GID} -d ${SPARK_HOME} --no-create-home ${SPARK_USER} \
+  && chown -R ${SPARK_USER}:${SPARK_USER} ${SPARK_HOME}
\ No newline at end of file
diff --git a/helk-spark-master/Dockerfile b/helk-spark-master/Dockerfile
index 9d7d154..60cbaec 100644
--- a/helk-spark-master/Dockerfile
+++ b/helk-spark-master/Dockerfile
@@ -3,20 +3,19 @@
 # Author: Roberto Rodriguez (@Cyb3rWard0g)
 # License: BSD 3-Clause
 
-FROM cyb3rward0g/helk-spark-base:2.3.0
+FROM cyb3rward0g/helk-spark-base:2.3.1
 LABEL maintainer="Roberto Rodriguez @Cyb3rWard0g"
 LABEL description="Dockerfile base for HELK Spark Master."
 
 ENV DEBIAN_FRONTEND noninteractive
 
+USER sparkuser
+
 ADD scripts/spark-master-entrypoint.sh ${SPARK_HOME}/
 
 ENV SPARK_MASTER_HOST helk-spark-master
 ENV SPARK_MASTER_PORT 7077
 ENV SPARK_MASTER_WEBUI_PORT 8080
-ENV SPARK_MASTER_LOG $SPARK_HOME/logs
-
-RUN mkdir -p $SPARK_MASTER_LOG
 
 EXPOSE 8080 7077
 
 WORKDIR $SPARK_HOME
diff --git a/helk-spark-master/scripts/spark-master-entrypoint.sh b/helk-spark-master/scripts/spark-master-entrypoint.sh
index adbb32d..11e75a1 100755
--- a/helk-spark-master/scripts/spark-master-entrypoint.sh
+++ b/helk-spark-master/scripts/spark-master-entrypoint.sh
@@ -6,8 +6,8 @@
 # Author: Roberto Rodriguez (@Cyb3rWard0g)
 # License: BSD 3-Clause
 
-ln -sf /dev/stdout $SPARK_MASTER_LOG/spark-master.out
+ln -sf /dev/stdout $SPARK_LOGS/spark-master.out
 
 echo "[HELK-DOCKER-INSTALLATION-INFO] Starting Spark Master Service.."
 exec $SPARK_HOME/bin/spark-class org.apache.spark.deploy.master.Master \
-    --host $SPARK_MASTER_HOST --port $SPARK_MASTER_PORT --webui-port $SPARK_MASTER_WEBUI_PORT >> $SPARK_MASTER_LOG/spark-master.out
\ No newline at end of file
+    --host $SPARK_MASTER_HOST --port $SPARK_MASTER_PORT --webui-port $SPARK_MASTER_WEBUI_PORT >> $SPARK_LOGS/spark-master.out
\ No newline at end of file
diff --git a/helk-spark-worker/Dockerfile b/helk-spark-worker/Dockerfile
index e39302f..7728bdd 100644
--- a/helk-spark-worker/Dockerfile
+++ b/helk-spark-worker/Dockerfile
@@ -3,19 +3,18 @@
 # Author: Roberto Rodriguez (@Cyb3rWard0g)
 # License: BSD 3-Clause
 
-FROM cyb3rward0g/helk-spark-base:2.3.0
+FROM cyb3rward0g/helk-spark-base:2.3.1
 LABEL maintainer="Roberto Rodriguez @Cyb3rWard0g"
 LABEL description="Dockerfile base for HELK Spark Worker."
 
 ENV DEBIAN_FRONTEND noninteractive
 
+USER sparkuser
+
 ADD scripts/spark-worker-entrypoint.sh ${SPARK_HOME}/
 
-ENV SPARK_WORKER_LOG $SPARK_HOME/logs
 ENV SPARK_MASTER "spark://helk-spark-master:7077"
 
-RUN mkdir -p $SPARK_WORKER_LOG
-
 EXPOSE $SPARK_WORKER_WEBUI_PORT
 
 WORKDIR $SPARK_HOME
 ENTRYPOINT ["./spark-worker-entrypoint.sh"]
\ No newline at end of file
diff --git a/helk-spark-worker/scripts/spark-worker-entrypoint.sh b/helk-spark-worker/scripts/spark-worker-entrypoint.sh
index 8ed3fe7..2b2b4e1 100755
--- a/helk-spark-worker/scripts/spark-worker-entrypoint.sh
+++ b/helk-spark-worker/scripts/spark-worker-entrypoint.sh
@@ -6,8 +6,8 @@
 # Author: Roberto Rodriguez (@Cyb3rWard0g)
 # License: BSD 3-Clause
 
-ln -sf /dev/stdout $SPARK_WORKER_LOG/spark-worker.out
+ln -sf /dev/stdout $SPARK_LOGS/spark-worker.out
 
 echo "[HELK-DOCKER-INSTALLATION-INFO] Starting Spark Worker Service.."
 exec /$SPARK_HOME/bin/spark-class org.apache.spark.deploy.worker.Worker \
-    --webui-port $SPARK_WORKER_WEBUI_PORT $SPARK_MASTER >> $SPARK_WORKER_LOG/spark-worker.out
\ No newline at end of file
+    --webui-port $SPARK_WORKER_WEBUI_PORT $SPARK_MASTER >> $SPARK_LOGS/spark-worker.out
\ No newline at end of file
diff --git a/helk_install.sh b/helk_install.sh
index 0f8c9d4..b0a6533 100755
--- a/helk_install.sh
+++ b/helk_install.sh
@@ -25,8 +25,8 @@ systemKernel="$(uname -s)"
 check_min_requirements(){
     echo "[HELK-INSTALLATION-INFO] HELK being hosted on a $systemKernel box"
     if [ "$systemKernel" == "Linux" ]; then
-        AVAILABLE_MEMORY=$(free -hm | awk 'NR==2{printf "%.f\t\t", $7 }')
-        ES_MEMORY=$(free -hm | awk 'NR==2{printf "%.f", $7/2 }')
+        AVAILABLE_MEMORY=$(awk '/MemAvailable/{printf "%.f", $2/1024/1024}' /proc/meminfo)
+        ES_MEMORY=$(awk '/MemAvailable/{printf "%.f", $2/1024/1024/2}' /proc/meminfo)
         AVAILABLE_DISK=$(df -m | awk '$NF=="/"{printf "%.f\t\t", $4 / 1024}')
 
         if [ "${AVAILABLE_MEMORY}" -ge "12" ] && [ "${AVAILABLE_DISK}" -ge "30" ]; then
@@ -262,8 +262,8 @@ show_banner(){
     echo "** HELK - THE HUNTING ELK **"
     echo "** **"
     echo "** Author: Roberto Rodriguez (@Cyb3rWard0g) **"
-    echo "** HELK build version: 0.9 (Alpha) **"
-    echo "** HELK ELK version: 6.3.0 **"
+    echo "** HELK build version: v0.1.1-alpha07062018 **"
+    echo "** HELK ELK version: 6.3.1 **"
     echo "** License: BSD 3-Clause **"
     echo "**********************************************"
     echo " "
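
Reviewer note (supplementary, not part of the patch): a minimal smoke test for the
new RuleName kv filter, assuming a local Logstash 6.3.1 install run from its home
directory and a Sysmon config that tags rules as comma-separated key=value pairs.
The sample RuleName value below is hypothetical; only the kv options mirror what
the patch adds to 11-winevent-sysmon-filter.conf.

    # Feed one synthetic Sysmon-style event through the same kv logic
    # and print the parsed fields to stdout.
    echo '{"event_data":{"RuleName":"technique_id=T1055,technique_name=Process Injection"}}' | \
    bin/logstash -e '
      input { stdin { codec => json } }
      filter {
        kv {
          source => "[event_data][RuleName]"
          field_split => ","
          value_split => "="
          prefix => "mitre_"
          transform_key => "lowercase"
        }
      }
      output { stdout { codec => rubydebug } }'
    # Expected fields in the output event:
    #   mitre_technique_id   => "T1055"
    #   mitre_technique_name => "Process Injection"

With transform_key => "lowercase" and prefix => "mitre_", any rule tagged this way
in the Sysmon config lands in Elasticsearch as queryable mitre_* fields.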
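A second note on the helk_install.sh change: /proc/meminfo reports MemAvailable in
kB, so dividing $2 by 1024 twice yields whole GiB for the -ge 12 comparison, and
halving it sizes ES_MEMORY. Reading /proc/meminfo directly avoids depending on the
output format of free, which varies across procps versions (presumably the failure
mode behind issue #81; the patch itself does not say).

    # Same computation the installer now performs, runnable standalone on Linux:
    awk '/MemAvailable/{printf "%.f\n", $2/1024/1024}' /proc/meminfo    # GiB available
    awk '/MemAvailable/{printf "%.f\n", $2/1024/1024/2}' /proc/meminfo  # half of it, for ES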