diff --git a/docker/helk-jupyter/Dockerfile b/docker/helk-jupyter/Dockerfile index a9950c1..d297573 100644 --- a/docker/helk-jupyter/Dockerfile +++ b/docker/helk-jupyter/Dockerfile @@ -8,6 +8,6 @@ LABEL maintainer="Roberto Rodriguez @Cyb3rWard0g" LABEL description="Dockerfile Notebooks-Forge Jupyter-Hunt Project." # ********** Adding HELK Jupyter notebooks -RUN mkdir /opt/helk/jupyter/datasets +RUN mkdir /opt/helk/jupyter/notebooks/datasets COPY --chown=jupyter:810 notebooks/* /opt/helk/jupyter/notebooks/ -COPY --chown=jupyter:810 datasets/* /opt/helk/jupyter/datasets/ +COPY --chown=jupyter:810 datasets/* /opt/helk/jupyter/notebooks/datasets/ diff --git a/docker/helk-jupyter/notebooks/read_elasticsearch_via_spark.ipynb b/docker/helk-jupyter/notebooks/read_elasticsearch_via_spark.ipynb new file mode 100644 index 0000000..cd10eb5 --- /dev/null +++ b/docker/helk-jupyter/notebooks/read_elasticsearch_via_spark.ipynb @@ -0,0 +1,218 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Import Libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from pyspark.sql import SparkSession" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create SparkSession" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "spark = SparkSession.builder \\\n", + " .appName(\"HELK Reader\") \\\n", + " .master(\"spark://helk-spark-master:7077\") \\\n", + " .enableHiveSupport() \\\n", + " .getOrCreate()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Verify Spark Variable" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "

SparkSession - hive

\n", + " \n", + "
\n", + "

SparkContext

\n", + "\n", + "

Spark UI

\n", + "\n", + "
\n", + "
Version
\n", + "
v2.4.3
\n", + "
Master
\n", + "
spark://helk-spark-master:7077
\n", + "
AppName
\n", + "
HELK Reader
\n", + "
\n", + "
\n", + " \n", + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "spark" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Initiate Elasticsearch Dataframe Reader" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "es_reader = (spark.read\n", + " .format(\"org.elasticsearch.spark.sql\")\n", + " .option(\"inferSchema\", \"true\")\n", + " .option(\"es.read.field.as.array.include\", \"tags\")\n", + " .option(\"es.nodes\",\"helk-elasticsearch:9200\")\n", + " .option(\"es.net.http.auth.user\",\"elastic\")\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load Data from Elasticsearch : Sysmon Index" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "sysmon_df = es_reader.load(\"logs-endpoint-winevent-sysmon-*/\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "processcreate_df = sysmon_df.filter(sysmon_df.action == \"processcreate\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "processcreate_df = processcreate_df.select(\n", + " \"process_guid\",\"process_parent_name\",\"process_parent_command_line\",\n", + " \"process_name\",\"process_command_line\",\"action\",\"@timestamp\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Show Sysmon Spark DataFrame" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+--------------------+-------------------+---------------------------+--------------------+--------------------+-------------+--------------------+\n", + "| process_guid|process_parent_name|process_parent_command_line| 
process_name|process_command_line| action| @timestamp|\n", + "+--------------------+-------------------+---------------------------+--------------------+--------------------+-------------+--------------------+\n", + "|aa6b4a20-7cde-5ce...| svchost.exe| c:\\windows\\system...| wmiprvse.exe|c:\\windows\\system...|processcreate|2019-05-18 21:45:...|\n", + "|aa6b4a20-7cde-5ce...| wmiprvse.exe| c:\\windows\\system...| powershell.exe|c:\\windows\\system...|processcreate|2019-05-18 21:45:...|\n", + "|aa6b4a20-7cdf-5ce...| powershell.exe| c:\\windows\\system...| conhost.exe|\\??\\c:\\windows\\sy...|processcreate|2019-05-18 21:45:...|\n", + "|aa6b4a20-7ce7-5ce...| winlogon.exe| winlogon.exe| logonui.exe|\"logonui.exe\" /fl...|processcreate|2019-05-18 21:45:...|\n", + "|aa6b4a20-7ce9-5ce...| svchost.exe| c:\\windows\\system...| taskhostw.exe|taskhostw.exe key...|processcreate|2019-05-18 21:45:...|\n", + "|aa6b4a20-7cd9-5ce...| svchost.exe| c:\\windows\\system...|backgroundtaskhos...|\"c:\\windows\\syste...|processcreate|2019-05-18 21:44:...|\n", + "|aa6b4a20-7cda-5ce...| svchost.exe| c:\\windows\\system...| runtimebroker.exe|c:\\windows\\system...|processcreate|2019-05-18 21:44:...|\n", + "|aa6b4a20-7cea-5ce...| svchost.exe| c:\\windows\\system...| dllhost.exe|c:\\windows\\system...|processcreate|2019-05-18 21:45:...|\n", + "|aa6b4a20-7cea-5ce...| svchost.exe| c:\\windows\\system...| taskhostw.exe| taskhostw.exe|processcreate|2019-05-18 21:45:...|\n", + "|aa6b4a20-7cec-5ce...| svchost.exe| c:\\windows\\system...| dllhost.exe|c:\\windows\\system...|processcreate|2019-05-18 21:45:...|\n", + "+--------------------+-------------------+---------------------------+--------------------+--------------------+-------------+--------------------+\n", + "only showing top 10 rows\n", + "\n" + ] + } + ], + "source": [ + "processcreate_df.show(10)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "" + ] + } + ], + "metadata": 
{ + "kernelspec": { + "display_name": "PySpark_Python3", + "language": "python", + "name": "pyspark3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docker/helk-jupyter/notebooks/security_sysmon_sql_join.ipynb b/docker/helk-jupyter/notebooks/security_sysmon_sql_join.ipynb new file mode 100644 index 0000000..7a096dd --- /dev/null +++ b/docker/helk-jupyter/notebooks/security_sysmon_sql_join.ipynb @@ -0,0 +1,225 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from pyspark.sql import SparkSession" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "spark = SparkSession.builder \\\n", + " .appName(\"HELK JOIN\") \\\n", + " .master(\"spark://helk-spark-master:7077\") \\\n", + " .enableHiveSupport() \\\n", + " .getOrCreate()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "es_reader = (spark.read\n", + " .format(\"org.elasticsearch.spark.sql\")\n", + " .option(\"inferSchema\", \"true\")\n", + " .option(\"es.read.field.as.array.include\", \"tags\")\n", + " .option(\"es.nodes\",\"helk-elasticsearch:9200\")\n", + " .option(\"es.net.http.auth.user\",\"elastic\")\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "security_df = es_reader.load(\"logs-endpoint-winevent-security-*/\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "security_df.createOrReplaceTempView(\"security_events\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + 
"source": [ + "security_4624_3 = spark.sql(\n", + " '''\n", + " SELECT event_id,\n", + " host_name,\n", + " src_ip_addr,\n", + " user_logon_id,\n", + " user_name,\n", + " logon_type,\n", + " `@timestamp`\n", + " FROM security_events\n", + " WHERE event_id = 4624\n", + " AND logon_type = 3\n", + " AND src_ip_addr is not null\n", + " AND `@timestamp` BETWEEN \"2019-05-18 00:00:00.000\" AND \"2019-05-19 00:00:00.000\"\n", + " '''\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "security_4624_3.createOrReplaceTempView(\"security_4624_3\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "sysmon_df = es_reader.load(\"logs-endpoint-winevent-sysmon-*/\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "sysmon_df.createOrReplaceTempView(\"sysmon_events\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "sysmon_processcreate = spark.sql(\n", + " '''\n", + " SELECT event_id,\n", + " host_name,\n", + " process_parent_name,\n", + " process_parent_guid,\n", + " process_parent_command_line,\n", + " process_name,\n", + " process_guid,\n", + " process_command_line,\n", + " user_logon_id,\n", + " `@timestamp`\n", + " FROM sysmon_events\n", + " WHERE event_id = 1\n", + " AND `@timestamp` BETWEEN \"2019-05-18 00:00:00.000\" AND \"2019-05-19 00:00:00.000\"\n", + " '''\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "sysmon_processcreate.createOrReplaceTempView(\"sysmon_1\")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "security_sysmon_join = spark.sql(\n", + " '''\n", + " SELECT s.`@timestamp`,\n", + " s.host_name,\n", + " s.src_ip_addr,\n", + " s.logon_type,\n", + " 
s.user_logon_id,\n", + " s.user_name,\n", + " p.process_parent_name,\n", + " p.process_parent_guid,\n", + " p.process_parent_command_line,\n", + " p.process_name,\n", + " p.process_guid,\n", + " p.process_command_line\n", + " FROM security_4624_3 s\n", + " INNER JOIN sysmon_1 p\n", + " ON s.user_logon_id = p.user_logon_id\n", + " '''\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+--------------------+-------------+---------------+---------+-------------------+---------------------------+--------------+--------------------+\n", + "| @timestamp| src_ip_addr| host_name|user_name|process_parent_name|process_parent_command_line| process_name|process_command_line|\n", + "+--------------------+-------------+---------------+---------+-------------------+---------------------------+--------------+--------------------+\n", + "|2019-05-18 21:45:...|172.18.39.106|IT001.shire.com| pgustavo| wmiprvse.exe| c:\\windows\\system...|powershell.exe|c:\\windows\\system...|\n", + "|2019-05-18 21:45:...|172.18.39.106|IT001.shire.com| pgustavo| powershell.exe| c:\\windows\\system...| conhost.exe|\\??\\c:\\windows\\sy...|\n", + "|2019-05-18 21:45:...|172.18.39.106|IT001.shire.com| pgustavo| powershell.exe| c:\\windows\\system...| whoami.exe|\"c:\\windows\\syste...|\n", + "+--------------------+-------------+---------------+---------+-------------------+---------------------------+--------------+--------------------+\n", + "\n" + ] + } + ], + "source": [ + "security_sysmon_join.select(\n", + " \"@timestamp\",\"src_ip_addr\",\"host_name\",\"user_name\",\"process_parent_name\",\"process_parent_command_line\",\"process_name\",\"process_command_line\"\n", + ").show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "PySpark_Python3", + "language": 
"python", + "name": "pyspark3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}