diff --git a/docker/helk-jupyter/Dockerfile b/docker/helk-jupyter/Dockerfile
index a9950c1..d297573 100644
--- a/docker/helk-jupyter/Dockerfile
+++ b/docker/helk-jupyter/Dockerfile
@@ -8,6 +8,6 @@ LABEL maintainer="Roberto Rodriguez @Cyb3rWard0g"
LABEL description="Dockerfile Notebooks-Forge Jupyter-Hunt Project."
# ********** Adding HELK Jupyter notebooks
-RUN mkdir /opt/helk/jupyter/datasets
+RUN mkdir -p /opt/helk/jupyter/notebooks/datasets
COPY --chown=jupyter:810 notebooks/* /opt/helk/jupyter/notebooks/
-COPY --chown=jupyter:810 datasets/* /opt/helk/jupyter/datasets/
+COPY --chown=jupyter:810 datasets/* /opt/helk/jupyter/notebooks/datasets/
diff --git a/docker/helk-jupyter/notebooks/read_elasticsearch_via_spark.ipynb b/docker/helk-jupyter/notebooks/read_elasticsearch_via_spark.ipynb
new file mode 100644
index 0000000..cd10eb5
--- /dev/null
+++ b/docker/helk-jupyter/notebooks/read_elasticsearch_via_spark.ipynb
@@ -0,0 +1,218 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Import Libraries"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from pyspark.sql import SparkSession"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Create SparkSession"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "spark = SparkSession.builder \\\n",
+ " .appName(\"HELK Reader\") \\\n",
+ " .master(\"spark://helk-spark-master:7077\") \\\n",
+ " .enableHiveSupport() \\\n",
+ " .getOrCreate()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Verify Spark Variable"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
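+ " <div>\n",
+ " <p><b>SparkSession - hive</b></p>\n",
+ " \n",
+ " <div>\n",
+ " <p><b>SparkContext</b></p>\n",
+ "\n",
+ " <p><a>Spark UI</a></p>\n",
+ "\n",
+ " <dl>\n",
+ " <dt>Version</dt>\n",
+ " <dd><code>v2.4.3</code></dd>\n",
+ " <dt>Master</dt>\n",
+ " <dd><code>spark://helk-spark-master:7077</code></dd>\n",
+ " <dt>AppName</dt>\n",
+ " <dd><code>HELK Reader</code></dd>\n",
+ " </dl>\n",
+ " </div>\n",
+ " \n",
+ " </div>\n",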
+ " "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "spark"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Initiate Elasticsearch Dataframe Reader"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "es_reader = (spark.read\n",
+ " .format(\"org.elasticsearch.spark.sql\")\n",
+ " .option(\"inferSchema\", \"true\")\n",
+ " .option(\"es.read.field.as.array.include\", \"tags\")\n",
+ " .option(\"es.nodes\",\"helk-elasticsearch:9200\")\n",
+ " .option(\"es.net.http.auth.user\",\"elastic\")\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Load Data from Elasticsearch : Sysmon Index"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sysmon_df = es_reader.load(\"logs-endpoint-winevent-sysmon-*/\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "processcreate_df = sysmon_df.filter(sysmon_df.action == \"processcreate\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "processcreate_df = processcreate_df.select(\n",
+ " \"process_guid\",\"process_parent_name\",\"process_parent_command_line\",\n",
+ " \"process_name\",\"process_command_line\",\"action\",\"@timestamp\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Show Sysmon Spark DataFrame"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "+--------------------+-------------------+---------------------------+--------------------+--------------------+-------------+--------------------+\n",
+ "| process_guid|process_parent_name|process_parent_command_line| process_name|process_command_line| action| @timestamp|\n",
+ "+--------------------+-------------------+---------------------------+--------------------+--------------------+-------------+--------------------+\n",
+ "|aa6b4a20-7cde-5ce...| svchost.exe| c:\\windows\\system...| wmiprvse.exe|c:\\windows\\system...|processcreate|2019-05-18 21:45:...|\n",
+ "|aa6b4a20-7cde-5ce...| wmiprvse.exe| c:\\windows\\system...| powershell.exe|c:\\windows\\system...|processcreate|2019-05-18 21:45:...|\n",
+ "|aa6b4a20-7cdf-5ce...| powershell.exe| c:\\windows\\system...| conhost.exe|\\??\\c:\\windows\\sy...|processcreate|2019-05-18 21:45:...|\n",
+ "|aa6b4a20-7ce7-5ce...| winlogon.exe| winlogon.exe| logonui.exe|\"logonui.exe\" /fl...|processcreate|2019-05-18 21:45:...|\n",
+ "|aa6b4a20-7ce9-5ce...| svchost.exe| c:\\windows\\system...| taskhostw.exe|taskhostw.exe key...|processcreate|2019-05-18 21:45:...|\n",
+ "|aa6b4a20-7cd9-5ce...| svchost.exe| c:\\windows\\system...|backgroundtaskhos...|\"c:\\windows\\syste...|processcreate|2019-05-18 21:44:...|\n",
+ "|aa6b4a20-7cda-5ce...| svchost.exe| c:\\windows\\system...| runtimebroker.exe|c:\\windows\\system...|processcreate|2019-05-18 21:44:...|\n",
+ "|aa6b4a20-7cea-5ce...| svchost.exe| c:\\windows\\system...| dllhost.exe|c:\\windows\\system...|processcreate|2019-05-18 21:45:...|\n",
+ "|aa6b4a20-7cea-5ce...| svchost.exe| c:\\windows\\system...| taskhostw.exe| taskhostw.exe|processcreate|2019-05-18 21:45:...|\n",
+ "|aa6b4a20-7cec-5ce...| svchost.exe| c:\\windows\\system...| dllhost.exe|c:\\windows\\system...|processcreate|2019-05-18 21:45:...|\n",
+ "+--------------------+-------------------+---------------------------+--------------------+--------------------+-------------+--------------------+\n",
+ "only showing top 10 rows\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "processcreate_df.show(10)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ ""
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "PySpark_Python3",
+ "language": "python",
+ "name": "pyspark3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/docker/helk-jupyter/notebooks/security_sysmon_sql_join.ipynb b/docker/helk-jupyter/notebooks/security_sysmon_sql_join.ipynb
new file mode 100644
index 0000000..7a096dd
--- /dev/null
+++ b/docker/helk-jupyter/notebooks/security_sysmon_sql_join.ipynb
@@ -0,0 +1,225 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from pyspark.sql import SparkSession"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "spark = SparkSession.builder \\\n",
+ " .appName(\"HELK JOIN\") \\\n",
+ " .master(\"spark://helk-spark-master:7077\") \\\n",
+ " .enableHiveSupport() \\\n",
+ " .getOrCreate()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "es_reader = (spark.read\n",
+ " .format(\"org.elasticsearch.spark.sql\")\n",
+ " .option(\"inferSchema\", \"true\")\n",
+ " .option(\"es.read.field.as.array.include\", \"tags\")\n",
+ " .option(\"es.nodes\",\"helk-elasticsearch:9200\")\n",
+ " .option(\"es.net.http.auth.user\",\"elastic\")\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "security_df = es_reader.load(\"logs-endpoint-winevent-security-*/\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "security_df.createOrReplaceTempView(\"security_events\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "security_4624_3 = spark.sql(\n",
+ " '''\n",
+ " SELECT event_id,\n",
+ " host_name,\n",
+ " src_ip_addr,\n",
+ " user_logon_id,\n",
+ " user_name,\n",
+ " logon_type,\n",
+ " `@timestamp`\n",
+ " FROM security_events\n",
+ " WHERE event_id = 4624\n",
+ " AND logon_type = 3\n",
+ " AND src_ip_addr is not null\n",
+ " AND `@timestamp` BETWEEN \"2019-05-18 00:00:00.000\" AND \"2019-05-19 00:00:00.000\"\n",
+ " '''\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "security_4624_3.createOrReplaceTempView(\"security_4624_3\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sysmon_df = es_reader.load(\"logs-endpoint-winevent-sysmon-*/\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sysmon_df.createOrReplaceTempView(\"sysmon_events\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sysmon_processcreate = spark.sql(\n",
+ " '''\n",
+ " SELECT event_id,\n",
+ " host_name,\n",
+ " process_parent_name,\n",
+ " process_parent_guid,\n",
+ " process_parent_command_line,\n",
+ " process_name,\n",
+ " process_guid,\n",
+ " process_command_line,\n",
+ " user_logon_id,\n",
+ " `@timestamp`\n",
+ " FROM sysmon_events\n",
+ " WHERE event_id = 1\n",
+ " AND `@timestamp` BETWEEN \"2019-05-18 00:00:00.000\" AND \"2019-05-19 00:00:00.000\"\n",
+ " '''\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sysmon_processcreate.createOrReplaceTempView(\"sysmon_1\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "security_sysmon_join = spark.sql(\n",
+ " '''\n",
+ " SELECT s.`@timestamp`,\n",
+ " s.host_name,\n",
+ " s.src_ip_addr,\n",
+ " s.logon_type,\n",
+ " s.user_logon_id,\n",
+ " s.user_name,\n",
+ " p.process_parent_name,\n",
+ " p.process_parent_guid,\n",
+ " p.process_parent_command_line,\n",
+ " p.process_name,\n",
+ " p.process_guid,\n",
+ " p.process_command_line\n",
+ " FROM security_4624_3 s\n",
+ " INNER JOIN sysmon_1 p\n",
+ " ON s.user_logon_id = p.user_logon_id\n",
+ " '''\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "+--------------------+-------------+---------------+---------+-------------------+---------------------------+--------------+--------------------+\n",
+ "| @timestamp| src_ip_addr| host_name|user_name|process_parent_name|process_parent_command_line| process_name|process_command_line|\n",
+ "+--------------------+-------------+---------------+---------+-------------------+---------------------------+--------------+--------------------+\n",
+ "|2019-05-18 21:45:...|172.18.39.106|IT001.shire.com| pgustavo| wmiprvse.exe| c:\\windows\\system...|powershell.exe|c:\\windows\\system...|\n",
+ "|2019-05-18 21:45:...|172.18.39.106|IT001.shire.com| pgustavo| powershell.exe| c:\\windows\\system...| conhost.exe|\\??\\c:\\windows\\sy...|\n",
+ "|2019-05-18 21:45:...|172.18.39.106|IT001.shire.com| pgustavo| powershell.exe| c:\\windows\\system...| whoami.exe|\"c:\\windows\\syste...|\n",
+ "+--------------------+-------------+---------------+---------+-------------------+---------------------------+--------------+--------------------+\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "security_sysmon_join.select(\n",
+ " \"@timestamp\",\"src_ip_addr\",\"host_name\",\"user_name\",\"process_parent_name\",\"process_parent_command_line\",\"process_name\",\"process_command_line\"\n",
+ ").show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "PySpark_Python3",
+ "language": "python",
+ "name": "pyspark3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}