From d10231195d2fbf65a31548088ad09c4906538dff Mon Sep 17 00:00:00 2001 From: Roberto Rodriguez Date: Sun, 7 Jul 2019 19:58:44 -0400 Subject: [PATCH] Jupyter Updates + Updated notebooks connection to ES + Updated Jupyter Image --- README.md | 11 +- docker/helk-jupyter/Dockerfile | 11 +- .../05-Intro_pyspark_sparkSQL_sysmon.ipynb | 218 ++++++++------- .../06-Intro_pyspark_graphframes_sysmon.ipynb | 99 +++---- .../07-pyspark-sparkSQL_tables.ipynb | 261 ++++++++++++++++++ 5 files changed, 447 insertions(+), 153 deletions(-) create mode 100644 docker/helk-jupyter/notebooks/07-pyspark-sparkSQL_tables.ipynb diff --git a/README.md b/README.md index fc30c01..82aa7d5 100644 --- a/README.md +++ b/README.md @@ -7,17 +7,16 @@ [![Open Source Love svg1](https://badges.frapsoft.com/os/v1/open-source.svg?v=103)](https://github.com/ellerbrock/open-source-badges/) -A Hunting ELK (Elasticsearch, Logstash, Kibana) with advanced analytic capabilities. +The Hunting ELK or simply the HELK is one of the first open source hunt platforms with advanced analytics capabilities such as SQL declarative language, graphing, structured streaming, and even machine learning via Jupyter notebooks and Apache Spark over an ELK stack. This project was developed primarily for research, but due to its flexible design and core components, it can be deployed in larger environments with the right configurations and scalable infrastructure. ![alt text](resources/images/HELK_Design.png "HELK Infrastructure") # Goals -* Provide a free hunting platform to the community and share the basics of Threat Hunting. -* Make sense of a large amount of event logs and add more context to suspicious events during hunting. -* Expedite the time it takes to deploy an ELK stack. -* Improve the testing of hunting use cases in an easier and more affordable way. -* Enable Data Science via Apache Spark, GraphFrames & Jupyter Notebooks. +* Provide an open source hunting platform to the community and share the basics of Threat Hunting. 
+* Expedite the time it takes to deploy a hunt platform. +* Improve the testing and development of hunting use cases in an easier and more affordable way. +* Enable Data Science capabilities while analyzing data via Apache Spark, GraphFrames & Jupyter Notebooks. # Current Status: Alpha diff --git a/docker/helk-jupyter/Dockerfile b/docker/helk-jupyter/Dockerfile index d297573..86ae4e7 100644 --- a/docker/helk-jupyter/Dockerfile +++ b/docker/helk-jupyter/Dockerfile @@ -3,11 +3,16 @@ # Author: Roberto Rodriguez (@Cyb3rWard0g) # License: GPL-3.0 -FROM cyb3rward0g/jupyter-hunt:0.0.1 +FROM cyb3rward0g/jupyter-hunt:0.0.2 LABEL maintainer="Roberto Rodriguez @Cyb3rWard0g" LABEL description="Dockerfile Notebooks-Forge Jupyter-Hunt Project." +USER root # ********** Adding HELK Jupyter notebooks RUN mkdir /opt/helk/jupyter/notebooks/datasets -COPY --chown=jupyter:810 notebooks/* /opt/helk/jupyter/notebooks/ -COPY --chown=jupyter:810 datasets/* /opt/helk/jupyter/notebooks/datasets/ +COPY notebooks/* /opt/helk/jupyter/notebooks/ +COPY datasets/* /opt/helk/jupyter/notebooks/datasets/ +RUN chown -R ${USER} /opt/helk/jupyter/notebooks/ + +USER ${USER} + diff --git a/docker/helk-jupyter/notebooks/05-Intro_pyspark_sparkSQL_sysmon.ipynb b/docker/helk-jupyter/notebooks/05-Intro_pyspark_sparkSQL_sysmon.ipynb index cd55863..ae6d69e 100644 --- a/docker/helk-jupyter/notebooks/05-Intro_pyspark_sparkSQL_sysmon.ipynb +++ b/docker/helk-jupyter/notebooks/05-Intro_pyspark_sparkSQL_sysmon.ipynb @@ -57,15 +57,8 @@ "spark = SparkSession.builder \\\n", " .appName(\"HELK Reader\") \\\n", " .master(\"spark://helk-spark-master:7077\") \\\n", - " .config(\"es.read.field.as.array.include\", \"tags\") \\\n", - " .config(\"es.nodes\",\"helk-elasticsearch:9200\") \\\n", - " .config(\"es.net.http.auth.user\",\"elastic\") \\\n", - " .config(\"es.net.http.auth.pass\",\"elasticpassword\") \\\n", " .enableHiveSupport() \\\n", - " .getOrCreate()\n", - " #PLEASE REMEMBER!!!!\n", - " #If you are using elastic TRIAL 
license, then you need the es.net.http.auth.pass value\n", - " #If you are using elastic BASIC license, then you can remove the es.net.http.auth.pass value" + " .getOrCreate()" ] }, { @@ -90,11 +83,11 @@ "
\n", "

SparkContext

\n", "\n", - "

Spark UI

\n", + "

Spark UI

\n", "\n", "
\n", "
Version
\n", - "
v2.4.0
\n", + "
v2.4.3
\n", "
Master
\n", "
spark://helk-spark-master:7077
\n", "
AppName
\n", @@ -106,7 +99,7 @@ " " ], "text/plain": [ - "" + "" ] }, "execution_count": 3, @@ -151,11 +144,16 @@ "metadata": {}, "outputs": [], "source": [ - "es_reader = (spark\n", - " .read\n", - " .format(\"org.elasticsearch.spark.sql\")\n", - " .option(\"inferSchema\", \"true\")\n", - ")" + "es_reader = (spark.read\n", + " .format(\"org.elasticsearch.spark.sql\")\n", + " .option(\"inferSchema\", \"true\")\n", + " .option(\"es.read.field.as.array.include\", \"tags\")\n", + " .option(\"es.nodes\",\"helk-elasticsearch:9200\")\n", + " .option(\"es.net.http.auth.user\",\"elastic\")\n", + ")\n", + " #PLEASE REMEMBER!!!!\n", + " #If you are using elastic TRIAL license, then you need the es.net.http.auth.pass config option set\n", + " #Example: .option(\"es.net.http.auth.pass\",\"elasticpassword\")" ] }, { @@ -175,8 +173,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 4 ms, sys: 0 ns, total: 4 ms\n", - "Wall time: 1.35 s\n" + "CPU times: user 0 ns, sys: 4 ms, total: 4 ms\n", + "Wall time: 3.86 s\n" ] } ], @@ -240,24 +238,24 @@ "name": "stdout", "output_type": "stream", "text": [ - "+--------------------+-------------------+---------------------------+-------------+--------------------+-------------+--------------------+\n", - "| process_guid|process_parent_name|process_parent_command_line| process_name|process_command_line| action| @timestamp|\n", - "+--------------------+-------------------+---------------------------+-------------+--------------------+-------------+--------------------+\n", - "|1C9FDC81-9806-5C6...| cmd.exe| c:\\windows\\system...| conhost.exe|\\??\\c:\\windows\\sy...|processcreate|2019-02-22 06:34:...|\n", - "|1C9FDC81-9806-5C6...| svchost.exe| c:\\windows\\system...|taskhostw.exe|taskhostw.exe ngc...|processcreate|2019-02-22 06:34:...|\n", - "|1C9FDC81-9807-5C6...| svchost.exe| c:\\windows\\system...| wsqmcons.exe|c:\\windows\\system...|processcreate|2019-02-22 06:34:...|\n", - "|1C9FDC81-9809-5C6...| gpupdate.exe| 
gpupdate.exe /tar...| conhost.exe|\\??\\c:\\windows\\sy...|processcreate|2019-02-22 06:34:...|\n", - "|1C9FDC81-980A-5C6...| services.exe| c:\\windows\\system...| svchost.exe|c:\\windows\\system...|processcreate|2019-02-22 06:34:...|\n", - "|1C9FDC81-980A-5C6...| svchost.exe| c:\\windows\\system...| wermgr.exe|c:\\windows\\system...|processcreate|2019-02-22 06:34:...|\n", - "|1C9FDC81-980B-5C6...| services.exe| c:\\windows\\system...| svchost.exe|c:\\windows\\system...|processcreate|2019-02-22 06:34:...|\n", - "|1C9FDC81-980C-5C6...| svchost.exe| c:\\windows\\system...|taskhostw.exe|taskhostw.exe net...|processcreate|2019-02-22 06:34:...|\n", - "|1C9FDC81-980D-5C6...| svchost.exe| c:\\windows\\system...| hxtsr.exe|\"c:\\program files...|processcreate|2019-02-22 06:34:...|\n", - "|1C9FDC81-9806-5C6...| vmtoolsd.exe| \"c:\\program files...| cmd.exe|c:\\windows\\system...|processcreate|2019-02-22 06:34:...|\n", - "+--------------------+-------------------+---------------------------+-------------+--------------------+-------------+--------------------+\n", + "+--------------------+-------------------+---------------------------+--------------------+--------------------+-------------+--------------------+\n", + "| process_guid|process_parent_name|process_parent_command_line| process_name|process_command_line| action| @timestamp|\n", + "+--------------------+-------------------+---------------------------+--------------------+--------------------+-------------+--------------------+\n", + "|aa6b4a20-7cd9-5ce...| svchost.exe| c:\\windows\\system...|backgroundtaskhos...|\"c:\\windows\\syste...|processcreate|2019-05-18 21:44:...|\n", + "|aa6b4a20-7cdf-5ce...| powershell.exe| c:\\windows\\system...| conhost.exe|\\??\\c:\\windows\\sy...|processcreate|2019-05-18 21:45:...|\n", + "|aa6b4a20-7d15-5ce...| svchost.exe| c:\\windows\\system...|backgroundtaskhos...|\"c:\\windows\\syste...|processcreate|2019-05-18 21:45:...|\n", + "|aa6b4a20-7d16-5ce...| svchost.exe| 
c:\\windows\\system...| runtimebroker.exe|c:\\windows\\system...|processcreate|2019-05-18 21:45:...|\n", + "|03ba39f5-7d20-5ce...| svchost.exe| c:\\windows\\system...| gpupdate.exe|gpupdate.exe /tar...|processcreate|2019-05-18 21:46:...|\n", + "|03ba39f5-7d20-5ce...| gpupdate.exe| gpupdate.exe /tar...| conhost.exe|\\??\\c:\\windows\\sy...|processcreate|2019-05-18 21:46:...|\n", + "|03ba39f5-7d20-5ce...| services.exe| c:\\windows\\system...| svchost.exe|c:\\windows\\system...|processcreate|2019-05-18 21:46:...|\n", + "|aa6b4a20-7ce9-5ce...| svchost.exe| c:\\windows\\system...| taskhostw.exe|taskhostw.exe key...|processcreate|2019-05-18 21:45:...|\n", + "|aa6b4a20-7cea-5ce...| svchost.exe| c:\\windows\\system...| dllhost.exe|c:\\windows\\system...|processcreate|2019-05-18 21:45:...|\n", + "|aa6b4a20-7cea-5ce...| svchost.exe| c:\\windows\\system...| taskhostw.exe| taskhostw.exe|processcreate|2019-05-18 21:45:...|\n", + "+--------------------+-------------------+---------------------------+--------------------+--------------------+-------------+--------------------+\n", "only showing top 10 rows\n", "\n", - "CPU times: user 4 ms, sys: 0 ns, total: 4 ms\n", - "Wall time: 10.2 s\n" + "CPU times: user 0 ns, sys: 4 ms, total: 4 ms\n", + "Wall time: 14.6 s\n" ] } ], @@ -323,20 +321,20 @@ "name": "stdout", "output_type": "stream", "text": [ - "+------------------------------------+---------------+--------+-------------------------------+--------------+-----------------------+\n", - "|process_guid |dst_ip_addr |dst_port|dst_host_name |action |@timestamp |\n", - "+------------------------------------+---------------+--------+-------------------------------+--------------+-----------------------+\n", - "|1C9FDC81-84E5-5C6D-0000-001060530400|239.255.255.250|1900 |null |networkconnect|2019-02-22 06:34:47.078|\n", - "|1C9FDC81-84E5-5C6D-0000-001060530400|127.0.0.1 |56783 |desktop-lfd11qp.rivendell.local|networkconnect|2019-02-22 06:34:47.078|\n", - 
"|1C9FDC81-84E5-5C6D-0000-001060530400|null |56781 |desktop-lfd11qp.rivendell.local|networkconnect|2019-02-22 06:34:47.484|\n", - "|1C9FDC81-84CA-5C6D-0000-0010262D0100|null |53 |null |networkconnect|2019-02-22 06:34:49.839|\n", - "|1C9FDC81-84CA-5C6D-0000-0010262D0100|null |53 |null |networkconnect|2019-02-22 06:34:49.839|\n", - "|1C9FDC81-84CA-5C6D-0000-0010262D0100|null |5355 |null |networkconnect|2019-02-22 06:34:50.714|\n", - "|1C9FDC81-84CA-5C6D-0000-0010262D0100|192.168.64.2 |53 |null |networkconnect|2019-02-22 06:34:50.714|\n", - "|1C9FDC81-84C4-5C6D-0000-0010EB030000|192.168.64.255 |137 |null |networkconnect|2019-02-22 06:34:53.942|\n", - "|1C9FDC81-84C4-5C6D-0000-0010EB030000|192.168.64.137 |137 |desktop-lfd11qp.rivendell.local|networkconnect|2019-02-22 06:34:53.942|\n", - "|1C9FDC81-84E5-5C6D-0000-001060530400|null |61557 |desktop-lfd11qp.rivendell.local|networkconnect|2019-02-22 06:34:47.484|\n", - "+------------------------------------+---------------+--------+-------------------------------+--------------+-----------------------+\n", + "+------------------------------------+-------------+--------+-------------+--------------+-----------------------+\n", + "|process_guid |dst_ip_addr |dst_port|dst_host_name|action |@timestamp |\n", + "+------------------------------------+-------------+--------+-------------+--------------+-----------------------+\n", + "|03ba39f5-50b2-5ce0-0000-00109995c501|10.0.10.106 |443 |null |networkconnect|2019-05-18 21:44:43.063|\n", + "|aa6b4a20-7b8d-5ce0-0000-001028031c00|10.0.10.106 |443 |null |networkconnect|2019-05-18 21:44:51.333|\n", + "|905CC552-2045-5CC5-0000-00105B2A0100|172.18.39.102|5985 |null |networkconnect|2019-05-18 21:44:53.257|\n", + "|03ba39f5-652c-5ce0-0000-0010760bff01|10.0.10.106 |443 |null |networkconnect|2019-05-18 21:44:53.484|\n", + "|03ba39f5-6e79-5ce0-0000-001032d21002|10.0.10.106 |443 |null |networkconnect|2019-05-18 21:44:58.094|\n", + "|03ba39f5-50b2-5ce0-0000-00109995c501|10.0.10.106 |443 |null 
|networkconnect|2019-05-18 21:44:58.609|\n", + "|03ba39f5-652c-5ce0-0000-0010760bff01|10.0.10.106 |443 |null |networkconnect|2019-05-18 21:44:58.609|\n", + "|03ba39f5-ea63-5ccb-0000-001050e60000|172.18.39.105|135 |it001 |networkconnect|2019-05-18 21:45:03.297|\n", + "|03ba39f5-6e79-5ce0-0000-001032d21002|10.0.10.106 |443 |null |networkconnect|2019-05-18 21:45:03.562|\n", + "|03ba39f5-652c-5ce0-0000-0010760bff01|10.0.10.106 |443 |null |networkconnect|2019-05-18 21:45:03.812|\n", + "+------------------------------------+-------------+--------+-------------+--------------+-----------------------+\n", "only showing top 10 rows\n", "\n" ] @@ -380,24 +378,24 @@ "name": "stdout", "output_type": "stream", "text": [ - "+------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------+-----------------------+\n", - "|process_guid |file_name |action |@timestamp |\n", - "+------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------+-----------------------+\n", - "|1C9FDC81-980B-5C6F-0000-00109B5CD100|c:\\programdata\\regid.1991-06.com.microsoft\\regid.1991-06.com.microsoft_windows-10-pro.swidtag |filecreate|2019-02-22 06:34:52.38 |\n", - "|1C9FDC81-850A-5C6D-0000-0010978A0500|c:\\users\\cbrown\\appdata\\local\\microsoft\\penworkspace\\discovercachedata.dat |filecreate|2019-02-22 06:34:53.328|\n", - "|1C9FDC81-84CA-5C6D-0000-00109F2C0100|c:\\windows\\prefetch\\ipconfig.exe-eea91845.pf |filecreate|2019-02-22 06:34:53.841|\n", - "|1C9FDC81-850E-5C6D-0000-001049410600|c:\\users\\cbrown\\appdata\\local\\packages\\microsoft.windows.cortana_cw5n1h2txyewy\\localstate\\devicesearchcache\\appcache131952908968534553.txt |filecreate|2019-02-22 06:34:59.983|\n", 
- "|1C9FDC81-850E-5C6D-0000-001049410600|c:\\users\\cbrown\\appdata\\local\\packages\\microsoft.windows.cortana_cw5n1h2txyewy\\localstate\\constraintindex\\apps_{34237869-b2e1-400f-8de7-90f3e51dd298} |filecreate|2019-02-22 06:35:00.621|\n", - "|1C9FDC81-850E-5C6D-0000-001049410600|c:\\users\\cbrown\\appdata\\local\\packages\\microsoft.windows.cortana_cw5n1h2txyewy\\localstate\\constraintindex\\apps_{34237869-b2e1-400f-8de7-90f3e51dd298}\\0.0.filtertrie.intermediate.txt|filecreate|2019-02-22 06:35:00.752|\n", - "|1C9FDC81-850E-5C6D-0000-001049410600|c:\\users\\cbrown\\appdata\\local\\packages\\microsoft.windows.cortana_cw5n1h2txyewy\\localstate\\constraintindex\\apps_{34237869-b2e1-400f-8de7-90f3e51dd298}\\0.2.filtertrie.intermediate.txt|filecreate|2019-02-22 06:35:00.756|\n", - "|1C9FDC81-850E-5C6D-0000-001049410600|c:\\users\\cbrown\\appdata\\local\\packages\\microsoft.windows.cortana_cw5n1h2txyewy\\localstate\\constraintindex\\apps_{34237869-b2e1-400f-8de7-90f3e51dd298}\\0.1.filtertrie.intermediate.txt|filecreate|2019-02-22 06:35:00.755|\n", - "|1C9FDC81-850E-5C6D-0000-001049410600|c:\\users\\cbrown\\appdata\\local\\packages\\microsoft.windows.cortana_cw5n1h2txyewy\\localstate\\constraintindex\\apps_{34237869-b2e1-400f-8de7-90f3e51dd298}\\apps.ft |filecreate|2019-02-22 06:35:00.771|\n", - "|1C9FDC81-84CA-5C6D-0000-00109F2C0100|c:\\windows\\prefetch\\conhost.exe-f98a1078.pf |filecreate|2019-02-22 06:35:03.776|\n", - "+------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------+-----------------------+\n", + "+------------------------------------+----------------------------------------------------------------------------------------------+----------+-----------------------+\n", + "|process_guid |file_name |action |@timestamp |\n", + 
"+------------------------------------+----------------------------------------------------------------------------------------------+----------+-----------------------+\n", + "|aa6b4a20-7cde-5ce0-0000-00109ea71e00|c:\\users\\pgustavo\\appdata\\local\\temp\\__psscriptpolicytest_kld4kxox.voz.ps1 |filecreate|2019-05-18 21:45:04.958|\n", + "|aa6b4a20-7cde-5ce0-0000-00109ea71e00|c:\\users\\pgustavo\\appdata\\local\\temp\\__psscriptpolicytest_4ksn3cia.csg.psm1 |filecreate|2019-05-18 21:45:04.958|\n", + "|905CC552-2042-5CC5-0000-00103D150100|c:\\windows\\serviceprofiles\\localservice\\appdata\\local\\lastalive1.dat |filecreate|2019-05-18 21:45:11.649|\n", + "|aa6b4a20-7719-5ce0-0000-001068a30000|c:\\windows\\temp\\his33a6.tmp |filecreate|2019-05-18 21:45:11.796|\n", + "|aa6b4a20-7735-5ce0-0000-001033f10100|c:\\windows\\system32\\sleepstudy\\screenon\\screenonpowerstudytracesession-2019-05-18-17-45-11.etl|filecreate|2019-05-18 21:45:11.99 |\n", + "|aa6b4a20-7719-5ce0-0000-001068a30000|c:\\windows\\temp\\hisf7a6.tmp |filecreate|2019-05-18 21:44:56.433|\n", + "|aa6b4a20-7cde-5ce0-0000-00109ea71e00|c:\\users\\pgustavo\\documents\\20190518\\powershell_transcript.it001.rhzmf_up.20190518174505.txt |filecreate|2019-05-18 21:45:05.573|\n", + "|aa6b4a20-771f-5ce0-0000-00108a420100|c:\\windows\\prefetch\\powershell.exe-920bba2a.pf |filecreate|2019-05-18 21:45:15.754|\n", + "|aa6b4a20-771f-5ce0-0000-00108a420100|c:\\windows\\prefetch\\dllhost.exe-d8e67ed6.pf |filecreate|2019-05-18 21:45:26.147|\n", + "|aa6b4a20-771f-5ce0-0000-00108a420100|c:\\windows\\prefetch\\wmiprvse.exe-1628051c.pf |filecreate|2019-05-18 21:45:15.648|\n", + "+------------------------------------+----------------------------------------------------------------------------------------------+----------+-----------------------+\n", "only showing top 10 rows\n", "\n", - "CPU times: user 0 ns, sys: 4 ms, total: 4 ms\n", - "Wall time: 334 ms\n" + "CPU times: user 0 ns, sys: 0 ns, total: 0 ns\n", + "Wall time: 507 ms\n" 
] } ], @@ -452,18 +450,34 @@ "name": "stdout", "output_type": "stream", "text": [ - "+-------------------+----------------------+--------------+\n", - "|process_parent_name|process_name |dst_ip_addr |\n", - "+-------------------+----------------------+--------------+\n", - "|svchost.exe |backgroundtaskhost.exe|204.79.197.200|\n", - "|svchost.exe |backgroundtaskhost.exe|40.112.91.29 |\n", - "|svchost.exe |backgroundtaskhost.exe|40.112.91.29 |\n", - "|svchost.exe |backgroundtaskhost.exe|40.112.91.29 |\n", - "|svchost.exe |backgroundtaskhost.exe|40.112.91.29 |\n", - "+-------------------+----------------------+--------------+\n", + "+-------------------+-------------------+--------------+\n", + "|process_parent_name|process_name |dst_ip_addr |\n", + "+-------------------+-------------------+--------------+\n", + "|svchost.exe |microsoftedgecp.exe|13.107.21.200 |\n", + "|svchost.exe |microsoftedgecp.exe|13.107.21.200 |\n", + "|svchost.exe |microsoftedgecp.exe|204.79.197.200|\n", + "|svchost.exe |microsoftedgecp.exe|72.30.2.182 |\n", + "|svchost.exe |microsoftedgecp.exe|204.79.197.203|\n", + "|svchost.exe |microsoftedgecp.exe|23.50.228.129 |\n", + "|svchost.exe |microsoftedgecp.exe|23.194.130.152|\n", + "|svchost.exe |microsoftedgecp.exe|23.194.130.145|\n", + "|wmiprvse.exe |powershell.exe |10.0.10.106 |\n", + "|wmiprvse.exe |powershell.exe |10.0.10.106 |\n", + "|wmiprvse.exe |powershell.exe |10.0.10.106 |\n", + "|wmiprvse.exe |powershell.exe |10.0.10.106 |\n", + "|wmiprvse.exe |powershell.exe |10.0.10.106 |\n", + "|wmiprvse.exe |powershell.exe |10.0.10.106 |\n", + "|wmiprvse.exe |powershell.exe |10.0.10.106 |\n", + "|wmiprvse.exe |powershell.exe |10.0.10.106 |\n", + "|wmiprvse.exe |powershell.exe |10.0.10.106 |\n", + "|wmiprvse.exe |powershell.exe |10.0.10.106 |\n", + "|wmiprvse.exe |powershell.exe |10.0.10.106 |\n", + "|wmiprvse.exe |powershell.exe |10.0.10.106 |\n", + "+-------------------+-------------------+--------------+\n", + "only showing top 20 rows\n", "\n", 
"CPU times: user 4 ms, sys: 0 ns, total: 4 ms\n", - "Wall time: 6.57 s\n" + "Wall time: 5.05 s\n" ] } ], @@ -484,11 +498,14 @@ "+-------------------+-----+\n", "|process_parent_name|count|\n", "+-------------------+-----+\n", - "| svchost.exe| 5|\n", + "| wscript.exe| 49|\n", + "| wmiprvse.exe| 25|\n", + "| svchost.exe| 9|\n", + "| services.exe| 2|\n", "+-------------------+-----+\n", "\n", - "CPU times: user 4 ms, sys: 4 ms, total: 8 ms\n", - "Wall time: 7.38 s\n" + "CPU times: user 32 ms, sys: 80 ms, total: 112 ms\n", + "Wall time: 18.2 s\n" ] } ], @@ -499,25 +516,26 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "+------------------------------------------------+----------------------+--------------+\n", - "|process_parent_command_line |process_name |dst_ip_addr |\n", - "+------------------------------------------------+----------------------+--------------+\n", - "|c:\\windows\\system32\\svchost.exe -k dcomlaunch -p|backgroundtaskhost.exe|204.79.197.200|\n", - "|c:\\windows\\system32\\svchost.exe -k dcomlaunch -p|backgroundtaskhost.exe|40.112.91.29 |\n", - "|c:\\windows\\system32\\svchost.exe -k dcomlaunch -p|backgroundtaskhost.exe|40.112.91.29 |\n", - "|c:\\windows\\system32\\svchost.exe -k dcomlaunch -p|backgroundtaskhost.exe|40.112.91.29 |\n", - "|c:\\windows\\system32\\svchost.exe -k dcomlaunch -p|backgroundtaskhost.exe|40.112.91.29 |\n", - "+------------------------------------------------+----------------------+--------------+\n", + "+------------------------------------------------+-------------------+--------------+\n", + "|process_parent_command_line |process_name |dst_ip_addr |\n", + "+------------------------------------------------+-------------------+--------------+\n", + "|c:\\windows\\system32\\svchost.exe -k dcomlaunch -p|microsoftedgecp.exe|13.107.21.200 |\n", + "|c:\\windows\\system32\\svchost.exe -k dcomlaunch 
-p|microsoftedgecp.exe|13.107.21.200 |\n", + "|c:\\windows\\system32\\svchost.exe -k dcomlaunch -p|microsoftedgecp.exe|204.79.197.200|\n", + "|c:\\windows\\system32\\svchost.exe -k dcomlaunch -p|microsoftedgecp.exe|72.30.2.182 |\n", + "|c:\\windows\\system32\\svchost.exe -k dcomlaunch -p|microsoftedgecp.exe|204.79.197.203|\n", + "+------------------------------------------------+-------------------+--------------+\n", + "only showing top 5 rows\n", "\n", - "CPU times: user 0 ns, sys: 12 ms, total: 12 ms\n", - "Wall time: 2.4 s\n" + "CPU times: user 12 ms, sys: 32 ms, total: 44 ms\n", + "Wall time: 14.1 s\n" ] } ], @@ -547,7 +565,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 22, "metadata": {}, "outputs": [], "source": [ @@ -556,7 +574,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -566,12 +584,13 @@ "+-------------------+-----+\n", "|process_parent_name|count|\n", "+-------------------+-----+\n", - "| services.exe| 2|\n", - "| svchost.exe| 19|\n", + "| wmiprvse.exe| 3|\n", + "| wscript.exe| 4|\n", + "| svchost.exe| 5|\n", "+-------------------+-----+\n", "\n", - "CPU times: user 4 ms, sys: 4 ms, total: 8 ms\n", - "Wall time: 4.44 s\n" + "CPU times: user 0 ns, sys: 220 ms, total: 220 ms\n", + "Wall time: 24.6 s\n" ] } ], @@ -579,6 +598,13 @@ "%%time\n", "process_file_df.groupBy('process_parent_name').count().sort('count').show()" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -597,7 +623,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.7" + "version": "3.7.3" } }, "nbformat": 4, diff --git a/docker/helk-jupyter/notebooks/06-Intro_pyspark_graphframes_sysmon.ipynb b/docker/helk-jupyter/notebooks/06-Intro_pyspark_graphframes_sysmon.ipynb index f2eab01..931e6d0 100644 --- 
a/docker/helk-jupyter/notebooks/06-Intro_pyspark_graphframes_sysmon.ipynb +++ b/docker/helk-jupyter/notebooks/06-Intro_pyspark_graphframes_sysmon.ipynb @@ -80,15 +80,8 @@ "spark = SparkSession.builder \\\n", " .appName(\"HELK Graphs\") \\\n", " .master(\"spark://helk-spark-master:7077\") \\\n", - " .config(\"es.read.field.as.array.include\", \"tags\") \\\n", - " .config(\"es.nodes\",\"helk-elasticsearch:9200\") \\\n", - " .config(\"es.net.http.auth.user\",\"elastic\") \\\n", - " .config(\"es.net.http.auth.pass\",\"elasticpassword\") \\\n", " .enableHiveSupport() \\\n", - " .getOrCreate()\n", - " #PLEASE REMEMBER!!!!\n", - " #If you are using elastic TRIAL license, then you need the es.net.http.auth.pass value\n", - " #If you are using elastic BASIC license, then you can remove the es.net.http.auth.pass value" + " .getOrCreate()" ] }, { @@ -113,11 +106,11 @@ "
\n", "

SparkContext

\n", "\n", - "

Spark UI

\n", + "

Spark UI

\n", "\n", "
\n", "
Version
\n", - "
v2.4.0
\n", + "
v2.4.3
\n", "
Master
\n", "
spark://helk-spark-master:7077
\n", "
AppName
\n", @@ -129,7 +122,7 @@ " " ], "text/plain": [ - "" + "" ] }, "execution_count": 3, @@ -174,8 +167,8 @@ "| b| 2|\n", "+---+--------+\n", "\n", - "CPU times: user 492 ms, sys: 1.2 s, total: 1.69 s\n", - "Wall time: 53.2 s\n" + "CPU times: user 348 ms, sys: 380 ms, total: 728 ms\n", + "Wall time: 31.6 s\n" ] } ], @@ -237,11 +230,16 @@ "metadata": {}, "outputs": [], "source": [ - "es_reader = (spark\n", - " .read\n", - " .format(\"org.elasticsearch.spark.sql\")\n", - " .option(\"inferSchema\", \"true\")\n", - ")" + "es_reader = (spark.read\n", + " .format(\"org.elasticsearch.spark.sql\")\n", + " .option(\"inferSchema\", \"true\")\n", + " .option(\"es.read.field.as.array.include\", \"tags\")\n", + " .option(\"es.nodes\",\"helk-elasticsearch:9200\")\n", + " .option(\"es.net.http.auth.user\",\"elastic\")\n", + ")\n", + " #PLEASE REMEMBER!!!!\n", + " #If you are using elastic TRIAL license, then you need the es.net.http.auth.pass config option set\n", + " #Example: .option(\"es.net.http.auth.pass\",\"elasticpassword\")" ] }, { @@ -261,8 +259,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 0 ns, sys: 4 ms, total: 4 ms\n", - "Wall time: 2.2 s\n" + "CPU times: user 4 ms, sys: 0 ns, total: 4 ms\n", + "Wall time: 1.58 s\n" ] } ], @@ -317,19 +315,19 @@ "name": "stdout", "output_type": "stream", "text": [ - "+------------------------------------+---------------+-------------------------------+-------------------+-------------+-------------+\n", - "|id |user_name |host_name |process_parent_name|process_name |action |\n", - "+------------------------------------+---------------+-------------------------------+-------------------+-------------+-------------+\n", - "|1C9FDC81-9806-5C6F-0000-00100CDDD000|system |DESKTOP-LFD11QP.RIVENDELL.local|cmd.exe |conhost.exe |processcreate|\n", - "|1C9FDC81-9806-5C6F-0000-001051DAD000|system |DESKTOP-LFD11QP.RIVENDELL.local|svchost.exe |taskhostw.exe|processcreate|\n", - 
"|1C9FDC81-9807-5C6F-0000-00100EEED000|system |DESKTOP-LFD11QP.RIVENDELL.local|svchost.exe |wsqmcons.exe |processcreate|\n", - "|1C9FDC81-9809-5C6F-0000-00100E28D100|network service|DESKTOP-LFD11QP.RIVENDELL.local|gpupdate.exe |conhost.exe |processcreate|\n", - "|1C9FDC81-980A-5C6F-0000-0010903BD100|cbrown |DESKTOP-LFD11QP.RIVENDELL.local|services.exe |svchost.exe |processcreate|\n", - "+------------------------------------+---------------+-------------------------------+-------------------+-------------+-------------+\n", + "+------------------------------------+---------------+---------------+-------------------+----------------------+-------------+\n", + "|id |user_name |host_name |process_parent_name|process_name |action |\n", + "+------------------------------------+---------------+---------------+-------------------+----------------------+-------------+\n", + "|aa6b4a20-7cd9-5ce0-0000-0010a3801e00|pgustavo |it001.shire.com|svchost.exe |backgroundtaskhost.exe|processcreate|\n", + "|aa6b4a20-7cdf-5ce0-0000-00105eac1e00|pgustavo |it001.shire.com|powershell.exe |conhost.exe |processcreate|\n", + "|aa6b4a20-7d15-5ce0-0000-0010f07f1f00|pgustavo |it001.shire.com|svchost.exe |backgroundtaskhost.exe|processcreate|\n", + "|aa6b4a20-7d16-5ce0-0000-001089921f00|pgustavo |it001.shire.com|svchost.exe |runtimebroker.exe |processcreate|\n", + "|03ba39f5-7d20-5ce0-0000-001052da2002|network service|hr001.shire.com|svchost.exe |gpupdate.exe |processcreate|\n", + "+------------------------------------+---------------+---------------+-------------------+----------------------+-------------+\n", "only showing top 5 rows\n", "\n", - "CPU times: user 0 ns, sys: 4 ms, total: 4 ms\n", - "Wall time: 2.55 s\n" + "CPU times: user 0 ns, sys: 0 ns, total: 0 ns\n", + "Wall time: 1.83 s\n" ] } ], @@ -387,16 +385,16 @@ "+------------------------------------+------------------------------------+------------+\n", "|src |dst |relationship|\n", 
"+------------------------------------+------------------------------------+------------+\n", - "|1C9FDC81-9806-5C6F-0000-001054D8D000|1C9FDC81-9806-5C6F-0000-00100CDDD000|spawned |\n", - "|1C9FDC81-84C9-5C6D-0000-001065210100|1C9FDC81-9806-5C6F-0000-001051DAD000|spawned |\n", - "|1C9FDC81-84C9-5C6D-0000-001065210100|1C9FDC81-9807-5C6F-0000-00100EEED000|spawned |\n", - "|1C9FDC81-9806-5C6F-0000-00102CEAD000|1C9FDC81-9809-5C6F-0000-00100E28D100|spawned |\n", - "|1C9FDC81-84C7-5C6D-0000-001025A90000|1C9FDC81-980A-5C6F-0000-0010903BD100|spawned |\n", + "|aa6b4a20-7719-5ce0-0000-001068a30000|aa6b4a20-7cd9-5ce0-0000-0010a3801e00|spawned |\n", + "|aa6b4a20-7cde-5ce0-0000-00109ea71e00|aa6b4a20-7cdf-5ce0-0000-00105eac1e00|spawned |\n", + "|aa6b4a20-7719-5ce0-0000-001068a30000|aa6b4a20-7d15-5ce0-0000-0010f07f1f00|spawned |\n", + "|aa6b4a20-7719-5ce0-0000-001068a30000|aa6b4a20-7d16-5ce0-0000-001089921f00|spawned |\n", + "|03ba39f5-ea64-5ccb-0000-0010c91c0100|03ba39f5-7d20-5ce0-0000-001052da2002|spawned |\n", "+------------------------------------+------------------------------------+------------+\n", "only showing top 5 rows\n", "\n", - "CPU times: user 0 ns, sys: 4 ms, total: 4 ms\n", - "Wall time: 458 ms\n" + "CPU times: user 4 ms, sys: 0 ns, total: 4 ms\n", + "Wall time: 453 ms\n" ] } ], @@ -438,7 +436,7 @@ "output_type": "stream", "text": [ "CPU times: user 0 ns, sys: 4 ms, total: 4 ms\n", - "Wall time: 208 ms\n" + "Wall time: 405 ms\n" ] } ], @@ -456,13 +454,16 @@ "name": "stdout", "output_type": "stream", "text": [ - "+-------------------+------------+------------+------------+\n", - "|process_parent_name|process_name|process_name|process_name|\n", - "+-------------------+------------+------------+------------+\n", - "+-------------------+------------+------------+------------+\n", + "+-------------------+------------+--------------+------------+\n", + "|process_parent_name|process_name|process_name |process_name|\n", + 
"+-------------------+------------+--------------+------------+\n", + "|svchost.exe |wmiprvse.exe|powershell.exe|conhost.exe |\n", + "|svchost.exe |wmiprvse.exe|powershell.exe|whoami.exe |\n", + "|explorer.exe |wscript.exe |powershell.exe|conhost.exe |\n", + "+-------------------+------------+--------------+------------+\n", "\n", - "CPU times: user 4 ms, sys: 4 ms, total: 8 ms\n", - "Wall time: 18.1 s\n" + "CPU times: user 12 ms, sys: 0 ns, total: 12 ms\n", + "Wall time: 15.3 s\n" ] } ], @@ -486,10 +487,12 @@ "+-------------------+-----+\n", "|process_parent_name|count|\n", "+-------------------+-----+\n", + "| explorer.exe| 1|\n", + "| svchost.exe| 2|\n", "+-------------------+-----+\n", "\n", - "CPU times: user 12 ms, sys: 0 ns, total: 12 ms\n", - "Wall time: 12.9 s\n" + "CPU times: user 8 ms, sys: 0 ns, total: 8 ms\n", + "Wall time: 13.8 s\n" ] } ], @@ -515,7 +518,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.7" + "version": "3.7.3" } }, "nbformat": 4, diff --git a/docker/helk-jupyter/notebooks/07-pyspark-sparkSQL_tables.ipynb b/docker/helk-jupyter/notebooks/07-pyspark-sparkSQL_tables.ipynb new file mode 100644 index 0000000..9d7e039 --- /dev/null +++ b/docker/helk-jupyter/notebooks/07-pyspark-sparkSQL_tables.ipynb @@ -0,0 +1,261 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# **Spark SQL Tables via Pyspark**\n", + "----------------------------------------------------------------------------\n", + "## Goals:\n", + "* Practice Spark SQL via PySpark skills\n", + "* Ensure JupyterLab Server, Spark Cluster & Elasticsearch are communicating\n", + "* Practice Query execution via Pyspark\n", + "* Create template for future queries" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Import SparkSession Class" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from pyspark.sql import 
SparkSession" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create a SparkSession instance" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "spark = SparkSession.builder \\\n", + " .appName(\"HELK Reader\") \\\n", + " .master(\"spark://helk-spark-master:7077\") \\\n", + " .enableHiveSupport() \\\n", + " .getOrCreate()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Read data from the HELK Elasticsearch via Spark SQL" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "es_reader = (spark.read\n", + " .format(\"org.elasticsearch.spark.sql\")\n", + " .option(\"inferSchema\", \"true\")\n", + " .option(\"es.read.field.as.array.include\", \"tags\")\n", + " .option(\"es.nodes\",\"helk-elasticsearch:9200\")\n", + " .option(\"es.net.http.auth.user\",\"elastic\")\n", + ")\n", + " #PLEASE REMEMBER!!!!\n", + " #If you are using elastic TRIAL license, then you need the es.net.http.auth.pass config option set\n", + " #Example: .option(\"es.net.http.auth.pass\",\"elasticpassword\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Read Sysmon Events" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 0 ns, sys: 4 ms, total: 4 ms\n", + "Wall time: 1.99 s\n" + ] + } + ], + "source": [ + "%%time\n", + "sysmon_df = es_reader.load(\"logs-endpoint-winevent-sysmon-*/\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Register Sysmon SQL temporary View" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "sysmon_df.createOrReplaceTempView(\"sysmon_events\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + 
"## Run SQL Queries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sysmon_ps_execution = spark.sql(\n", + " '''\n", + " SELECT event_id,process_parent_name,process_name\n", + " FROM sysmon_events\n", + " WHERE event_id = 1\n", + " AND process_name = \"powershell.exe\"\n", + " AND NOT process_parent_name = \"explorer.exe\"\n", + " '''\n", + ")\n", + "sysmon_ps_execution.show(10)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sysmon_ps_module = spark.sql(\n", + " '''\n", + " SELECT event_id,process_name\n", + " FROM sysmon_events\n", + " WHERE event_id = 7 \n", + " AND (\n", + " lower(file_description) = \"system.management.automation\"\n", + " OR lower(module_loaded) LIKE \"%\\\\\\\\system.management.automation%\"\n", + " ) \n", + " '''\n", + ")\n", + "sysmon_ps_module.show(10)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sysmon_ps_pipe = spark.sql(\n", + " '''\n", + " SELECT event_id,process_name\n", + " FROM sysmon_events\n", + " WHERE event_id = 17\n", + " AND lower(pipe_name) LIKE \"\\\\\\\\pshost%\"\n", + " '''\n", + ")\n", + "sysmon_ps_pipe.show(10)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Read PowerShell Events" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "powershell_df = es_reader.load(\"logs-endpoint-winevent-powershell-*/\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Register PowerShell SQL temporary View" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "powershell_df.createOrReplaceTempView(\"powershell_events\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + 
"ps_named_pipe = spark.sql(\n", + " '''\n", + " SELECT event_id\n", + " FROM powershell_events\n", + " WHERE event_id = 53504\n", + " '''\n", + ")\n", + "ps_named_pipe.show(10)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "PySpark_Python3", + "language": "python", + "name": "pyspark3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}