mirror of https://github.com/infosecn1nja/HELK.git

Added a few notebooks for testing

parent b1516ee3c8
commit 7a6f6805e9

@@ -8,6 +8,6 @@ LABEL maintainer="Roberto Rodriguez @Cyb3rWard0g"
LABEL description="Dockerfile Notebooks-Forge Jupyter-Hunt Project."

# ********** Adding HELK Jupyter notebooks
RUN mkdir /opt/helk/jupyter/datasets
RUN mkdir /opt/helk/jupyter/notebooks/datasets
COPY --chown=jupyter:810 notebooks/* /opt/helk/jupyter/notebooks/
COPY --chown=jupyter:810 datasets/* /opt/helk/jupyter/datasets/
COPY --chown=jupyter:810 datasets/* /opt/helk/jupyter/notebooks/datasets/
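
The datasets are copied twice, presumably so they can be reached both by absolute path and relative to the notebooks directory. A minimal sanity check, assuming a container built from this Dockerfile; the paths come from the COPY lines above and nothing else is assumed:

# Quick check, run from a notebook inside the helk-jupyter container, that the
# bundled datasets landed in both locations created above.
from pathlib import Path

for data_dir in (Path("/opt/helk/jupyter/datasets"),
                 Path("/opt/helk/jupyter/notebooks/datasets")):
    names = sorted(p.name for p in data_dir.iterdir()) if data_dir.is_dir() else []
    print(f"{data_dir}: {len(names)} file(s)", names[:5])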

@@ -0,0 +1,218 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Import Libraries"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from pyspark.sql import SparkSession"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create SparkSession"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"spark = SparkSession.builder \\\n",
" .appName(\"HELK Reader\") \\\n",
" .master(\"spark://helk-spark-master:7077\") \\\n",
" .enableHiveSupport() \\\n",
" .getOrCreate()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Verify Spark Variable"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
" <div>\n",
" <p><b>SparkSession - hive</b></p>\n",
" \n",
" <div>\n",
" <p><b>SparkContext</b></p>\n",
"\n",
" <p><a href=\"http://1d254481cafc:4040\">Spark UI</a></p>\n",
"\n",
" <dl>\n",
" <dt>Version</dt>\n",
" <dd><code>v2.4.3</code></dd>\n",
" <dt>Master</dt>\n",
" <dd><code>spark://helk-spark-master:7077</code></dd>\n",
" <dt>AppName</dt>\n",
" <dd><code>HELK Reader</code></dd>\n",
" </dl>\n",
" </div>\n",
" \n",
" </div>\n",
" "
],
"text/plain": [
"<pyspark.sql.session.SparkSession at 0x7f6a1c8596a0>"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"spark"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Initiate Elasticsearch Dataframe Reader"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"es_reader = (spark.read\n",
" .format(\"org.elasticsearch.spark.sql\")\n",
" .option(\"inferSchema\", \"true\")\n",
" .option(\"es.read.field.as.array.include\", \"tags\")\n",
" .option(\"es.nodes\",\"helk-elasticsearch:9200\")\n",
" .option(\"es.net.http.auth.user\",\"elastic\")\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load Data from Elasticsearch : Sysmon Index"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"sysmon_df = es_reader.load(\"logs-endpoint-winevent-sysmon-*/\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"processcreate_df = sysmon_df.filter(sysmon_df.action == \"processcreate\")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"processcreate_df = processcreate_df.select(\n",
" \"process_guid\",\"process_parent_name\",\"process_parent_command_line\",\n",
" \"process_name\",\"process_command_line\",\"action\",\"@timestamp\"\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Show Sysmon Spark DataFrame"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"+--------------------+-------------------+---------------------------+--------------------+--------------------+-------------+--------------------+\n",
"| process_guid|process_parent_name|process_parent_command_line| process_name|process_command_line| action| @timestamp|\n",
"+--------------------+-------------------+---------------------------+--------------------+--------------------+-------------+--------------------+\n",
"|aa6b4a20-7cde-5ce...| svchost.exe| c:\\windows\\system...| wmiprvse.exe|c:\\windows\\system...|processcreate|2019-05-18 21:45:...|\n",
"|aa6b4a20-7cde-5ce...| wmiprvse.exe| c:\\windows\\system...| powershell.exe|c:\\windows\\system...|processcreate|2019-05-18 21:45:...|\n",
"|aa6b4a20-7cdf-5ce...| powershell.exe| c:\\windows\\system...| conhost.exe|\\??\\c:\\windows\\sy...|processcreate|2019-05-18 21:45:...|\n",
"|aa6b4a20-7ce7-5ce...| winlogon.exe| winlogon.exe| logonui.exe|\"logonui.exe\" /fl...|processcreate|2019-05-18 21:45:...|\n",
"|aa6b4a20-7ce9-5ce...| svchost.exe| c:\\windows\\system...| taskhostw.exe|taskhostw.exe key...|processcreate|2019-05-18 21:45:...|\n",
"|aa6b4a20-7cd9-5ce...| svchost.exe| c:\\windows\\system...|backgroundtaskhos...|\"c:\\windows\\syste...|processcreate|2019-05-18 21:44:...|\n",
"|aa6b4a20-7cda-5ce...| svchost.exe| c:\\windows\\system...| runtimebroker.exe|c:\\windows\\system...|processcreate|2019-05-18 21:44:...|\n",
"|aa6b4a20-7cea-5ce...| svchost.exe| c:\\windows\\system...| dllhost.exe|c:\\windows\\system...|processcreate|2019-05-18 21:45:...|\n",
"|aa6b4a20-7cea-5ce...| svchost.exe| c:\\windows\\system...| taskhostw.exe| taskhostw.exe|processcreate|2019-05-18 21:45:...|\n",
"|aa6b4a20-7cec-5ce...| svchost.exe| c:\\windows\\system...| dllhost.exe|c:\\windows\\system...|processcreate|2019-05-18 21:45:...|\n",
"+--------------------+-------------------+---------------------------+--------------------+--------------------+-------------+--------------------+\n",
"only showing top 10 rows\n",
"\n"
]
}
],
"source": [
"processcreate_df.show(10)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
|
||||
"$"
|
||||
]
}
],
"metadata": {
"kernelspec": {
"display_name": "PySpark_Python3",
"language": "python",
"name": "pyspark3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
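
The es_reader in the notebook above authenticates with es.net.http.auth.user only. A sketch of the same reader for a cluster where the elastic user also requires a password, using the elasticsearch-hadoop setting es.net.http.auth.pass; ELASTIC_PASSWORD is a placeholder environment variable, not something HELK defines, and spark is the session created in the notebook:

import os

# Same reader as in the notebook, plus a password option for secured clusters.
# ELASTIC_PASSWORD is a hypothetical environment variable used only here.
es_reader = (spark.read
    .format("org.elasticsearch.spark.sql")
    .option("inferSchema", "true")
    .option("es.read.field.as.array.include", "tags")
    .option("es.nodes", "helk-elasticsearch:9200")
    .option("es.net.http.auth.user", "elastic")
    .option("es.net.http.auth.pass", os.environ.get("ELASTIC_PASSWORD", ""))
)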
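
A natural follow-up to the processcreate_df cells above is a simple stack count of parent and child process pairs, since rare pairs are usually the ones worth a closer look. A sketch, assuming processcreate_df as defined in the notebook:

from pyspark.sql import functions as F

# Count ProcessCreate events per parent -> child pair and surface the rarest.
(processcreate_df
    .groupBy("process_parent_name", "process_name")
    .agg(F.count("*").alias("events"))
    .orderBy(F.asc("events"))
    .show(10, truncate=False))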

@@ -0,0 +1,225 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from pyspark.sql import SparkSession"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"spark = SparkSession.builder \\\n",
" .appName(\"HELK JOIN\") \\\n",
" .master(\"spark://helk-spark-master:7077\") \\\n",
" .enableHiveSupport() \\\n",
" .getOrCreate()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"es_reader = (spark.read\n",
" .format(\"org.elasticsearch.spark.sql\")\n",
" .option(\"inferSchema\", \"true\")\n",
" .option(\"es.read.field.as.array.include\", \"tags\")\n",
" .option(\"es.nodes\",\"helk-elasticsearch:9200\")\n",
" .option(\"es.net.http.auth.user\",\"elastic\")\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"security_df = es_reader.load(\"logs-endpoint-winevent-security-*/\")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"security_df.createOrReplaceTempView(\"security_events\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"security_4624_3 = spark.sql(\n",
" '''\n",
" SELECT event_id,\n",
" host_name,\n",
" src_ip_addr,\n",
" user_logon_id,\n",
" user_name,\n",
" logon_type,\n",
" `@timestamp`\n",
" FROM security_events\n",
" WHERE event_id = 4624\n",
" AND logon_type = 3\n",
" AND src_ip_addr is not null\n",
" AND `@timestamp` BETWEEN \"2019-05-18 00:00:00.000\" AND \"2019-05-19 00:00:00.000\"\n",
" '''\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"security_4624_3.createOrReplaceTempView(\"security_4624_3\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"sysmon_df = es_reader.load(\"logs-endpoint-winevent-sysmon-*/\")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"sysmon_df.createOrReplaceTempView(\"sysmon_events\")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"sysmon_processcreate = spark.sql(\n",
" '''\n",
" SELECT event_id,\n",
" host_name,\n",
" process_parent_name,\n",
" process_parent_guid,\n",
" process_parent_command_line,\n",
" process_name,\n",
" process_guid,\n",
" process_command_line,\n",
" user_logon_id,\n",
" `@timestamp`\n",
" FROM sysmon_events\n",
" WHERE event_id = 1\n",
" AND `@timestamp` BETWEEN \"2019-05-18 00:00:00.000\" AND \"2019-05-19 00:00:00.000\"\n",
" '''\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"sysmon_processcreate.createOrReplaceTempView(\"sysmon_1\")"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"security_sysmon_join = spark.sql(\n",
" '''\n",
" SELECT s.`@timestamp`,\n",
" s.host_name,\n",
" s.src_ip_addr,\n",
" s.logon_type,\n",
" s.user_logon_id,\n",
" s.user_name,\n",
" p.process_parent_name,\n",
" p.process_parent_guid,\n",
" p.process_parent_command_line,\n",
" p.process_name,\n",
" p.process_guid,\n",
" p.process_command_line\n",
" FROM security_4624_3 s\n",
" INNER JOIN sysmon_1 p\n",
" ON s.user_logon_id = p.user_logon_id\n",
" '''\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"+--------------------+-------------+---------------+---------+-------------------+---------------------------+--------------+--------------------+\n",
"| @timestamp| src_ip_addr| host_name|user_name|process_parent_name|process_parent_command_line| process_name|process_command_line|\n",
"+--------------------+-------------+---------------+---------+-------------------+---------------------------+--------------+--------------------+\n",
"|2019-05-18 21:45:...|172.18.39.106|IT001.shire.com| pgustavo| wmiprvse.exe| c:\\windows\\system...|powershell.exe|c:\\windows\\system...|\n",
"|2019-05-18 21:45:...|172.18.39.106|IT001.shire.com| pgustavo| powershell.exe| c:\\windows\\system...| conhost.exe|\\??\\c:\\windows\\sy...|\n",
"|2019-05-18 21:45:...|172.18.39.106|IT001.shire.com| pgustavo| powershell.exe| c:\\windows\\system...| whoami.exe|\"c:\\windows\\syste...|\n",
"+--------------------+-------------+---------------+---------+-------------------+---------------------------+--------------+--------------------+\n",
"\n"
]
}
],
"source": [
"security_sysmon_join.select(\n",
" \"@timestamp\",\"src_ip_addr\",\"host_name\",\"user_name\",\"process_parent_name\",\"process_parent_command_line\",\"process_name\",\"process_command_line\"\n",
").show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "PySpark_Python3",
"language": "python",
"name": "pyspark3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
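
The correlation in the notebook above can also be expressed with the DataFrame API instead of spark.sql. A sketch, assuming the security_4624_3 and sysmon_processcreate DataFrames defined earlier in that notebook; the column names come from those two queries:

# Join network logons (4624, logon type 3) to process creations on
# user_logon_id, then keep a few columns from each side.
joined = (
    security_4624_3.join(
        sysmon_processcreate,
        security_4624_3.user_logon_id == sysmon_processcreate.user_logon_id,
        "inner")
    .select(
        security_4624_3["@timestamp"], security_4624_3["host_name"],
        security_4624_3["src_ip_addr"], security_4624_3["user_name"],
        sysmon_processcreate["process_parent_name"],
        sysmon_processcreate["process_name"],
        sysmon_processcreate["process_command_line"]))

joined.show(10, truncate=False)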
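
If the joined result is small, it can be collected to the driver and saved next to the datasets bundled by the Dockerfile above. A sketch, assuming pandas is available in the Jupyter image and security_sysmon_join as defined in the notebook; the file name is only an example:

# Collect the joined events to the driver (the Jupyter container) and write
# them into the notebooks/datasets directory created by the Dockerfile.
security_sysmon_join.toPandas().to_csv(
    "/opt/helk/jupyter/notebooks/datasets/security_sysmon_join_2019-05-18.csv",
    index=False)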