Added a few notebooks for testing

keyword-vs-text-changes
Roberto Rodriguez 2019-05-30 15:01:13 -04:00
parent b1516ee3c8
commit 7a6f6805e9
3 changed files with 445 additions and 2 deletions

View File

@@ -8,6 +8,6 @@ LABEL maintainer="Roberto Rodriguez @Cyb3rWard0g"
 LABEL description="Dockerfile Notebooks-Forge Jupyter-Hunt Project."
 # ********** Adding HELK Jupyter notebooks
-RUN mkdir /opt/helk/jupyter/datasets
+RUN mkdir /opt/helk/jupyter/notebooks/datasets
 COPY --chown=jupyter:810 notebooks/* /opt/helk/jupyter/notebooks/
-COPY --chown=jupyter:810 datasets/* /opt/helk/jupyter/datasets/
+COPY --chown=jupyter:810 datasets/* /opt/helk/jupyter/notebooks/datasets/

View File

@@ -0,0 +1,218 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Import Libraries"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from pyspark.sql import SparkSession"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create SparkSession"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"spark = SparkSession.builder \\\n",
" .appName(\"HELK Reader\") \\\n",
" .master(\"spark://helk-spark-master:7077\") \\\n",
" .enableHiveSupport() \\\n",
" .getOrCreate()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Verify Spark Variable"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
" <div>\n",
" <p><b>SparkSession - hive</b></p>\n",
" \n",
" <div>\n",
" <p><b>SparkContext</b></p>\n",
"\n",
" <p><a href=\"http://1d254481cafc:4040\">Spark UI</a></p>\n",
"\n",
" <dl>\n",
" <dt>Version</dt>\n",
" <dd><code>v2.4.3</code></dd>\n",
" <dt>Master</dt>\n",
" <dd><code>spark://helk-spark-master:7077</code></dd>\n",
" <dt>AppName</dt>\n",
" <dd><code>HELK Reader</code></dd>\n",
" </dl>\n",
" </div>\n",
" \n",
" </div>\n",
" "
],
"text/plain": [
"<pyspark.sql.session.SparkSession at 0x7f6a1c8596a0>"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"spark"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Initiate Elasticsearch Dataframe Reader"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"es_reader = (spark.read\n",
" .format(\"org.elasticsearch.spark.sql\")\n",
" .option(\"inferSchema\", \"true\")\n",
" .option(\"es.read.field.as.array.include\", \"tags\")\n",
" .option(\"es.nodes\",\"helk-elasticsearch:9200\")\n",
" .option(\"es.net.http.auth.user\",\"elastic\")\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load Data from Elasticsearch: Sysmon Index"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"sysmon_df = es_reader.load(\"logs-endpoint-winevent-sysmon-*/\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"processcreate_df = sysmon_df.filter(sysmon_df.action == \"processcreate\")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"processcreate_df = processcreate_df.select(\n",
" \"process_guid\",\"process_parent_name\",\"process_parent_command_line\",\n",
" \"process_name\",\"process_command_line\",\"action\",\"@timestamp\"\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Show Sysmon Spark DataFrame"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"+--------------------+-------------------+---------------------------+--------------------+--------------------+-------------+--------------------+\n",
"| process_guid|process_parent_name|process_parent_command_line| process_name|process_command_line| action| @timestamp|\n",
"+--------------------+-------------------+---------------------------+--------------------+--------------------+-------------+--------------------+\n",
"|aa6b4a20-7cde-5ce...| svchost.exe| c:\\windows\\system...| wmiprvse.exe|c:\\windows\\system...|processcreate|2019-05-18 21:45:...|\n",
"|aa6b4a20-7cde-5ce...| wmiprvse.exe| c:\\windows\\system...| powershell.exe|c:\\windows\\system...|processcreate|2019-05-18 21:45:...|\n",
"|aa6b4a20-7cdf-5ce...| powershell.exe| c:\\windows\\system...| conhost.exe|\\??\\c:\\windows\\sy...|processcreate|2019-05-18 21:45:...|\n",
"|aa6b4a20-7ce7-5ce...| winlogon.exe| winlogon.exe| logonui.exe|\"logonui.exe\" /fl...|processcreate|2019-05-18 21:45:...|\n",
"|aa6b4a20-7ce9-5ce...| svchost.exe| c:\\windows\\system...| taskhostw.exe|taskhostw.exe key...|processcreate|2019-05-18 21:45:...|\n",
"|aa6b4a20-7cd9-5ce...| svchost.exe| c:\\windows\\system...|backgroundtaskhos...|\"c:\\windows\\syste...|processcreate|2019-05-18 21:44:...|\n",
"|aa6b4a20-7cda-5ce...| svchost.exe| c:\\windows\\system...| runtimebroker.exe|c:\\windows\\system...|processcreate|2019-05-18 21:44:...|\n",
"|aa6b4a20-7cea-5ce...| svchost.exe| c:\\windows\\system...| dllhost.exe|c:\\windows\\system...|processcreate|2019-05-18 21:45:...|\n",
"|aa6b4a20-7cea-5ce...| svchost.exe| c:\\windows\\system...| taskhostw.exe| taskhostw.exe|processcreate|2019-05-18 21:45:...|\n",
"|aa6b4a20-7cec-5ce...| svchost.exe| c:\\windows\\system...| dllhost.exe|c:\\windows\\system...|processcreate|2019-05-18 21:45:...|\n",
"+--------------------+-------------------+---------------------------+--------------------+--------------------+-------------+--------------------+\n",
"only showing top 10 rows\n",
"\n"
]
}
],
"source": [
"processcreate_df.show(10)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "PySpark_Python3",
"language": "python",
"name": "pyspark3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -0,0 +1,225 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from pyspark.sql import SparkSession"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"spark = SparkSession.builder \\\n",
" .appName(\"HELK JOIN\") \\\n",
" .master(\"spark://helk-spark-master:7077\") \\\n",
" .enableHiveSupport() \\\n",
" .getOrCreate()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"es_reader = (spark.read\n",
" .format(\"org.elasticsearch.spark.sql\")\n",
" .option(\"inferSchema\", \"true\")\n",
" .option(\"es.read.field.as.array.include\", \"tags\")\n",
" .option(\"es.nodes\",\"helk-elasticsearch:9200\")\n",
" .option(\"es.net.http.auth.user\",\"elastic\")\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"security_df = es_reader.load(\"logs-endpoint-winevent-security-*/\")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"security_df.createOrReplaceTempView(\"security_events\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"security_4624_3 = spark.sql(\n",
" '''\n",
" SELECT event_id,\n",
" host_name,\n",
" src_ip_addr,\n",
" user_logon_id,\n",
" user_name,\n",
" logon_type,\n",
" `@timestamp`\n",
" FROM security_events\n",
" WHERE event_id = 4624\n",
" AND logon_type = 3\n",
" AND src_ip_addr is not null\n",
" AND `@timestamp` BETWEEN \"2019-05-18 00:00:00.000\" AND \"2019-05-19 00:00:00.000\"\n",
" '''\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"security_4624_3.createOrReplaceTempView(\"security_4624_3\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"sysmon_df = es_reader.load(\"logs-endpoint-winevent-sysmon-*/\")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"sysmon_df.createOrReplaceTempView(\"sysmon_events\")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"sysmon_processcreate = spark.sql(\n",
" '''\n",
" SELECT event_id,\n",
" host_name,\n",
" process_parent_name,\n",
" process_parent_guid,\n",
" process_parent_command_line,\n",
" process_name,\n",
" process_guid,\n",
" process_command_line,\n",
" user_logon_id,\n",
" `@timestamp`\n",
" FROM sysmon_events\n",
" WHERE event_id = 1\n",
" AND `@timestamp` BETWEEN \"2019-05-18 00:00:00.000\" AND \"2019-05-19 00:00:00.000\"\n",
" '''\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"sysmon_processcreate.createOrReplaceTempView(\"sysmon_1\")"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"security_sysmon_join = spark.sql(\n",
" '''\n",
" SELECT s.`@timestamp`,\n",
" s.host_name,\n",
" s.src_ip_addr,\n",
" s.logon_type,\n",
" s.user_logon_id,\n",
" s.user_name,\n",
" p.process_parent_name,\n",
" p.process_parent_guid,\n",
" p.process_parent_command_line,\n",
" p.process_name,\n",
" p.process_guid,\n",
" p.process_command_line\n",
" FROM security_4624_3 s\n",
" INNER JOIN sysmon_1 p\n",
" ON s.user_logon_id = p.user_logon_id\n",
" '''\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"+--------------------+-------------+---------------+---------+-------------------+---------------------------+--------------+--------------------+\n",
"| @timestamp| src_ip_addr| host_name|user_name|process_parent_name|process_parent_command_line| process_name|process_command_line|\n",
"+--------------------+-------------+---------------+---------+-------------------+---------------------------+--------------+--------------------+\n",
"|2019-05-18 21:45:...|172.18.39.106|IT001.shire.com| pgustavo| wmiprvse.exe| c:\\windows\\system...|powershell.exe|c:\\windows\\system...|\n",
"|2019-05-18 21:45:...|172.18.39.106|IT001.shire.com| pgustavo| powershell.exe| c:\\windows\\system...| conhost.exe|\\??\\c:\\windows\\sy...|\n",
"|2019-05-18 21:45:...|172.18.39.106|IT001.shire.com| pgustavo| powershell.exe| c:\\windows\\system...| whoami.exe|\"c:\\windows\\syste...|\n",
"+--------------------+-------------+---------------+---------+-------------------+---------------------------+--------------+--------------------+\n",
"\n"
]
}
],
"source": [
"security_sysmon_join.select(\n",
" \"@timestamp\",\"src_ip_addr\",\"host_name\",\"user_name\",\"process_parent_name\",\"process_parent_command_line\",\"process_name\",\"process_command_line\"\n",
").show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "PySpark_Python3",
"language": "python",
"name": "pyspark3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}