Create malware_bazzar_to_stix.py

This commit is contained in:
Omar Santos 2025-02-14 18:40:01 -05:00 committed by GitHub
parent 665051b6ef
commit 2a2cc8b3fa
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -0,0 +1,98 @@
'''
This script demonstrates how to use OpenAI's models to generate STIX JSON documents
from recent malware entries obtained from the Malware Bazaar API. The script retrieves the latest
malware entries, then uses the OpenAI API to generate STIX JSON documents for each entry.
'''
# Import Required Libraries
import os
import requests
import json
from openai import OpenAI
# The OpenAI API key comes from the environment; stop immediately if it is absent.
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    raise ValueError("Please set your OPENAI_API_KEY environment variable.")

# OpenAI client built from that key.
client = OpenAI(api_key=api_key)

# Base URL of the Malware Bazaar API.
MALWARE_BAZAAR_API_URL = 'https://mb-api.abuse.ch/api/v1/'
def get_recent_malware_entries(limit=5):
    """
    Retrieve recent malware entries from Malware Bazaar.

    Sends a "get_recent" query with "selector": "100" (which returns the latest
    100 additions) and returns only the first `limit` entries of the result.

    Args:
        limit: Maximum number of entries to return. A negative value is treated
            as 0 rather than silently dropping entries from the end of the list.

    Returns:
        A list with at most `limit` entries, or an empty list when the request
        fails, the body is not valid JSON, or the API reports no usable data.
    """
    # A negative slice bound would mean "all but the last N", which is never
    # what a caller asking for "at most N entries" intends.
    if limit is not None and limit < 0:
        limit = 0

    payload = {
        "query": "get_recent",
        "selector": "100",  # Using "100" to get the latest 100 additions.
    }
    try:
        response = requests.post(MALWARE_BAZAAR_API_URL, data=payload, timeout=15)
        response.raise_for_status()
        data = response.json()
    except requests.RequestException as e:
        print("Error contacting Malware Bazaar API:", e)
        return []
    except ValueError as e:
        # Depending on the installed requests version, a non-JSON body may
        # surface as a plain ValueError instead of a RequestException.
        print("Malware Bazaar returned a non-JSON response:", e)
        return []

    # Guard against payloads that are valid JSON but not the expected object.
    if not isinstance(data, dict):
        print("Malware Bazaar returned an error or no data:", None)
        return []

    entries = data.get("data")
    if data.get("query_status") == "ok" and isinstance(entries, list):
        return entries[:limit]

    print("Malware Bazaar returned an error or no data:", data.get("query_status"))
    return []
def _extract_json_text(text):
    """Return `text` stripped of a surrounding Markdown code fence, if present."""
    cleaned = text.strip()
    if cleaned.startswith("```"):
        # Drop the opening fence line (e.g. "```json") ...
        newline_at = cleaned.find("\n")
        cleaned = cleaned[newline_at + 1:] if newline_at != -1 else ""
        # ... and the closing fence, when there is one.
        cleaned = cleaned.rstrip()
        if cleaned.endswith("```"):
            cleaned = cleaned[:-3]
    return cleaned.strip()


def generate_stix_document(malware_entry):
    """
    Use OpenAI's GPT model (via the client interface) to generate a STIX 2.1
    JSON document from a single malware entry.

    Args:
        malware_entry: JSON-serializable malware entry; it is embedded in the
            prompt via `json.dumps`.

    Returns:
        The model's reply as a JSON string, or None when the API call fails,
        the reply is empty, or the reply does not parse as JSON. Only JSON
        well-formedness is checked here, not STIX 2.1 conformance.
    """
    prompt = (
        "Convert the following malware intelligence entry into a valid STIX 2.1 JSON document. "
        "Include relevant STIX objects such as Malware, Indicator, and Observed Data with proper relationships. "
        "Ensure the output is valid JSON and conforms to STIX 2.1 standards.\n\n"
        "Malware Entry:\n"
        f"{json.dumps(malware_entry, indent=2)}\n\n"
        "Output the complete STIX JSON document."
    )
    try:
        chat_completion = client.chat.completions.create(
            model="gpt-4o-mini",  # Or "o3-mini" as needed.
            messages=[
                {"role": "system", "content": "You are an expert in cyber threat intelligence and STIX 2.1."},
                {"role": "user", "content": prompt},
            ],
            temperature=0.1,
            max_tokens=16000,
            # Ask for JSON mode so the reply is a bare JSON object rather than
            # prose or a Markdown-fenced snippet.
            response_format={"type": "json_object"},
        )
        # Use dot notation to access the response content.
        content = chat_completion.choices[0].message.content
    except Exception as e:
        # Deliberately broad: any client/transport failure should skip this
        # entry instead of aborting the whole run.
        print("Error generating STIX document:", e)
        return None

    if not content:
        print("Error generating STIX document:", "model returned no content")
        return None

    # Defensive fallback in case the reply is still wrapped in a code fence.
    stix_json = _extract_json_text(content)

    # Never hand back text that would be saved as a broken .json file
    # (e.g. output truncated by the token limit).
    try:
        json.loads(stix_json)
    except ValueError as e:
        print("Error generating STIX document:", f"model output is not valid JSON ({e})")
        return None
    return stix_json
def main():
    """
    Fetch the latest malware entries and save one STIX document per entry.

    For each of the (up to) 5 entries returned by `get_recent_malware_entries`,
    a STIX document is requested from `generate_stix_document` and written to
    `stix_<sha256>.json` in the current working directory. Entries for which
    no document could be generated are reported and skipped.
    """
    # Retrieve the last 5 malware entries from Malware Bazaar.
    recent_entries = get_recent_malware_entries(limit=5)
    if not recent_entries:
        print("No recent malware entries found.")
        return

    for entry in recent_entries:
        sha256 = entry.get("sha256_hash", "unknown")
        print("Processing malware entry with SHA256:", sha256)

        stix_doc = generate_stix_document(entry)
        if not stix_doc:
            print("Failed to generate STIX document for this entry.\n")
            continue

        # The hash comes from a remote API and ends up in a file path, so
        # refuse anything that is not purely alphanumeric (path separators,
        # dots, empty strings, ...).
        hash_text = str(sha256)
        safe_hash = hash_text if hash_text.isalnum() else "unknown"
        file_name = f"stix_{safe_hash}.json"

        # Write as UTF-8 explicitly instead of relying on the platform's
        # default encoding.
        with open(file_name, "w", encoding="utf-8") as f:
            f.write(stix_doc)
        print(f"Saved STIX document to {file_name}\n")

    print("Completed processing recent malware entries.")


# Run the workflow only when executed as a script, not when imported.
if __name__ == "__main__":
    main()