"""Generate STIX 2.1 JSON documents from recent Malware Bazaar entries.

This script demonstrates how to use OpenAI's models to generate STIX JSON
documents from recent malware entries obtained from the Malware Bazaar API.
It retrieves the latest malware entries, asks the OpenAI API to convert each
entry into a STIX document, validates that the reply is well-formed JSON, and
writes one ``stix_<sha256>.json`` file per entry into the current directory.
"""

# Import Required Libraries
import json
import os
import re

import requests
from openai import OpenAI

# Retrieve your OpenAI API key from environment variables.
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    raise ValueError("Please set your OPENAI_API_KEY environment variable.")

# Instantiate the OpenAI client.
client = OpenAI(api_key=api_key)

# Malware Bazaar API endpoint.
# NOTE(review): abuse.ch appears to require an "Auth-Key" header for API
# access nowadays -- confirm against the current Malware Bazaar documentation.
MALWARE_BAZAAR_API_URL = 'https://mb-api.abuse.ch/api/v1/'

# A SHA-256 digest is exactly 64 hexadecimal characters. Anything else coming
# back from the remote API must not be interpolated into a file name.
_SHA256_PATTERN = re.compile(r"[0-9a-fA-F]{64}")


def get_recent_malware_entries(limit=5):
    """Return the most recent malware entries from Malware Bazaar.

    The API is queried with ``"selector": "100"`` (the latest 100 additions)
    and only the first ``limit`` entries of the result are returned.

    Args:
        limit: Maximum number of entries to return.

    Returns:
        A list of entry dictionaries as delivered by the API. The list is
        empty when the request fails, the response is not valid JSON, or the
        API reports an error / delivers no data.
    """
    payload = {
        "query": "get_recent",
        "selector": "100",  # Using "100" to get the latest 100 additions.
    }
    try:
        response = requests.post(MALWARE_BAZAAR_API_URL, data=payload, timeout=15)
        response.raise_for_status()
        data = response.json()
    except requests.RequestException as e:
        print("Error contacting Malware Bazaar API:", e)
        return []
    except ValueError as e:
        # Older requests releases raise a plain ValueError for a non-JSON body,
        # which is not a RequestException subclass.
        print("Malware Bazaar returned a response that is not valid JSON:", e)
        return []

    if not isinstance(data, dict):
        print("Malware Bazaar returned an unexpected payload type:", type(data).__name__)
        return []

    entries = data.get("data")
    if data.get("query_status") == "ok" and isinstance(entries, list):
        return entries[:limit]

    print("Malware Bazaar returned an error or no data:", data.get("query_status"))
    return []


def _strip_code_fence(text):
    """Return ``text`` without a surrounding Markdown code fence, if any."""
    text = text.strip()
    if text.startswith("```") and text.endswith("```"):
        first_newline = text.find("\n")
        if first_newline != -1:
            # Drop the opening fence line (e.g. "```json") and the closing fence.
            text = text[first_newline + 1:-3]
    return text.strip()


def generate_stix_document(malware_entry):
    """Generate a STIX 2.1 JSON document for a single malware entry.

    The entry is embedded in a prompt and sent to the OpenAI chat completions
    API. The reply is only accepted when it parses as a JSON object, so the
    caller never receives Markdown-wrapped or truncated output.

    Args:
        malware_entry: One JSON-serializable entry as returned by
            ``get_recent_malware_entries``.

    Returns:
        The STIX document as an indented JSON string, or ``None`` when the
        API call fails or the reply is empty or not a valid JSON object.
    """
    prompt = (
        "Convert the following malware intelligence entry into a valid STIX 2.1 JSON document. "
        "Include relevant STIX objects such as Malware, Indicator, and Observed Data with proper relationships. "
        "Ensure the output is valid JSON and conforms to STIX 2.1 standards.\n\n"
        "Malware Entry:\n"
        f"{json.dumps(malware_entry, indent=2)}\n\n"
        "Output the complete STIX JSON document."
    )
    try:
        # NOTE(review): reasoning models such as "o3-mini" are not a drop-in
        # replacement here; they are documented to use different parameters
        # (e.g. max_completion_tokens) -- confirm before switching models.
        chat_completion = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "You are an expert in cyber threat intelligence and STIX 2.1."},
                {"role": "user", "content": prompt},
            ],
            temperature=0.1,
            max_tokens=16000,
            # JSON mode: ask the API for a bare JSON object instead of prose
            # or Markdown. The prompt already mentions JSON, as the mode requires.
            response_format={"type": "json_object"},
        )
        # Use dot notation to access the response content.
        content = chat_completion.choices[0].message.content
    except Exception as e:
        # Deliberately broad: any client, network, or API failure should skip
        # this entry rather than abort the whole run.
        print("Error generating STIX document:", e)
        return None

    if not content:
        print("Error generating STIX document: the model returned no content.")
        return None

    try:
        parsed = json.loads(_strip_code_fence(content))
    except ValueError as e:
        # Also covers output truncated by the token limit.
        print("Error generating STIX document: the model output is not valid JSON:", e)
        return None

    if not isinstance(parsed, dict):
        print("Error generating STIX document: the model output is not a JSON object.")
        return None

    return json.dumps(parsed, indent=2)


def _output_file_name(sha256, index):
    """Build the output file name for an entry.

    A well-formed SHA-256 digest yields ``stix_<sha256>.json``. Any other
    value (missing, ``None``, or containing unexpected characters) falls back
    to ``stix_unknown_<index>.json`` so that remote data can never inject
    path components and hash-less entries do not overwrite each other.
    """
    if isinstance(sha256, str) and _SHA256_PATTERN.fullmatch(sha256):
        return f"stix_{sha256}.json"
    return f"stix_unknown_{index}.json"


def main():
    """Fetch the five most recent entries and save one STIX file per entry."""
    # Retrieve the last 5 malware entries from Malware Bazaar.
    recent_entries = get_recent_malware_entries(limit=5)
    if not recent_entries:
        print("No recent malware entries found.")
        return

    for index, entry in enumerate(recent_entries, start=1):
        sha256 = entry.get("sha256_hash", "unknown")
        print("Processing malware entry with SHA256:", sha256)

        stix_doc = generate_stix_document(entry)
        if not stix_doc:
            print("Failed to generate STIX document for this entry.\n")
            continue

        file_name = _output_file_name(sha256, index)
        try:
            with open(file_name, "w", encoding="utf-8") as f:
                f.write(stix_doc)
        except OSError as e:
            # Keep going: one unwritable file should not discard the rest.
            print(f"Failed to save STIX document to {file_name}: {e}\n")
            continue
        print(f"Saved STIX document to {file_name}\n")

    print("Completed processing recent malware entries.")


if __name__ == "__main__":
    main()