# DeTTECT/group_mapping.py
#
# 626 lines
# 28 KiB
# Python

import simplejson
from generic import *
# Module-level cache of the custom group IDs handed out by get_group_id():
# { group_name + campaign: custom group id (e.g. 'CG0001') }
CG_GROUPS = {}
def is_in_group(json_groups, argument_groups):
    """
    Check if the two collections (json_groups and argument_groups) have any group in common based on their
    names/aliases.
    :param json_groups: group aliases from ATT&CK; may be None or empty for groups without aliases
    :param argument_groups: group names provided via the command line by the user. These are compared as-is,
    so the caller is expected to have lower-cased them already
    :return: True when at least one argument group equals a lower-cased alias, otherwise False
    """
    # Aliases can be absent (is_group_found guards against that as well): nothing to match against.
    if not json_groups or not argument_groups:
        return False

    # build the lookup once instead of scanning a list for every argument
    known_aliases = {alias.lower() for alias in json_groups}
    return any(group in known_aliases for group in argument_groups)
def is_group_found(groups_found, argument_groups):
    """
    Check if the groups that have been provided using '-g/--groups'/'-o/--overlay' are present within MITRE ATT&CK
    and within the filtered data set.
    :param groups_found: group IDs that are found in the (filtered) ATT&CK data
    :param argument_groups: groups provided via the command line by the user (lower case names, aliases or IDs)
    :return: True when every provided group is known and part of 'groups_found' (or when 'all' is provided),
    False as soon as one group is unknown or not part of the data set
    """
    groups_json = load_attack_data(DATATYPE_ALL_GROUPS)

    for group_arg in argument_groups:
        if group_arg == 'all':  # this one will be ignored as it does not make any sense for this function
            return True

        group_id = None
        for group in groups_json:  # is the group provided via the command line known in ATT&CK?
            # A group without aliases can still be matched on its group ID.
            aliases_lower = [alias.lower() for alias in group['group_aliases'] or []]
            if group_arg == group['group_id'].lower() or group_arg in aliases_lower:
                group_id = group['group_id']

        if not group_id:  # the group that has been provided through the command line cannot be found in ATT&CK
            print('[!] Unknown group: ' + group_arg)
            return False
        if group_id not in groups_found:  # group not present in filtered data set (i.e. platform and stage)
            print('[!] Group not part of the data set: ' + group_arg)
            return False
        # This group is fine: keep going so that every provided group gets validated, not only the first one.

    return True
def get_software_techniques(groups, stage, platform):
    """
    Get all techniques (in a dict) from the provided list of groups in relation to the software these groups use,
    and hence techniques they support.
    :param groups: ATT&CK groups: either the path to a group YAML file or a list of lower case group
    IDs/names/aliases (the first item may be 'all')
    :param stage: attack or pre-attack
    :param platform: the applicable platform
    :return: dictionary with info on groups: { group_id: {group_name: NAME, techniques: set{id, ...} } }.
    Groups loaded from a YAML file also get the keys 'campaign' and 'software'.
    """
    # { group_id: {group_name: NAME, techniques: set{id, ...} } }
    groups_dict = {}

    # { software_id: set{technique_id, ...} }
    software_dict = {}
    for tech in load_attack_data(DATATYPE_TECH_BY_SOFTWARE):
        software_dict.setdefault(tech['software_id'], set()).add(tech['technique_id'])

    # groups is a YAML file
    if os.path.isfile(str(groups)):
        with open(groups, 'r') as yaml_file:
            config = yaml.load(yaml_file, Loader=yaml.FullLoader)

        for group in config['groups']:
            if not group['enabled']:
                continue

            # Normalise an empty campaign the same way get_group_techniques does. This makes both functions
            # produce the same custom group ID for a group, and avoids concatenating a string with None.
            campaign = group['campaign'] if group['campaign'] else ''
            group_id = get_group_id(group['group_name'], campaign)

            groups_dict[group_id] = {
                'group_name': group['group_name'],
                'techniques': set(),
                'campaign': group['campaign'],
                'software': group['software_id'],
            }

            for soft_id in group['software_id'] or []:
                try:
                    groups_dict[group_id]['techniques'].update(software_dict[soft_id])
                except KeyError:
                    print('[!] unknown ATT&CK software ID: ' + str(soft_id))

    # groups are provided as arguments via the command line
    else:
        for s in load_attack_data(DATATYPE_SOFTWARE_BY_GROUP):
            if not s['software_platform']:  # there is some software that does not have a platform, skip those
                continue

            # group matches the: matrix/stage, platform and the group(s) we are interested in
            platform_match = platform in s['software_platform'] or platform == 'all'
            group_match = groups[0] == 'all' or s['group_id'].lower() in groups or \
                bool(s['group_aliases'] and is_in_group(s['group_aliases'], groups))

            if s['matrix'] == 'mitre-' + stage and platform_match and group_match:
                if s['group_id'] not in groups_dict:
                    groups_dict[s['group_id']] = {'group_name': s['group'], 'techniques': set()}
                # software without any known technique simply contributes nothing
                groups_dict[s['group_id']]['techniques'].update(software_dict.get(s['software_id'], ()))

    return groups_dict
def get_group_id(group_name, campaign):
    # CG_GROUPS = { group_name+campaign: id } }
    """
    Generate a custom group id. The same group_name/campaign combination always results in the same id for
    as long as the module is loaded, because the ids are cached in the module-level dict CG_GROUPS.
    :param group_name: group name as used within the YAML file
    :param campaign: campaign as used within the YAML file (None is treated as an empty campaign)
    :return: custom group identifier string (e.g. CG0001)
    """
    key = group_name + ('' if campaign is None else str(campaign))

    if key not in CG_GROUPS:
        # Sequence number zero-padded to (at least) four digits: CG0001, CG0010, CG0100, CG1000, ...
        CG_GROUPS[key] = 'CG{:04d}'.format(len(CG_GROUPS) + 1)

    return CG_GROUPS[key]
def get_group_techniques(groups, stage, platform, file_type):
    """
    Get all techniques (in a dict) from the provided list of groups
    :param groups: group ID, group name/alias or a YAML file with group(s) data
    :param stage: attack or pre-attack
    :param platform: all, Linux, macOS, Windows
    :param file_type: the file type of the YAML file as present in the key 'file_type'
    :return: returns dictionary with all techniques from the provided list of groups or -1 when group is not found
    """
    # { group_id: {group_name: NAME, techniques: set{id, ...}, weight: {id: weight, ...} } }
    groups_dict = {}
    groups_found = set()

    # groups is a YAML file
    if file_type == FILE_TYPE_GROUP_ADMINISTRATION:
        with open(groups, 'r') as yaml_file:
            config = yaml.load(yaml_file, Loader=yaml.FullLoader)

        for group in config['groups']:
            if not group['enabled']:
                continue

            campaign = group['campaign'] if group['campaign'] else ''
            group_id = get_group_id(group['group_name'], campaign)
            entry = groups_dict[group_id] = dict()

            entry['group_name'] = group['group_name']
            technique_ids = group['technique_id']
            if isinstance(technique_ids, list):
                entry['techniques'] = set(technique_ids)
                entry['weight'] = dict((i, 1) for i in technique_ids)
            elif isinstance(technique_ids, dict):
                # { technique_id: weight }
                entry['techniques'] = set(technique_ids.keys())
                entry['weight'] = technique_ids
            else:
                # No (usable) techniques in the YAML file. The keys must still exist, because the functions
                # that consume this dict index 'techniques' and 'weight' unconditionally.
                entry['techniques'] = set()
                entry['weight'] = dict()
            entry['campaign'] = group['campaign']
            entry['software'] = group['software_id']
    else:
        # groups are provided as arguments via the command line
        groups_json = load_attack_data(DATATYPE_TECH_BY_GROUP)

        for e in groups_json:
            json_platform = e['platform']
            if not json_platform:
                # we just set this to an random legit value, because for pre-attack 'platform' is not used
                json_platform = 'Windows'

            # group matches the: matrix/stage, platform and the group(s) we are interested in
            platform_match = platform in json_platform or platform == 'all'
            group_match = groups[0] == 'all' or e['group_id'].lower() in groups or \
                bool(e['group_aliases'] and is_in_group(e['group_aliases'], groups))

            if e['matrix'] == 'mitre-' + stage and platform_match and group_match:
                if e['group_id'] not in groups_dict:
                    groups_found.add(e['group_id'])
                    groups_dict[e['group_id']] = {'group_name': e['group']}
                    groups_dict[e['group_id']]['techniques'] = set()
                    groups_dict[e['group_id']]['weight'] = dict()

                groups_dict[e['group_id']]['techniques'].add(e['technique_id'])
                groups_dict[e['group_id']]['weight'][e['technique_id']] = 1

    # do not call 'is_group_found' when groups is a YAML file
    # (this could contain groups that do not exists within ATT&CK)
    if not os.path.isfile(str(groups)):
        found = is_group_found(groups_found, groups)
        if not found:
            return -1

    return groups_dict
def get_detection_techniques(filename, filter_applicable_to):
    """
    Build the pseudo group 'DETECTION' from the techniques in the technique administration file.
    Only techniques for which calculate_score() on the 'detection' data is above zero are included, each
    with a weight of 1.
    :param filename: path to the YAML technique administration file
    :param filter_applicable_to: filter techniques based on applicable_to field in techniques administration YAML file
    :return: tuple of (groups dictionary, loaded techniques from administration YAML file)
    """
    # the name and platform that load_techniques also returns are not needed here
    detection_techniques, _name, _platform = load_techniques(filename, 'detection', filter_applicable_to)

    scored_ids = [tech_id for tech_id, tech in detection_techniques.items()
                  if calculate_score(tech['detection']) > 0]

    # { group_id: {group_name: NAME, techniques: set{id, ...}, weight: {id: 1, ...} } }
    detection_group = {
        'group_name': 'Detection',
        'techniques': set(scored_ids),
        'weight': {tech_id: 1 for tech_id in scored_ids},
    }
    return {'DETECTION': detection_group}, detection_techniques
def get_visibility_techniques(filename, filter_applicable_to):
    """
    Build the pseudo group 'VISIBILITY' from the techniques in the technique administration file.
    Only techniques for which calculate_score() on the 'visibility' data is above zero are included, each
    with a weight of 1.
    :param filename: path to the YAML technique administration file
    :param filter_applicable_to: filter techniques based on applicable_to field in techniques administration YAML file
    :return: tuple of (groups dictionary, loaded techniques from administration YAML file)
    """
    # the name and platform that load_techniques also returns are not needed here
    visibility_techniques, _name, _platform = load_techniques(filename, 'visibility', filter_applicable_to)

    scored_ids = [tech_id for tech_id, tech in visibility_techniques.items()
                  if calculate_score(tech['visibility']) > 0]

    # { group_id: {group_name: NAME, techniques: set{id, ...}, weight: {id: 1, ...} } }
    visibility_group = {
        'group_name': 'Visibility',
        'techniques': set(scored_ids),
        'weight': {tech_id: 1 for tech_id in scored_ids},
    }
    return {'VISIBILITY': visibility_group}, visibility_techniques
def get_technique_count(groups, groups_overlay, groups_software, overlay_type, all_techniques):
    """
    Create a dict with all involved techniques and their relevant count/score
    :param groups: a dict with data on groups
    :param groups_overlay: a dict with data on the groups to overlay
    :param groups_software: a dict with with data on which techniques are used within related software
    :param overlay_type: group, visibility or detection
    :param all_techniques: dict containing all technique data for visibility or detection
    :return: dictionary { technique_id: {count: ..., groups: set{} } }, max_count. The max_count is the highest
    count based on 'groups' only (0 when 'groups' holds no techniques)
    """
    # { technique_id: {count: ..., groups: set{} }
    techniques_dict = {}

    for group, v in groups.items():
        for tech in v['techniques']:
            if tech not in techniques_dict:
                techniques_dict[tech] = {'groups': set(), 'count': v['weight'][tech]}
            else:
                techniques_dict[tech]['count'] += v['weight'][tech]
            techniques_dict[tech]['groups'].add(group)

    # 'default' keeps this from raising a ValueError when none of the groups has a technique
    max_count = max((entry['count'] for entry in techniques_dict.values()), default=0)

    is_group_overlay = overlay_type == OVERLAY_TYPE_GROUP

    # create dict {tech_id: score+max_tech_count} to be used for when doing an overlay of the type visibility or
    # detection. Nothing to prepare when there is no overlay at all.
    dict_tech_score = {}
    if not is_group_overlay and groups_overlay:
        for tech in groups_overlay[overlay_type.upper()]['techniques']:
            dict_tech_score[tech] = calculate_score(all_techniques[tech][overlay_type]) + max_count

    for group, v in groups_overlay.items():
        for tech in v['techniques']:
            entry = techniques_dict.get(tech)
            if entry is None:
                # technique is only present in the overlay
                entry = techniques_dict[tech] = {'groups': set()}
                if is_group_overlay:
                    entry['count'] = v['weight'][tech]
                else:
                    entry['count'] = dict_tech_score[tech]
            elif not (group in groups and tech in groups[group]['techniques']):
                # Only do this when it was not already counted by being part of 'groups'.
                # Meaning the group in 'groups_overlay' was also part of 'groups' (match on Group ID) and the
                # technique was already counted for that group / it is not a new technique for that group coming
                # from a YAML file
                if is_group_overlay:
                    # We only want to increase the score when comparing groups and not for visibility or
                    # detection. This allows to have proper sorting of the heat map, which in turn improves the
                    # ability to visually compare this heat map with the detection/visibility ATT&CK Navigator
                    # layers.
                    entry['count'] += v['weight'][tech]
                else:
                    entry['count'] = dict_tech_score[tech]
            entry['groups'].add(group)

    for group, v in groups_software.items():
        for tech in v['techniques']:
            if tech not in techniques_dict:
                # we will not adjust the scoring for groups_software. We will just set the the score to 0.
                # This will later be used for the colouring of the heat map.
                techniques_dict[tech] = {'count': 0, 'groups': set()}
            techniques_dict[tech]['groups'].add(group)

    return techniques_dict, max_count
def get_technique_layer(techniques_count, groups, overlay, groups_software, overlay_file_type, overlay_type,
                        all_techniques):
    """
    Create the technique layer that will be part of the ATT&CK navigator json file
    :param techniques_count: involved techniques with count (to be used within the scores)
    :param groups: a dict with data on groups
    :param overlay: a dict with data on the groups to overlay
    :param groups_software: a dict with with data on which techniques are used within related software
    :param overlay_file_type: the file type of the YAML file as present in the key 'file_type'
    :param overlay_type: group, visibility or detection
    :param all_techniques: dictionary with all techniques loaded from techniques administration YAML file
    :return: list with one dictionary per technique (keys: techniqueID, score, metadata and optionally color)
    """
    techniques_layer = []
    group_ids = set(groups.keys())

    # { technique_id: {count: ..., groups: set{} }
    # add the technique count/scoring
    for tech, v in techniques_count.items():
        t = {'techniqueID': tech, 'score': v['count'], 'metadata': []}
        metadata_dict = dict()

        for values in groups.values():
            if tech in values['techniques']:  # we do not color this one because that's done using the scoring
                metadata_dict.setdefault('Groups', set()).add(values['group_name'])

                # this will only be effective when loading a YAML files that has a value for the key 'campaign'
                # (the key in the groups dict is the lower case 'campaign', the metadata name is 'Campaign')
                if values.get('campaign') is not None:
                    metadata_dict.setdefault('Campaign', set()).add(values['campaign'])

        # change the color and add metadata to make the groups overlay visible
        for values in overlay.values():
            if tech not in values['techniques']:
                continue

            # Determine color:
            if v['groups'].intersection(group_ids):
                # if the technique is both present in the group (-g/--groups) and the groups overlay (-o/--overlay)
                t['color'] = COLOR_GROUP_OVERLAY_MATCH
            elif overlay_file_type == FILE_TYPE_TECHNIQUE_ADMINISTRATION:
                # the technique is only present in the overlay and not in the provided groups (-g/--groups)
                if overlay_type == OVERLAY_TYPE_VISIBILITY:
                    t['color'] = COLOR_GROUP_OVERLAY_ONLY_VISIBILITY
                elif overlay_type == OVERLAY_TYPE_DETECTION:
                    t['color'] = COLOR_GROUP_OVERLAY_ONLY_DETECTION
            else:
                t['color'] = COLOR_GROUP_OVERLAY_NO_MATCH

            # Add applicable_to to metadata in case of overlay for detection/visibility:
            if overlay_file_type == FILE_TYPE_TECHNIQUE_ADMINISTRATION:
                metadata_dict['Applicable to'] = set(
                    a for item in all_techniques[tech][overlay_type] for a in item['applicable_to'])
                metadata_dict['Detection score'] = [str(calculate_score(all_techniques[tech]['detection']))]
                metadata_dict['Visibility score'] = [str(calculate_score(all_techniques[tech]['visibility']))]

            metadata_dict.setdefault('Overlay', set()).add(values['group_name'])

            # this will only be effective when loading a YAML files that has a value for the key 'campaign'
            if values.get('campaign') is not None:
                metadata_dict.setdefault('Campaign', set()).add(values['campaign'])

        # change the color and add metadata to make the groups software overlay visible
        for values in groups_software.values():  # TODO add support for campaign info in layer metadata
            if tech in values['techniques']:
                if t['score'] > 0:
                    t['color'] = COLOR_GROUP_AND_SOFTWARE
                else:
                    t['color'] = COLOR_SOFTWARE
                metadata_dict.setdefault('Software groups', set()).add(values['group_name'])

        # create the metadata based on the dict 'metadata_dict'
        for metadata, values in metadata_dict.items():
            # The values are mostly sets: sort them to get the same layer file on every run. The conversion
            # to str allows non-string values from YAML (e.g. a numeric campaign) to be joined.
            value = ', '.join(sorted(str(item) for item in values))
            t['metadata'].append({'name': '-' + metadata, 'value': value})

        techniques_layer.append(t)

    return techniques_layer
def get_group_list(groups, file_type):
    """
    Produce the display names of the involved groups.
    :param groups: a dict with data on groups
    :param file_type: the file type of the YAML file as present in the key 'file_type'
    :return: a list of names ('group_name' or 'group_name (campaign)') when the groups come from a group
    administration YAML file; in every other case 'groups' itself is handed back untouched
    """
    if file_type != FILE_TYPE_GROUP_ADMINISTRATION:
        return groups

    def display_name(group_data):
        # only mention the campaign if the YAML file contains a campaign key with a legit value
        campaign = group_data.get('campaign')
        if campaign is None:
            return group_data['group_name']
        return group_data['group_name'] + ' (' + campaign + ')'

    return [display_name(group_data) for group_data in groups.values()]
def generate_group_heat_map(groups, overlay, overlay_type, stage, platform, software_groups, filter_applicable_to):
    """
    Calls all functions that are necessary for the generation of the heat map and write a json layer to disk.
    The layer file is written below the relative directory 'output/'.
    :param groups: threat actor groups
    :param overlay: group(s), visibility or detections to overlay (group ID, group name/alias, YAML file with
    group(s), detections or visibility)
    :param overlay_type: group, visibility or detection
    :param stage: attack or pre-attack
    :param platform: all, Linux, macOS, Windows
    :param software_groups: specify if techniques from related software should be included.
    :param filter_applicable_to: filter techniques based on applicable_to field in techniques administration YAML file
    :return: returns nothing when something's wrong
    """
    overlay_dict = {}
    groups_software_dict = {}

    groups_file_type = None
    if os.path.isfile(groups):
        groups_file_type = check_file(groups, file_type=FILE_TYPE_GROUP_ADMINISTRATION)
        if not groups_file_type:
            return
    else:
        # remove whitespaces (leading and trailing), convert to lower case and put in a list
        groups = [name.strip().lower() for name in groups.split(',')]

    overlay_file_type = None
    if overlay:
        if os.path.isfile(overlay):
            if overlay_type == OVERLAY_TYPE_GROUP:
                expected_file_type = FILE_TYPE_GROUP_ADMINISTRATION
            elif overlay_type in [OVERLAY_TYPE_VISIBILITY, OVERLAY_TYPE_DETECTION]:
                expected_file_type = FILE_TYPE_TECHNIQUE_ADMINISTRATION
            else:
                expected_file_type = None
            overlay_file_type = check_file(overlay, expected_file_type)
            if not overlay_file_type:
                return
        else:
            overlay = [name.strip().lower() for name in overlay.split(',')]
    else:
        overlay = []

    all_techniques = None
    if overlay_file_type == FILE_TYPE_TECHNIQUE_ADMINISTRATION:
        if overlay_type == OVERLAY_TYPE_VISIBILITY:
            overlay_dict, all_techniques = get_visibility_techniques(overlay, filter_applicable_to)
        elif overlay_type == OVERLAY_TYPE_DETECTION:
            overlay_dict, all_techniques = get_detection_techniques(overlay, filter_applicable_to)
    elif len(overlay) > 0:
        overlay_dict = get_group_techniques(overlay, stage, platform, overlay_file_type)
        # get_group_techniques returns -1 (which is truthy) when an overlay group is not found, so that value
        # has to be checked explicitly next to the empty dict.
        if overlay_dict == -1 or not overlay_dict:
            return

    groups_dict = get_group_techniques(groups, stage, platform, groups_file_type)
    if groups_dict == -1:
        return
    if len(groups_dict) == 0:
        print('[!] Empty layer.')  # the provided groups dit not result in any techniques
        return

    # check if we are doing a software group overlay
    if software_groups and overlay:  # TODO add support for campaign info in layer metadata
        if overlay_type not in [OVERLAY_TYPE_VISIBILITY, OVERLAY_TYPE_DETECTION]:
            # if a group overlay is provided, get the software techniques for the overlay
            groups_software_dict = get_software_techniques(overlay, stage, platform)
    elif software_groups:
        groups_software_dict = get_software_techniques(groups, stage, platform)

    technique_count, max_count = get_technique_count(groups_dict, overlay_dict, groups_software_dict, overlay_type,
                                                     all_techniques)
    technique_layer = get_technique_layer(technique_count, groups_dict, overlay_dict, groups_software_dict,
                                          overlay_file_type, overlay_type, all_techniques)

    # make a list group names for the involved groups.
    if groups == ['all']:
        groups_list = ['all']
    else:
        groups_list = get_group_list(groups_dict, groups_file_type)
    overlay_list = get_group_list(overlay_dict, overlay_file_type)

    desc = 'stage: ' + stage + ' | platform: ' + platform + ' | group(s): ' + ', '.join(groups_list) + \
           ' | overlay group(s): ' + ', '.join(overlay_list)

    layer = get_layer_template_groups(stage[0].upper() + stage[1:] + ' ' + platform, max_count, desc, stage,
                                      platform, overlay_type)
    layer['techniques'] = technique_layer

    # one technique per line keeps the layer file readable
    json_string = simplejson.dumps(layer).replace('}, ', '},\n')

    if overlay:
        filename = "output/" + stage + '_' + platform.lower() + '_' + '_'.join(groups_list) + '-overlay_' + \
                   '_'.join(overlay_list) + '_' + filter_applicable_to.replace(' ', '_')
    else:
        filename = "output/" + stage + '_' + platform.lower() + '_' + '_'.join(groups_list)
    filename = filename[:255] + '.json'  # cap the length: many groups can result in a very long name
    with open(filename, 'w') as f:  # write layer file to disk
        f.write(json_string)
        print('Written layer: ' + filename)
def get_updates(update_type, sort='modified'):
    """
    Print the ATT&CK techniques, groups or software together with their creation and modification date,
    ordered on the chosen date field.
    :param update_type: the type of update: techniques, groups or software
    :param sort: sort the list by modified or creation date
    :return:
    """
    indent = ' ' * 6

    def by_sort_key(item):
        return item[sort]

    def print_dates(item):
        # only the date part of both timestamps is shown
        print(indent + 'created: ' + item['created'].split(' ')[0])
        print(indent + 'modified: ' + item['modified'][:10])

    # 'techniques' and 'groups' are matched on the value without its last character
    singular = update_type[:-1]

    if singular == 'technique':
        for tech in sorted(load_attack_data(DATATYPE_ALL_TECH), key=by_sort_key):
            print(tech['technique_id'] + ' ' + tech['technique'])
            print_dates(tech)
            print(indent + 'matrix: ' + tech['matrix'][6:])
            tactic = ' '.join(tech['tactic']) if tech['tactic'] else 'None'
            print(indent + 'tactic: ' + tactic)
            print('')
    elif singular == 'group':
        for grp in sorted(load_attack_data(DATATYPE_ALL_GROUPS), key=by_sort_key):
            print(grp['group_id'] + ' ' + grp['group'])
            print_dates(grp)
            print('')
    elif update_type == 'software':
        for soft in sorted(load_attack_data(DATATYPE_ALL_SOFTWARE), key=by_sort_key):
            print(soft['software_id'] + ' ' + soft['software'])
            print_dates(soft)
            print(indent + 'matrix: ' + soft['matrix'][6:])
            print(indent + 'type: ' + soft['type'])
            platforms = ', '.join(soft['software_platform']) if soft['software_platform'] else 'None'
            print(indent + 'platform: ' + platforms)
            print('')
def get_statistics():
    """
    Print a table with, per data source, the number of techniques that list it. The data source with the
    highest number of techniques is printed first.
    :return:
    """
    # {data_source: {techniques: [T0001, ...}, count: ...}
    data_sources_dict = {}
    for tech in load_attack_data(DATATYPE_ALL_TECH):
        tech_id = tech['technique_id']
        data_sources = tech['data_sources']
        if not data_sources:
            continue  # this technique does not list any data source
        for ds in data_sources:
            entry = data_sources_dict.setdefault(ds, {'techniques': [], 'count': 0})
            entry['techniques'].append(tech_id)
            entry['count'] += 1

    # highest count first
    ranked = sorted(data_sources_dict.items(), key=lambda kv: kv[1]['count'], reverse=True)

    row = '{:<6s} {:s}'
    print(row.format('Count', 'Data Source'))
    print('-'*50)
    for ds, info in ranked:
        print(row.format(str(info['count']), ds))