DeTTECT/group_mapping.py

import simplejson
from generic import *
from eql_yaml import techniques_search

# Module-level cache of generated custom group IDs: { group_name + campaign: custom group ID }.
# It is filled by _generate_group_id(), so the same group name/campaign always maps to the same ID.
CG_GROUPS = {}


def _is_in_group(json_groups, argument_groups):
    """
    Determine whether at least one of the provided groups matches one of the ATT&CK group names/aliases.
    :param json_groups: group aliases from ATT&CK (these are lower-cased before comparing)
    :param argument_groups: group names provided via the command line by the user (compared as-is)
    :return: True when any provided group equals a lower-cased alias, False otherwise
    """
    known_aliases = [alias.lower() for alias in json_groups]
    return any(candidate in known_aliases for candidate in argument_groups)


def _is_group_found(groups_found, argument_groups):
    """
    Check if every group that has been provided using '-g/--groups'/'-o/--overlay' is present within MITRE ATT&CK
    and is part of the (filtered) data set.
    :param groups_found: group IDs that are found in the (filtered) ATT&CK data
    :param argument_groups: groups (lower-cased ID, name or alias) provided via the command line by the user
    :return: True when 'all' is encountered or when every provided group is known and part of 'groups_found';
    False (after printing a message) as soon as one group is unknown or not part of the data set
    """
    groups = load_attack_data(DATA_TYPE_STIX_ALL_GROUPS)

    for group_arg in argument_groups:
        if group_arg == 'all':  # this one will be ignored as it does not make any sense for this function
            return True

        group_id = None

        for group in groups:  # is the group provided via the command line known in ATT&CK?
            if 'aliases' in group:
                attack_id = get_attack_id(group)
                group_aliases_lower = [alias.lower() for alias in group['aliases']]
                if group_arg in group_aliases_lower or group_arg == attack_id.lower():
                    group_id = attack_id

        if not group_id:  # the group that has been provided through the command line cannot be found in ATT&CK
            print('[!] Unknown group: ' + group_arg)
            return False
        if group_id not in groups_found:  # group not present in filtered data set (i.e. platform and stage)
            print('[!] Group not part of the data set: ' + group_arg)
            return False
        # This group is fine. Keep going, so that the remaining groups are validated as well
        # (returning True here would leave every group after the first one unchecked).

    return True


def _get_software_techniques(groups, stage, platform):
    """
    Get all techniques (in a dict) from the provided list of groups in relation to the software these groups use,
    and hence techniques they support.
    :param groups: ATT&CK groups: a list with group IDs/names/aliases (or 'all' as first item), or the path to
    a group YAML file
    :param stage: attack or pre-attack (only used when the groups are not provided via a YAML file)
    :param platform: 'all' or the applicable platform(s) (only used when the groups are not provided via a YAML file)
    :return: dictionary with info on groups
    """
    # { group_id: {group_name: NAME, techniques: set{id, ...} } }
    groups_dict = {}

    tech_by_software = load_attack_data(DATA_TYPE_CUSTOM_TECH_BY_SOFTWARE)

    # { software_id: set{technique_id, ...} }
    software_dict = {}
    for tech in tech_by_software:
        software_dict.setdefault(tech['software_id'], set()).add(tech['technique_id'])

    # groups is a YAML file
    if os.path.isfile(str(groups)):
        _yaml = init_yaml()
        with open(groups, 'r') as yaml_file:
            config = _yaml.load(yaml_file)

        for group in config['groups']:
            if not group['enabled']:
                continue

            campaign = group.get('campaign', None)
            campaign = str(campaign) if campaign else ''
            group_id = _generate_group_id(str(group['group_name']), campaign)

            groups_dict[group_id] = dict()
            groups_dict[group_id]['group_name'] = str(group['group_name'])
            groups_dict[group_id]['techniques'] = set()
            if campaign != '':
                groups_dict[group_id]['campaign'] = str(campaign)
            groups_dict[group_id]['software'] = group['software_id']

            # 'software_id' may be empty in the YAML file, in which case there is nothing to add
            for soft_id in group['software_id'] or []:
                try:
                    groups_dict[group_id]['techniques'].update(software_dict[soft_id])
                except KeyError:
                    # str(): the ID comes from YAML and is not guaranteed to be a string
                    print('[!] unknown ATT&CK software ID: ' + str(soft_id))

    # groups are provided as arguments via the command line
    else:
        software_by_group = load_attack_data(DATA_TYPE_CUSTOM_SOFTWARE_BY_GROUP)

        for s in software_by_group:
            if not s['x_mitre_platforms']:  # there is software that do not have a platform, skip those
                continue

            # software matches the ATT&CK Matrix and platform
            # and the group is a group we are interested in
            if s['matrix'] == 'mitre-' + stage and \
                    (platform == 'all' or len(set(s['x_mitre_platforms']).intersection(set(platform))) > 0) and \
                    (groups[0] == 'all' or s['group_id'].lower() in groups or _is_in_group(s['aliases'], groups)):
                if s['group_id'] not in groups_dict:
                    groups_dict[s['group_id']] = {'group_name': s['name']}
                    groups_dict[s['group_id']]['techniques'] = set()
                # Software without any mapped technique has no entry in 'software_dict';
                # treat that as "no techniques" instead of failing with a KeyError.
                groups_dict[s['group_id']]['techniques'].update(software_dict.get(s['software_id'], ()))

    return groups_dict


def _generate_group_id(group_name, campaign):
    """
    Generate a custom group id, or return the earlier generated id for the same group name and campaign.
    The id is 'CG' followed by a sequence number that is zero-padded to (at least) four digits.
    :param group_name: group name as used within the YAML file
    :param campaign: campaign as used within the YAML file (empty string when there is none)
    :return: custom group identifier string (e.g. CG0001)
    """
    # CG_GROUPS = { group_name+campaign: id }
    key = group_name + campaign

    if key not in CG_GROUPS:
        # IDs are handed out sequentially. The padding is based on the number itself, so the 10th
        # group becomes CG0010 (and not CG00010).
        CG_GROUPS[key] = 'CG{:04d}'.format(len(CG_GROUPS) + 1)

    return CG_GROUPS[key]


def _get_group_techniques(groups, stage, platform, file_type):
    """
    Get all techniques (in a dict) from the provided list of groups
    :param groups: group ID, group name/alias or a YAML file with group(s) data
    :param stage: attack or pre-attack
    :param platform: one of the values from PLATFORMS constant or 'all'
    :param file_type: the file type of the YAML file as present in the key 'file_type'
    :return: returns dictionary with all techniques from the provided list of groups or -1 when group is not found
    """
    # { group_id: {group_name: NAME, techniques: set{id, ...}, weight: {id: weight, ...} } }
    groups_dict = {}
    groups_found = set()

    # groups is a YAML file
    if file_type == FILE_TYPE_GROUP_ADMINISTRATION:
        _yaml = init_yaml()
        with open(groups, 'r') as yaml_file:
            config = _yaml.load(yaml_file)

        for group in config['groups']:
            if not group['enabled']:
                continue

            campaign = group.get('campaign', None)
            campaign = str(campaign) if campaign else ''
            group_id = _generate_group_id(str(group['group_name']), campaign)
            groups_dict[group_id] = dict()

            groups_dict[group_id]['group_name'] = str(group['group_name'])
            technique_ids = group['technique_id']
            if isinstance(technique_ids, list):
                # a plain list of techniques: every technique gets the default weight of 1
                groups_dict[group_id]['techniques'] = set(technique_ids)
                groups_dict[group_id]['weight'] = dict((i, 1) for i in technique_ids)
            elif isinstance(technique_ids, dict):
                # a mapping of technique to weight
                groups_dict[group_id]['techniques'] = set(technique_ids.keys())
                groups_dict[group_id]['weight'] = technique_ids
            else:
                # e.g. an empty 'technique_id' key in the YAML file: make sure both keys exist, because
                # the code that consumes this dict reads them unconditionally
                groups_dict[group_id]['techniques'] = set()
                groups_dict[group_id]['weight'] = dict()
            if campaign != '':
                groups_dict[group_id]['campaign'] = str(campaign)
            groups_dict[group_id]['software'] = group['software_id']
    else:
        # groups are provided as arguments via the command line
        all_groups_tech = load_attack_data(DATA_TYPE_CUSTOM_TECH_BY_GROUP)

        for gr in all_groups_tech:
            platforms = gr['x_mitre_platforms']

            # Entries without platform information (pre-attack, where 'platform' is not used) are not
            # filtered on platform. Substituting the string 'Windows' here does not work: set('Windows')
            # is a set of single characters, which never intersects with the list of platforms.
            # group matches the: matrix/stage, platform and the group(s) we are interested in
            if gr['matrix'] == 'mitre-' + stage and \
                    (platform == 'all' or not platforms or len(set(platforms).intersection(set(platform))) > 0) and \
                    (groups[0] == 'all' or gr['group_id'].lower() in groups or _is_in_group(gr['aliases'], groups)):
                if gr['group_id'] not in groups_dict:
                    groups_found.add(gr['group_id'])
                    groups_dict[gr['group_id']] = {'group_name': gr['name']}
                    groups_dict[gr['group_id']]['techniques'] = set()
                    groups_dict[gr['group_id']]['weight'] = dict()

                groups_dict[gr['group_id']]['techniques'].add(gr['technique_id'])
                groups_dict[gr['group_id']]['weight'][gr['technique_id']] = 1

        # do not call '_is_group_found' when groups is a YAML file
        # (this could contain groups that do not exists within ATT&CK)
        if not os.path.isfile(str(groups)):
            found = _is_group_found(groups_found, groups)
            if not found:
                return -1

    return groups_dict


def _get_detection_techniques(filename):
    """
    Build the 'DETECTION' pseudo group from the techniques in the technique administration file.
    Only techniques for which calculate_score() on the 'detection' data is above 0 are included,
    each with a weight of 1.
    :param filename: path to the YAML technique administration file
    :return: groups dictionary, loaded techniques from administration YAML file
    """
    detection_techniques, _name, _platform = load_techniques(filename)

    # keep the techniques that have a detection score above 0
    scored_techniques = [tech_id for tech_id, tech in detection_techniques.items()
                         if calculate_score(tech['detection']) > 0]

    # { group_id: {group_name: NAME, techniques: set{id, ...}, weight: {id: 1, ...} } }
    groups_dict = {
        'DETECTION': {
            'group_name': 'Detection',
            'techniques': set(scored_techniques),
            'weight': {tech_id: 1 for tech_id in scored_techniques},
        }
    }

    return groups_dict, detection_techniques


def _get_visibility_techniques(filename):
    """
    Build the 'VISIBILITY' pseudo group from the techniques in the technique administration file.
    Only techniques for which calculate_score() on the 'visibility' data is above 0 are included,
    each with a weight of 1.
    :param filename: path to the YAML technique administration file
    :return: groups dictionary, loaded techniques from administration YAML file
    """
    visibility_techniques, _name, _platform = load_techniques(filename)

    # keep the techniques that have a visibility score above 0
    scored_techniques = [tech_id for tech_id, tech in visibility_techniques.items()
                         if calculate_score(tech['visibility']) > 0]

    # { group_id: {group_name: NAME, techniques: set{id, ...}, weight: {id: 1, ...} } }
    groups_dict = {
        'VISIBILITY': {
            'group_name': 'Visibility',
            'techniques': set(scored_techniques),
            'weight': {tech_id: 1 for tech_id in scored_techniques},
        }
    }

    return groups_dict, visibility_techniques


def _get_technique_count(groups, groups_overlay, groups_software, overlay_type, all_techniques):
    """
    Create a dict with all involved techniques and their relevant count/score
    :param groups: a dict with data on groups
    :param groups_overlay: a dict with data on the groups to overlay
    :param groups_software: a dict with with data on which techniques are used within related software
    :param overlay_type: group, visibility or detection
    :param all_techniques: dict containing all technique data for visibility or detection
    :return: dictionary, max_count (max_count is the highest count based on 'groups' only; 0 when the groups
    do not contain any technique)
    """
    # { technique_id: {count: ..., groups: set{} }
    techniques_dict = {}

    for group, v in groups.items():
        for tech in v['techniques']:
            if tech not in techniques_dict:
                techniques_dict[tech] = {'groups': set(), 'count': v['weight'][tech]}
            else:
                # the technique is used by more than one group: add up the weights
                techniques_dict[tech]['count'] += v['weight'][tech]
            techniques_dict[tech]['groups'].add(group)

    # 'default' covers groups without any technique, for which a plain max() raises a ValueError
    max_count = max((entry['count'] for entry in techniques_dict.values()), default=0)

    is_group_overlay = overlay_type == OVERLAY_TYPE_GROUP

    # create dict {tech_id: score+max_tech_count} to be used for when doing an overlay of the type visibility or detection
    # We only want to increase the score when comparing groups and not for visibility or detection.
    # This allows to have proper sorting of the heat map, which in turn improves the ability to visually
    # compare this heat map with the detection/visibility ATT&CK Navigator layers.
    dict_tech_score = {}
    if not is_group_overlay:
        for tech in groups_overlay[overlay_type.upper()]['techniques']:
            dict_tech_score[tech] = calculate_score(all_techniques[tech][overlay_type]) + max_count

    for group, v in groups_overlay.items():
        for tech in v['techniques']:
            if tech not in techniques_dict:
                techniques_dict[tech] = {'groups': set(),
                                         'count': v['weight'][tech] if is_group_overlay else dict_tech_score[tech]}
            elif group not in groups or tech not in groups[group]['techniques']:
                # Only do this when it was not already counted by being part of 'groups'.
                # It is skipped when the group in 'groups_overlay' is also part of 'groups' (match on Group ID)
                # and the technique was already counted for that group. It does apply to custom groups from
                # a YAML file and to techniques that are new for that group.
                if is_group_overlay:
                    techniques_dict[tech]['count'] += v['weight'][tech]
                else:
                    techniques_dict[tech]['count'] = dict_tech_score[tech]

            techniques_dict[tech]['groups'].add(group)

    for group, v in groups_software.items():
        for tech in v['techniques']:
            # we will not adjust the scoring for groups_software. We will just set the the score to 0
            # for techniques that are not yet known. This will later be used for the colouring of the heat map.
            entry = techniques_dict.setdefault(tech, {'count': 0})
            entry.setdefault('groups', set()).add(group)

    return techniques_dict, max_count


def _get_technique_layer(techniques_count, groups, overlay, groups_software, overlay_file_type, overlay_type,
                         all_techniques):
    """
    Create the technique layer that will be part of the ATT&CK navigator json file
    :param techniques_count: involved techniques with count (to be used within the scores)
    :param groups: a dict with data on groups
    :param overlay: a dict with data on the groups to overlay
    :param groups_software: a dict with with data on which techniques are used within related software
    :param overlay_file_type: the file type of the YAML file as present in the key 'file_type'
    :param overlay_type: group, visibility or detection
    :param all_techniques: dictionary with all techniques loaded from techniques administration YAML file
    :return: list with one dictionary per technique (keys: techniqueID, score, metadata and optionally color)
    """
    techniques_layer = []
    group_ids = set(groups.keys())  # does not change per technique, so build it only once

    # { technique_id: {count: ..., groups: set{} }
    # add the technique count/scoring
    for tech, v in techniques_count.items():
        t = dict()
        t['techniqueID'] = tech
        t['score'] = v['count']
        t['metadata'] = []
        metadata_dict = dict()

        for values in groups.values():
            if tech in values['techniques']:  # we do not color this one because that's done using the scoring
                metadata_dict.setdefault('Groups', set()).add(values['group_name'])

                # this will only be effective when loading a YAML files that have a value for the key 'campaign'
                if 'campaign' in values:
                    metadata_dict.setdefault('Campaign', set()).add(values['campaign'])

        # change the color and add metadata to make the groups overlay visible
        for values in overlay.values():
            if tech in values['techniques']:
                # Determine color:
                if len(v['groups'].intersection(group_ids)) > 0:
                    # if the technique is both present in the group (-g/--groups) and the groups overlay (-o/--overlay)
                    t['color'] = COLOR_GROUP_OVERLAY_MATCH
                else:
                    # the technique is only present in the overlay and not in the provided groups (-g/--groups)
                    if overlay_file_type == FILE_TYPE_TECHNIQUE_ADMINISTRATION:
                        if overlay_type == OVERLAY_TYPE_VISIBILITY:
                            t['color'] = COLOR_GROUP_OVERLAY_ONLY_VISIBILITY
                        elif overlay_type == OVERLAY_TYPE_DETECTION:
                            t['color'] = COLOR_GROUP_OVERLAY_ONLY_DETECTION
                    else:
                        t['color'] = COLOR_GROUP_OVERLAY_NO_MATCH

                # Add applicable_to to metadata in case of overlay for detection/visibility:
                if overlay_file_type == FILE_TYPE_TECHNIQUE_ADMINISTRATION:
                    metadata_dict['Applicable to'] = {a for obj in all_techniques[tech][overlay_type]
                                                      for a in obj['applicable_to']}
                    metadata_dict['Detection score'] = [str(calculate_score(all_techniques[tech]['detection']))]
                    metadata_dict['Visibility score'] = [str(calculate_score(all_techniques[tech]['visibility']))]

                metadata_dict.setdefault('Overlay', set()).add(values['group_name'])

                # this will only be effective when loading a YAML files that has a value for the key 'campaign'
                if 'campaign' in values:
                    metadata_dict.setdefault('Campaign', set()).add(values['campaign'])

        # change the color and add metadata to make the groups software overlay visible
        for values in groups_software.values():
            if tech in values['techniques']:
                if t['score'] > 0:
                    t['color'] = COLOR_GROUP_AND_SOFTWARE
                else:
                    t['color'] = COLOR_SOFTWARE

                metadata_dict.setdefault('Software groups', set()).add(values['group_name'])
                if 'campaign' in values:
                    metadata_dict.setdefault('Software campaign', set()).add(values['campaign'])

        # create the metadata based on the dict 'metadata_dict'
        # The values are sorted because most of them are sets: without sorting, the order of the names in
        # the generated layer could differ from run to run.
        for metadata, values in metadata_dict.items():
            tmp_dict = {'name': '-' + metadata, 'value': ', '.join(sorted(values))}
            t['metadata'].append(tmp_dict)

        techniques_layer.append(t)

    return techniques_layer


def _get_group_list(groups, file_type):
    """
    Produce the display names for the involved groups.
    For a group administration YAML file this is a list with the group names, suffixed with ' (campaign)'
    when a non-empty campaign is present. For any other file type, 'groups' is handed back unchanged.
    :param groups: a dict with data on groups
    :param file_type: the file type of the YAML file as present in the key 'file_type'
    :return: list with names for a group administration file, otherwise the provided 'groups' object itself
    """
    if file_type != FILE_TYPE_GROUP_ADMINISTRATION:
        return groups

    def _display_name(values):
        # only mention the campaign when there is one
        if 'campaign' in values and values['campaign'] != '':
            return values['group_name'] + ' (' + values['campaign'] + ')'
        return values['group_name']

    return [_display_name(values) for values in groups.values()]


def generate_group_heat_map(groups, overlay, overlay_type, stage, platform, software_groups,
                            search_visibility, search_detection, health_is_called, include_all_score_objs=False):
    """
    Calls all functions that are necessary for the generation of the heat map and write a json layer to disk.
    :param groups: threat actor groups: a comma-separated string with group IDs/names/aliases, or the path to a
    YAML file with group(s)
    :param overlay: group(s), visibility or detections to overlay (group ID, group name/alias, YAML file with
    group(s), detections or visibility)
    :param overlay_type: group, visibility or detection
    :param stage: attack or pre-attack
    :param platform: one of the values from PLATFORMS constant or 'all' (a platform found in the group YAML file
    takes precedence over this argument)
    :param software_groups: specify if techniques from related software should be included
    :param search_visibility: visibility EQL search query
    :param search_detection: detection EQL search query
    :param health_is_called: boolean that specifies if detailed errors in the file will be printed
    :param include_all_score_objs: include all score objects within the score_logbook for the EQL query
    :return: returns nothing; it returns early (before a layer is written) when something's wrong
    """
    overlay_dict = {}
    groups_software_dict = {}

    # 'groups' is either a path to a group YAML file or a comma-separated string of groups
    groups_file_type = None
    if os.path.isfile(groups):
        groups_file_type = check_file(groups, file_type=FILE_TYPE_GROUP_ADMINISTRATION,
                                      health_is_called=health_is_called)
        if not groups_file_type:
            return
    else:
        # remove whitespaces (leading and trailing), convert to lower case and put in a list
        groups = groups.split(',')
        groups = list(map(lambda x: x.strip().lower(), groups))

    # set the correct value for platform
    if groups_file_type == FILE_TYPE_GROUP_ADMINISTRATION:
        _yaml = init_yaml()
        with open(groups, 'r') as yaml_file:
            group_file = _yaml.load(yaml_file)

        # a platform that is set within the YAML file overrules the 'platform' argument
        platform_yaml = get_platform_from_yaml(group_file)
        if platform_yaml:
            platform = platform_yaml
    # a single platform (string) is wrapped in a list; the value 'all' is kept as a string
    if isinstance(platform, str) and platform.lower() != 'all':
        platform = [platform]

    # 'overlay' is either a path to a YAML file, a comma-separated string of groups, or empty
    overlay_file_type = None
    if overlay:
        if os.path.isfile(overlay):
            # a group overlay expects a group administration file; a visibility/detection overlay expects a
            # technique administration file
            expected_file_type = FILE_TYPE_GROUP_ADMINISTRATION if overlay_type == OVERLAY_TYPE_GROUP \
                else FILE_TYPE_TECHNIQUE_ADMINISTRATION \
                if overlay_type in [OVERLAY_TYPE_VISIBILITY, OVERLAY_TYPE_DETECTION] else None
            overlay_file_type = check_file(overlay, expected_file_type, health_is_called=health_is_called)
            if not overlay_file_type:
                return
        else:
            # same normalisation as for 'groups': strip whitespace, lower case and put in a list
            overlay = overlay.split(',')
            overlay = list(map(lambda x: x.strip().lower(), overlay))
    else:
        overlay = []

    # load the techniques (visibility or detection) from the YAML file
    all_techniques = None
    if overlay_file_type == FILE_TYPE_TECHNIQUE_ADMINISTRATION:
        # filter out visibility and/or detection objects using EQL
        if search_detection or search_visibility:
            # from here on 'overlay' holds the result of the search instead of the original argument
            overlay = techniques_search(overlay, search_visibility, search_detection,
                                        include_all_score_objs=include_all_score_objs)
            if not overlay:
                return None  # something went wrong in executing the search or 0 results were returned

        if overlay_type == OVERLAY_TYPE_VISIBILITY:
            overlay_dict, all_techniques = _get_visibility_techniques(overlay)
        elif overlay_type == OVERLAY_TYPE_DETECTION:
            overlay_dict, all_techniques = _get_detection_techniques(overlay)
    # we are not overlaying visibility or detection, overlay group will therefore contain information on another group
    elif len(overlay) > 0:
        overlay_dict = _get_group_techniques(overlay, stage, platform, overlay_file_type)
        if overlay_dict == -1:  # -1 signals that a provided overlay group was not found
            return

    groups_dict = _get_group_techniques(groups, stage, platform, groups_file_type)
    if groups_dict == -1:  # -1 signals that a provided group was not found
        return
    if len(groups_dict) == 0:
        print('[!] Empty layer.')  # the provided groups did not result in any techniques
        return

    # check if we are doing a software group overlay
    if software_groups and overlay:
        if overlay_type not in [OVERLAY_TYPE_VISIBILITY, OVERLAY_TYPE_DETECTION]:
            # if a group overlay is provided, get the software techniques for the overlay
            groups_software_dict = _get_software_techniques(overlay, stage, platform)
    elif software_groups:
        groups_software_dict = _get_software_techniques(groups, stage, platform)

    # combine the groups, overlay and software data into the scores and the technique entries of the layer
    technique_count, max_count = _get_technique_count(groups_dict, overlay_dict, groups_software_dict, overlay_type, all_techniques)
    technique_layer = _get_technique_layer(technique_count, groups_dict, overlay_dict, groups_software_dict,
                                           overlay_file_type, overlay_type, all_techniques)

    # make a list of group names for the involved groups.
    if groups == ['all']:
        groups_list = ['all']
    else:
        groups_list = _get_group_list(groups_dict, groups_file_type)
    overlay_list = _get_group_list(overlay_dict, overlay_file_type)

    # description of the layer: stage, platform(s), group(s) and overlay group(s)
    desc = 'stage: ' + stage + ' | platform(s): ' + platform_to_name(platform, separator=', ') + ' | group(s): ' \
           + ', '.join(groups_list) + ' | overlay group(s): ' + ', '.join(overlay_list)

    # the first argument is the stage with its first character in upper case, followed by the platform(s)
    layer = get_layer_template_groups(stage[0].upper() + stage[1:] + ' - ' + platform_to_name(platform, separator=', '),
                                      max_count, desc, stage, platform, overlay_type)
    layer['techniques'] = technique_layer

    # put every object of the JSON output on its own line
    json_string = simplejson.dumps(layer).replace('}, ', '},\n')

    # the platform is left out of the filename for pre-attack
    if stage == 'pre-attack':
        filename = '_'.join(groups_list)
    elif overlay:
        filename = platform_to_name(platform) + '_' + '_'.join(groups_list) + '-overlay_' + '_'.join(overlay_list)
    else:
        filename = platform_to_name(platform) + '_' + '_'.join(groups_list)

    # the filename is cut off at 255 characters
    write_file(stage, filename[:255], json_string)