80 lines
2.4 KiB
Python
80 lines
2.4 KiB
Python
|
|
# Basically we spit out a tree that looks like:
|
|
# | (root)
|
|
# | Virus
|
|
# | Boot
|
|
# | Marburg (samples with only one child do not create a leaf node, so they will)
|
|
# | DOS
|
|
# | Jerusalem
|
|
# | 664
|
|
# | Crypt.1808
|
|
# | Win9x
|
|
# | ...
|
|
#
|
|
# Basically, we build the tree, then walk it to execute the organize operation.
|
|
# The tree knows its parents, so to create names we can simply walk the tree backwards.
|
|
# Additionally, every sample is indicated as a leaf node
|
|
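# (Never mind: this only partially holds true, because a sample can also be an
# interior node with subvariants below it, so we DO have to track "is a sample"
# out of band with an explicit per-node flag.)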
|
|
|
|
# Parses a text file containing VXHeaven sample identifiers into the tree
|
|
# [sample_tree].
|
|
def parse_into_tree(sample_tree, path):
    """Parse a list file of dot-separated sample identifiers into a tree.

    Each non-blank line of the file is split on "." into components
    (type, platform, family, then any number of subvariants). One node is
    created per component the first time it is seen, nested under the node
    of the preceding component, and the node reached by the final component
    is flagged with ``is_sample = True``.

    Args:
        sample_tree: Tree object to populate. It must expose
            ``root.find_child(name)`` and ``create_leaf(name)`` (used for
            the first, top-level component). The nodes it returns must
            expose ``find_child(name)``, ``create_child_leaf(name)`` and a
            writable ``is_sample`` attribute.
        path: Path of the text file to read, one identifier per line.

    Returns:
        None. ``sample_tree`` is modified in place.

    Raises:
        OSError: If the file at ``path`` cannot be opened.
    """
    # Context manager so the handle is closed even if parsing raises.
    with open(path, 'r') as listfile:
        for line in listfile:
            line = line.strip()

            # A blank line would otherwise create a bogus node named "".
            if not line:
                continue

            components = line.split(".")

            # Top-level component (type: 'Virus', 'Worm' and so on) hangs off
            # the root and is created through the tree itself.
            node = sample_tree.root.find_child(components[0])
            if node is None:
                node = sample_tree.create_leaf(components[0])

            # Remaining components (platform, family, subvariants...): always
            # step into the child, creating it only when it is missing, so
            # identifiers sharing a prefix share the same chain of nodes.
            for name in components[1:]:
                child = node.find_child(name)
                if child is None:
                    child = node.create_child_leaf(name)
                node = child

            # The last node we arrive at is the sample's node. It may also be
            # an interior node (a family that is itself a sample and has
            # subvariants), hence the explicit flag.
            node.is_sample = True
|