# Basically we spit out a tree that looks like:
# | (root)
# |   Virus
# |     Boot
# |       Marburg
# |     DOS
# |       Jerusalem
# |         664
# |         Crypt.1808
# |     Win9x
# |       ...
#
# We build the tree first, then walk it to execute the organize operation.
# The tree knows its parents, so to create names we can simply walk the tree
# backwards.
#
# Being a leaf node is NOT enough to tell that a node is a sample: a family can
# have a sample of its own *and* subvariants below it. Every node that
# corresponds to a listed sample is therefore flagged out-of-band by setting
# its `is_sample` attribute to True.


# Returns the child of [parent] named [name], creating it first when
# [parent] has no such child yet.
def _find_or_create_child(parent, name):
    child = parent.find_child(name)
    if child is None:
        child = parent.create_child_leaf(name)
    return child


# Parses a text file containing VXHeaven sample identifiers into the tree
# [sample_tree].
#
# [path] names a text file with one dot-separated identifier per line
# (type.platform.family.variant..., e.g. 'Virus.DOS.Jerusalem.664').
# Each component becomes one level of the tree; nodes that already exist are
# reused, and the node reached by the last component is marked with
# `is_sample = True`. Identifiers may have any number of components.
# Blank lines are skipped.
#
# Nothing is returned; [sample_tree] is modified in place.
def parse_into_tree(sample_tree, path):
    # `with` guarantees the list file is closed, even if parsing raises.
    with open(path, 'r') as listfile:
        for line in listfile:
            line = line.strip()
            if not line:
                # ''.split('.') yields [''], which would otherwise create a
                # bogus, nameless type node flagged as a sample.
                continue

            parts = line.split(".")

            # type ('Virus', 'Worm' so on). Top-level nodes are created
            # through the tree itself rather than through the root node.
            leaf = sample_tree.root.find_child(parts[0])
            if leaf is None:
                leaf = sample_tree.create_leaf(parts[0])

            # platform ('DOS', 'Win32' so on), family, then any number of
            # subvariants. Always descend into the child -- existing or newly
            # created -- so later components hang off the correct parent.
            for name in parts[1:]:
                leaf = _find_or_create_child(leaf, name)

            # The last node we arrive at is the sample's node. Set the flag
            # unconditionally: the node may already exist as an intermediate
            # node of a longer identifier seen earlier.
            leaf.is_sample = True