vxorg/python_refonly/vxheaven_parse.py
modeco80 af49206a16 Rewrite it in C++
rust people are going to be very mad at me now /j
2024-10-21 12:22:35 -04:00

80 lines
2.4 KiB
Python

# Basically we spit out a tree that looks like:
# | (root)
# | Virus
# | Boot
# | Marburg (samples with only one child do not create a leaf node, so they will)
# | DOS
# | Jerusalem
# | 664
# | Crypt.1808
# | Win9x
# | ...
#
# Basically, we build the tree, then walk it to execute the organize operation.
# The tree knows its parents, so to create names we can simply walk the tree backwards.
# Note that not every sample is a leaf node: a family can be a sample itself and
# also have subvariant children, so each sample's node is flagged out-of-band
# (via `is_sample`) instead of being inferred from its position in the tree.
def parse_into_tree(sample_tree, path):
    """Parse a text file of VXHeaven sample identifiers into `sample_tree`.

    Each non-blank line of the file is one dot-separated identifier
    (type, platform, family, then any number of subvariant components,
    e.g. ``Virus.DOS.Jerusalem.664``). For every identifier, the matching
    chain of nodes is looked up in the tree, creating whichever nodes are
    missing, and the node for the final component is flagged with
    ``is_sample = True``.

    Args:
        sample_tree: Tree to populate. It must expose ``root`` (a node with
            ``find_child(name)``) and ``create_leaf(name)`` for top-level
            (type) nodes. Nodes must expose ``find_child(name)``,
            ``create_child_leaf(name)`` and a writable ``is_sample``
            attribute.
        path: Path of the list file to read.

    Returns:
        None. `sample_tree` is modified in place.
    """
    # Use a context manager so the list file is closed even on error.
    with open(path, 'r') as listfile:
        for line in listfile:
            line = line.strip()
            if not line:
                # A blank line is not an identifier; do not create an
                # empty-named type node for it.
                continue

            type_name, *rest = line.split(".")

            # Type level ('Virus', 'Worm' and so on) hangs off the root and
            # is created through the tree itself.
            node = sample_tree.root.find_child(type_name)
            if node is None:
                node = sample_tree.create_leaf(type_name)

            # Platform, family and every subvariant are handled uniformly.
            # Always descend into the child, whether it was just created or
            # already existed, so deeper components end up under the right
            # parent.
            for name in rest:
                child = node.find_child(name)
                if child is None:
                    child = node.create_child_leaf(name)
                node = child

            # The last node we arrive at is the sample's node. This holds
            # even when that node already existed as an intermediate node
            # (a family or variant that is itself a sample and also has
            # subvariants).
            node.is_sample = True