Start of rewrite
New things: - Multiple Python modules (I might rewrite this in C++ or something) - This uses a tree data structure now, which is much less rickety - Parsing is much less rickety too
This commit is contained in:
parent
02c2d520c5
commit
a100d6e508
4 changed files with 255 additions and 31 deletions
82
tree.py
Normal file
82
tree.py
Normal file
|
@ -0,0 +1,82 @@
|
|||
# node of the tree
|
||||
class TreeNode:
    """A single node of the tree.

    Public fields:
      item      -- payload of the node (currently a string); None until set
      parent    -- the one parent node, or None if the node has no parent
      is_sample -- boolean flag, False by default; set by callers
      children  -- list of child nodes, in insertion order
    """

    def __init__(self):
        self.item = None  # Can be anything (a string currently)
        # Only one parent can exist
        self.parent = None
        self.is_sample = False
        self.children = []

    def create_leaf(self):
        """Create an empty child node, attach it to this node and return it."""
        node = TreeNode()
        node.parent = self
        self.children.append(node)
        return node

    def is_leaf(self):
        """Return True if this node has no children, False otherwise."""
        return not self.children

    def create_child_leaf(self, item):
        """Create a child node holding `item`, attach it and return it."""
        node = self.create_leaf()
        node.item = item
        return node

    def walk_children(self, fn):
        """Call fn(node) on this node, then recursively on every descendant.

        Nodes are visited depth-first, parents before their children, and
        children in insertion order.
        """
        fn(self)
        # A leaf has an empty child list, so no separate leaf check is needed.
        for node in self.children:
            node.walk_children(fn)

    def find_child(self, name):
        """Return the direct child whose item equals `name`, or None.

        Only direct children are searched. The node itself is never returned:
        matching on self.item would make a repeated path component
        (e.g. "X.X") resolve to the parent instead of a child.
        """
        for node in self.children:
            if node.item == name:
                return node
        return None

    def get_parent_count(self):
        """Return the number of ancestors of this node (0 for a parentless node)."""
        # Walk the parent links upwards to figure out the current tree depth.
        parent = self.parent
        parent_count = 0
        while parent is not None:
            parent_count += 1
            parent = parent.parent
        return parent_count
|
||||
|
||||
|
||||
|
||||
# def walk_children(self, fn):
|
||||
# fn(self)
|
||||
# for node in self.children:
|
||||
# fn(node)
|
||||
# for other_node in node.children:
|
||||
# fn(other_node)
|
||||
|
||||
class Tree:
    """A tree made of TreeNode objects hanging off a single root node."""

    def __init__(self):
        # create a root node; its item is left as None
        self.root = TreeNode()

    def walk(self, fn):
        """Call fn(node) for the root node and, through it, its descendants."""
        self.root.walk_children(fn)

    def create_leaf(self, item):
        """Create a child of the root node holding `item` and return it."""
        # TreeNode offers create_leaf() and create_child_leaf(item); it has no
        # create_child(), so calling that raised AttributeError.
        return self.root.create_child_leaf(item)
|
48
tree_test.py
Normal file
48
tree_test.py
Normal file
|
@ -0,0 +1,48 @@
|
|||
import tree
|
||||
|
||||
# Fixture tree used by the dump below.
the_tree = tree.Tree()

# two nodes directly under the root
virus, virus2 = (the_tree.create_leaf(name) for name in ('Virus', 'Worm'))

# one node nested under 'Virus'
test = virus.create_child_leaf('Test')

# four items nested under 'Test', created in this order
v1, v2, v3, v4 = (
    test.create_child_leaf(name) for name in ('a', 'b', 'c', '884')
)
|
||||
|
||||
|
||||
def walk_cb(node):
    """Print one tree node, indented with one tab per ancestor.

    A node whose item is None is printed as '[root]'; any other node is
    printed as its item.
    """
    # Depth comes from the node itself instead of re-walking the parent chain
    # here (the old copy also collected an unused list of parent items).
    tab = '\t' * node.get_parent_count()
    ident = node.item if node.item is not None else '[root]'
    print(f"{tab}{ident}")
|
||||
|
||||
# Dump the fixture tree by passing walk_cb to the tree's walk().
the_tree.walk(walk_cb)
|
80
vxheaven_parse.py
Normal file
80
vxheaven_parse.py
Normal file
|
@ -0,0 +1,80 @@
|
|||
|
||||
# Basically we spit out a tree that looks like:
|
||||
# | (root)
|
||||
# | Virus
|
||||
# | Boot
|
||||
# | Marburg (samples with only one child do not create a leaf node, so they will)
|
||||
# | DOS
|
||||
# | Jerusalem
|
||||
# | 664
|
||||
# | Crypt.1808
|
||||
# | Win9x
|
||||
# | ...
|
||||
#
|
||||
# Basically, we build the tree, then walk it to execute the organize operation.
|
||||
# The tree knows its parents, so to create names we can simply walk the tree backwards.
|
||||
# Additionally, every sample is indicated as a leaf node
|
||||
# (nevermind, this only partially holds true, so we DO have to introduce oob info for it :()
|
||||
|
||||
# Parses a text file containing VXHeaven sample identifiers into the tree
|
||||
# [sample_tree].
|
||||
def _get_or_create_child(node, name):
    """Return the direct child of `node` whose item equals `name`, creating it if absent."""
    for child in node.children:
        if child.item == name:
            return child
    return node.create_child_leaf(name)


def parse_into_tree(sample_tree, path):
    """Parse a text file of VXHeaven sample identifiers into `sample_tree`.

    Each non-blank line is one identifier whose components are separated by
    dots (type, platform, family, then any number of subvariants). Every
    component becomes one tree level below `sample_tree.root`; existing nodes
    are reused. The node of the last component is flagged with
    `is_sample = True`, so a family can be a sample and still have
    subvariants below it.

    Parameters:
      sample_tree -- object with a `root` node; nodes must provide
                     `children`, `item`, `is_sample` and
                     `create_child_leaf(item)`
      path        -- path of the list file to read

    Raises whatever `open()` raises if the file cannot be opened.
    """
    # The context manager closes the file even if parsing fails part-way.
    with open(path, 'r') as listfile:
        for line in listfile:
            line = line.strip()
            if not line:
                # A blank line would otherwise become an empty-named sample.
                continue

            # Always descend into the node for each component, whether it was
            # just created or already existed, so later components attach to
            # the right parent.
            node = sample_tree.root
            for part in line.split("."):
                node = _get_or_create_child(node, part)

            # The last node we arrive at is the sample's node
            node.is_sample = True
|
76
vxorg.py
76
vxorg.py
|
@ -3,7 +3,7 @@
|
|||
# simple script to organize the vxheaven collection
|
||||
#
|
||||
# Usage:
|
||||
# Create input list with "ls > ../list" ran inside where you extracted the vxheaven archive
|
||||
# Create input list
|
||||
# Run this script
|
||||
# ... Watch it go?
|
||||
|
||||
|
@ -13,43 +13,57 @@ import sys
|
|||
|
||||
from pathlib import Path, PurePath
|
||||
|
||||
class VxFolderInfo:
|
||||
def __init__(self, vtype, platform, family, filename):
|
||||
self.vtype = vtype
|
||||
self.platform = platform
|
||||
self.family = family
|
||||
self.filename = filename
|
||||
import tree
|
||||
import vxheaven_parse
|
||||
|
||||
def MakePath(self):
|
||||
return Path(os.getcwd()) / self.vtype / self.platform / self.family
|
||||
# tree used to hold samples
|
||||
sample_tree = tree.Tree()
|
||||
|
||||
|
||||
def main():
|
||||
folderInfo = []
|
||||
listfile = open('../list', 'r')
|
||||
def get_sample_name_from_tree_node(node):
|
||||
sample_name = ''
|
||||
if not node.is_sample:
|
||||
return sample_name
|
||||
|
||||
for line in listfile:
|
||||
line = line.strip()
|
||||
split = line.split(".")
|
||||
parent_list = []
|
||||
parent = node.parent
|
||||
while parent is not None:
|
||||
# reached the root node of the tree
|
||||
if parent.item is None:
|
||||
break
|
||||
parent_list.append(parent.item)
|
||||
parent = parent.parent
|
||||
|
||||
try:
|
||||
folderInfo.append(VxFolderInfo(split[0], split[1], split[2], line))
|
||||
except:
|
||||
print(f'invalid format for {split}/{line}')
|
||||
for item in reversed(parent_list):
|
||||
sample_name += f'{item}.'
|
||||
|
||||
listfile.close()
|
||||
sample_name += node.item
|
||||
return sample_name
|
||||
|
||||
for item in folderInfo:
|
||||
srcPath = Path(item.filename)
|
||||
dstPath = item.MakePath()
|
||||
# python doesn't have true anonymous functions.
|
||||
# I really regret writing this in python but i've sunk too much
|
||||
# in to rewrite this in C++ or something
|
||||
def walk_cb(node):
|
||||
node_name = '(root)'
|
||||
sample_name = get_sample_name_from_tree_node(node)
|
||||
|
||||
if not dstPath.is_dir():
|
||||
print(f'making directory tree {str(dstPath)}')
|
||||
dstPath.mkdir(parents=True)
|
||||
if node.item is not None:
|
||||
node_name = node.item
|
||||
|
||||
if srcPath.is_file():
|
||||
newDst = dstPath / item.filename
|
||||
print(f'moving {str(srcPath)} to {str(newDst)}')
|
||||
srcPath.rename(newDst)
|
||||
# Tab the list for clarity
|
||||
tab = ''
|
||||
for i in range(0, node.get_parent_count()):
|
||||
tab += '\t'
|
||||
|
||||
main()
|
||||
leaf = ''
|
||||
if node.is_sample:
|
||||
leaf = f' (sample {sample_name})'
|
||||
|
||||
print(f"{tab}{node_name}{leaf}")
|
||||
|
||||
|
||||
# Parse sample tree from vxheaven list
|
||||
vxheaven_parse.parse_into_tree(sample_tree, './samples.sort')
|
||||
|
||||
# Walk the sample tree (currently, just dumps it for debugging)
|
||||
sample_tree.walk(walk_cb)
|
Loading…
Reference in a new issue