Start of rewrite
New things: - Multiple Python modules (I might rewrite this in C++ or something) - This uses a tree data structure now, which is much less rickety - Parsing is much less rickety too
This commit is contained in:
parent
02c2d520c5
commit
a100d6e508
4 changed files with 255 additions and 31 deletions
82
tree.py
Normal file
82
tree.py
Normal file
|
@ -0,0 +1,82 @@
|
||||||
|
# node of the tree
class TreeNode:
    """A single node of a parent-linked tree.

    Attributes:
        item: payload of the node (a string currently); None until assigned.
        parent: the TreeNode this node was created from, or None.
        is_sample: flag that starts out False; this class never changes it.
        children: child TreeNodes, in the order they were created.
    """

    def __init__(self):
        self.item = None  # Can be anything (a string currently)
        # Only one parent can exist
        self.parent = None
        self.is_sample = False
        self.children = []

    # creates a child node
    def create_leaf(self):
        """Create an empty node, attach it as the last child and return it."""
        node = TreeNode()
        node.parent = self
        self.children.append(node)
        return node

    # returns true if this node is a "leaf" node, i.e:
    # it has no children to continue with
    def is_leaf(self):
        """Return True when this node has no children."""
        return not self.children

    def create_child_leaf(self, item):
        """Create a child node holding `item` and return it."""
        node = self.create_leaf()
        node.item = item
        return node

    def walk_children(self, fn):
        """Call `fn(node)` for this node, then recursively for every descendant.

        Nodes are visited depth-first, parents before their children, and
        children in creation order.
        """
        fn(self)
        # Iterating an empty child list is a no-op, so leaves need no
        # special case.
        for node in self.children:
            node.walk_children(fn)

    # find a single child. Returns None if no child with name exists
    def find_child(self, name):
        """Return the first direct child whose item equals `name`, else None.

        Only direct children are searched. The node itself is deliberately
        not a candidate: returning `self` would make a lookup such as
        "DOS" under a node named "DOS" resolve to the parent instead of
        reporting that the child is missing.
        """
        for node in self.children:
            if node.item == name:
                return node
        return None

    def get_parent_count(self):
        """Return the number of ancestors above this node (0 if it has no parent)."""
        # Walk the parents to figure out current tree depth
        # and how many
        parent = self.parent
        parent_count = 0
        while parent is not None:
            parent_count += 1
            parent = parent.parent
        return parent_count
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# def walk_children(self, fn):
|
||||||
|
# fn(self)
|
||||||
|
# for node in self.children:
|
||||||
|
# fn(node)
|
||||||
|
# for other_node in node.children:
|
||||||
|
# fn(other_node)
|
||||||
|
|
||||||
|
class Tree:
    """A tree that owns a single root node.

    The root's item is left as None.
    """

    def __init__(self):
        # create a root node
        self.root = TreeNode()

    def walk(self, fn):
        """Call `fn(node)` for the root and then for every node below it."""
        self.root.walk_children(fn)

    # creates a leaf in the root node and populates it
    def create_leaf(self, item):
        """Create a node holding `item` directly under the root and return it."""
        # TreeNode exposes create_leaf() and create_child_leaf(item); there is
        # no create_child(), so delegate to the method that also sets the item.
        return self.root.create_child_leaf(item)
|
48
tree_test.py
Normal file
48
tree_test.py
Normal file
|
@ -0,0 +1,48 @@
|
||||||
|
import tree
|
||||||
|
|
||||||
|
# Build a small fixture tree to exercise the tree module.
the_tree = tree.Tree()

# create the leaf nodes that sit directly under the root
virus, virus2 = [the_tree.create_leaf(name) for name in ('Virus', 'Worm')]

# create child leaf
test = virus.create_child_leaf('Test')

# create test items inside of 'test' leaf
v1, v2, v3, v4 = [
    test.create_child_leaf(name) for name in ('a', 'b', 'c', '884')
]
|
||||||
|
|
||||||
|
|
||||||
|
def walk_cb(node):
    """Print one line for `node`, indented by one tab per ancestor.

    The line shows the node's item, or '[root]' when the item is None.
    """
    # Count the ancestors to figure out the current tree depth.
    depth = 0
    ancestor = node.parent
    while ancestor is not None:
        depth += 1
        ancestor = ancestor.parent

    label = '[root]' if node.item is None else node.item
    print(''.join(('\t' * depth, label)))
|
||||||
|
|
||||||
|
# Dump the whole tree: walk_cb prints one tab-indented line per node.
the_tree.walk(walk_cb)
|
80
vxheaven_parse.py
Normal file
80
vxheaven_parse.py
Normal file
|
@ -0,0 +1,80 @@
|
||||||
|
|
||||||
|
# Basically we spit out a tree that looks like:
|
||||||
|
# | (root)
|
||||||
|
# | Virus
|
||||||
|
# | Boot
|
||||||
|
# | Marburg (samples with only one child do not create a leaf node, so they will)
|
||||||
|
# | DOS
|
||||||
|
# | Jerusalem
|
||||||
|
# | 664
|
||||||
|
# | Crypt.1808
|
||||||
|
# | Win9x
|
||||||
|
# | ...
|
||||||
|
#
|
||||||
|
# Basically, we build the tree, then walk it to execute the organize operation.
|
||||||
|
# The tree knows its parents, so to create names we can simply walk the tree backwards.
|
||||||
|
# Additionally, every sample is indicated as a leaf node
|
||||||
|
# (nevermind, this only partially holds true, so we DO have to introduce oob info for it :()
|
||||||
|
|
||||||
|
# Parses a text file containing VXHeaven sample identifiers into the tree
# [sample_tree].
def parse_into_tree(sample_tree, path):
    """Populate `sample_tree` from the sample list stored at `path`.

    Every non-blank line is a dot-separated identifier
    (type.platform.family followed by any number of subvariants). Each
    component becomes one level below `sample_tree.root`; nodes that already
    exist are reused, and the node reached by the last component is flagged
    with `is_sample = True`.

    Identifiers of any length are accepted, so a family that is itself a
    sample and also has subvariants ends up as a flagged node with children.

    Args:
        sample_tree: object whose `root` node exposes `children`, and whose
            nodes expose `item`, `is_sample` and `create_child_leaf(item)`.
        path: path of the text file to read, one identifier per line.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    # The context manager closes the list file even if parsing fails midway.
    with open(path, 'r') as listfile:
        for line in listfile:
            line = line.strip()
            if not line:
                # A blank line names no sample; don't create an empty node.
                continue

            node = sample_tree.root
            for component in line.split("."):
                # Always descend, whether the child existed or was just
                # created, so deeper components attach to the right parent.
                node = _find_or_create_child(node, component)

            # The last node we arrive to is the sample's node
            node.is_sample = True


def _find_or_create_child(parent, name):
    """Return the direct child of `parent` whose item is `name`, creating it if missing."""
    for child in parent.children:
        if child.item == name:
            return child
    return parent.create_child_leaf(name)
|
76
vxorg.py
76
vxorg.py
|
@ -3,7 +3,7 @@
|
||||||
# simple script to organize the vxheaven collection
|
# simple script to organize the vxheaven collection
|
||||||
#
|
#
|
||||||
# Usage:
|
# Usage:
|
||||||
# Create input list with "ls > ../list" ran inside where you extracted the vxheaven archive
|
# Create input list
|
||||||
# Run this script
|
# Run this script
|
||||||
# ... Watch it go?
|
# ... Watch it go?
|
||||||
|
|
||||||
|
@ -13,43 +13,57 @@ import sys
|
||||||
|
|
||||||
from pathlib import Path, PurePath
|
from pathlib import Path, PurePath
|
||||||
|
|
||||||
class VxFolderInfo:
|
import tree
|
||||||
def __init__(self, vtype, platform, family, filename):
|
import vxheaven_parse
|
||||||
self.vtype = vtype
|
|
||||||
self.platform = platform
|
|
||||||
self.family = family
|
|
||||||
self.filename = filename
|
|
||||||
|
|
||||||
def MakePath(self):
|
# tree used to hold samples
|
||||||
return Path(os.getcwd()) / self.vtype / self.platform / self.family
|
sample_tree = tree.Tree()
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def get_sample_name_from_tree_node(node):
|
||||||
folderInfo = []
|
sample_name = ''
|
||||||
listfile = open('../list', 'r')
|
if not node.is_sample:
|
||||||
|
return sample_name
|
||||||
|
|
||||||
for line in listfile:
|
parent_list = []
|
||||||
line = line.strip()
|
parent = node.parent
|
||||||
split = line.split(".")
|
while parent is not None:
|
||||||
|
# reached the root node of the tree
|
||||||
|
if parent.item is None:
|
||||||
|
break
|
||||||
|
parent_list.append(parent.item)
|
||||||
|
parent = parent.parent
|
||||||
|
|
||||||
try:
|
for item in reversed(parent_list):
|
||||||
folderInfo.append(VxFolderInfo(split[0], split[1], split[2], line))
|
sample_name += f'{item}.'
|
||||||
except:
|
|
||||||
print(f'invalid format for {split}/{line}')
|
|
||||||
|
|
||||||
listfile.close()
|
sample_name += node.item
|
||||||
|
return sample_name
|
||||||
|
|
||||||
for item in folderInfo:
|
# python doesn't have true anonymous functions.
|
||||||
srcPath = Path(item.filename)
|
# I really regret writing this in python but i've sunk too much
|
||||||
dstPath = item.MakePath()
|
# in to rewrite this in C++ or something
|
||||||
|
def walk_cb(node):
|
||||||
|
node_name = '(root)'
|
||||||
|
sample_name = get_sample_name_from_tree_node(node)
|
||||||
|
|
||||||
if not dstPath.is_dir():
|
if node.item is not None:
|
||||||
print(f'making directory tree {str(dstPath)}')
|
node_name = node.item
|
||||||
dstPath.mkdir(parents=True)
|
|
||||||
|
|
||||||
if srcPath.is_file():
|
# Tab the list for clarity
|
||||||
newDst = dstPath / item.filename
|
tab = ''
|
||||||
print(f'moving {str(srcPath)} to {str(newDst)}')
|
for i in range(0, node.get_parent_count()):
|
||||||
srcPath.rename(newDst)
|
tab += '\t'
|
||||||
|
|
||||||
main()
|
leaf = ''
|
||||||
|
if node.is_sample:
|
||||||
|
leaf = f' (sample {sample_name})'
|
||||||
|
|
||||||
|
print(f"{tab}{node_name}{leaf}")
|
||||||
|
|
||||||
|
|
||||||
|
# Parse sample tree from vxheaven list
|
||||||
|
vxheaven_parse.parse_into_tree(sample_tree, './samples.sort')
|
||||||
|
|
||||||
|
# Walk the sample tree (currently, just dumps it for debugging)
|
||||||
|
sample_tree.walk(walk_cb)
|
Loading…
Reference in a new issue