Compare commits
10 commits
a100d6e508
...
ac32c07d7f
Author | SHA1 | Date | |
---|---|---|---|
ac32c07d7f | |||
59ac84466f | |||
9af77c6159 | |||
a997a356d2 | |||
0eaca7d0c9 | |||
6f41bc9180 | |||
9bdff0be14 | |||
f8285f9e4c | |||
af49206a16 | |||
20d72e0cd2 |
19 changed files with 708 additions and 0 deletions
44
.clang-format
Executable file
44
.clang-format
Executable file
|
@ -0,0 +1,44 @@
|
|||
BasedOnStyle: Google
|
||||
|
||||
# force T* or T&
|
||||
DerivePointerAlignment: false
|
||||
PointerAlignment: Left
|
||||
|
||||
TabWidth: 4
|
||||
IndentWidth: 4
|
||||
UseTab: Always
|
||||
IndentPPDirectives: BeforeHash
|
||||
|
||||
AllowAllParametersOfDeclarationOnNextLine: true
|
||||
AllowShortBlocksOnASingleLine: false
|
||||
AllowShortFunctionsOnASingleLine: InlineOnly
|
||||
AllowShortIfStatementsOnASingleLine: Never
|
||||
AllowShortLoopsOnASingleLine: false
|
||||
AllowShortCaseLabelsOnASingleLine: true
|
||||
|
||||
BinPackArguments: true
|
||||
BinPackParameters: true
|
||||
BreakConstructorInitializers: BeforeColon
|
||||
BreakStringLiterals: false
|
||||
|
||||
ColumnLimit: 150
|
||||
CompactNamespaces: false
|
||||
|
||||
ConstructorInitializerAllOnOneLineOrOnePerLine: true
|
||||
ContinuationIndentWidth: 0
|
||||
|
||||
# turning this on causes major issues with initializer lists
|
||||
Cpp11BracedListStyle: false
|
||||
SpaceBeforeCpp11BracedList: true
|
||||
|
||||
FixNamespaceComments: true
|
||||
|
||||
NamespaceIndentation: All
|
||||
ReflowComments: true
|
||||
|
||||
SortIncludes: CaseInsensitive
|
||||
SortUsingDeclarations: true
|
||||
|
||||
SpacesInSquareBrackets: false
|
||||
SpaceBeforeParens: Never
|
||||
SpacesBeforeTrailingComments: 1
|
7
.gitignore
vendored
Normal file
7
.gitignore
vendored
Normal file
|
@ -0,0 +1,7 @@
|
|||
/__pycache__
|
||||
/.cache
|
||||
*.o
|
||||
/vxorg
|
||||
/tree_test
|
||||
/testdata
|
||||
/compile_commands.json
|
3
.gitmodules
vendored
Normal file
3
.gitmodules
vendored
Normal file
|
@ -0,0 +1,3 @@
|
|||
[submodule "indicators"]
|
||||
path = indicators
|
||||
url = https://github.com/p-ranav/indicators
|
7
LICENSE
Normal file
7
LICENSE
Normal file
|
@ -0,0 +1,7 @@
|
|||
Copyright 2023-2024 Lily Tsuru
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
Makefile
Normal file
22
Makefile
Normal file
|
@ -0,0 +1,22 @@
|
|||
# Compiler driver plus the flags every translation unit needs.
CXX = g++ -std=c++23 -O3 -I indicators/include

# Neither of these names a real file, so never let a stray file called
# "all" or "clean" shadow the rule.
.PHONY: all clean

all: vxorg tree_test

# -f: do not fail when some (or all) of the artifacts have not been built yet
clean:
	rm -f vxorg tree_test *.o

vxorg: vxorg.o vxheaven_parse.o threadpool.o
	$(CXX) $^ -o $@

tree_test: tree_test.o
	$(CXX) $^ -o $@

%.o: %.cpp
	$(CXX) -c $< -o $@


# dep rules
# I feel like it's 1970 again
tree_test.o: tree.hpp
vxorg.o: tree.hpp vxheaven_parse.hpp threadpool.hpp
vxheaven_parse.o: tree.hpp vxheaven_parse.hpp
threadpool.o: threadpool.hpp
|
24
README.md
Normal file
24
README.md
Normal file
|
@ -0,0 +1,24 @@
|
|||
# vxorg
|
||||
|
||||
vxheaven organizer (converts it from a flat hierarchy of ~270k+ files to a neat tree). It was originally written in Python; I rewrote it in C++ for performance reasons.
|
||||
|
||||
# History
|
||||
|
||||
- 2018: I wrote a really shoddy attempt at doing organization in Bash. It sucked because I wasn't taking care of many idiosyncrasies about sample naming.
|
||||
- It also was very primitive and slow, since it would continually spawn `mv` processes just to move files. (same for `mkdir` too, but that is less of a concern since it's done less)
|
||||
- 2023: I wrote a new script in Python. It was "better" but still didn't work
|
||||
- I actually made the same mistake and tried to write in Bash again, but even Python was worlds faster, so I rewrote it in python
|
||||
- October 21, 2024: I decided to start rewriting the Python script I wrote to parse into an N-ary tree for memory savings while still allowing memoization. (and be modular instead of one blob)
|
||||
- Later in the day, as an experiment, I rewrote the parsing algorithm (fixing a bug in the process) in C++. It was 100x faster, so I committed to a rewrite in C++
|
||||
|
||||
# Building
|
||||
|
||||
`make`
|
||||
|
||||
# Usage
|
||||
|
||||
- Generate a list of samples.
|
||||
- `tar tf xxx/viruses-2010-05-18.tar.bz2 | sed 's/\.\///g' | awk NF | sort > list` is one option. Not the best but it's (basically) what I did
|
||||
- Run with `./vxorg list src/ dest/`
|
||||
- `dest/` will be created if it does not exist.
|
||||
- It will show a progress bar as it completes.
|
1
indicators
Submodule
1
indicators
Submodule
|
@ -0,0 +1 @@
|
|||
Subproject commit 9c855c95e7782541a419597242535562fa9e41d7
|
3
python_refonly/README.md
Normal file
3
python_refonly/README.md
Normal file
|
@ -0,0 +1,3 @@
|
|||
# python
|
||||
|
||||
This was the original vxorg refactor/rewrite. It was abandoned because the tree algorithms in Python were so unbearably slow that even an iffy C++ reimplementation of the same tree is 100x faster.
|
70
threadpool.cpp
Normal file
70
threadpool.cpp
Normal file
|
@ -0,0 +1,70 @@
|
|||
#include "threadpool.hpp"
|
||||
|
||||
/// Worker loop executed by every pool thread.
///
/// The thread sleeps on the pool's condition variable until its own queue holds work or the
/// pool is shutting down, then drains that queue. add_task() pushes to the front of the deque
/// and this loop pops from the back, so tasks run in submission order. The thread exits once
/// shutdown has been requested and its queue is empty.
///
/// @param pPool   Pool that owns this worker.
/// @param myIndex Index of this worker's queue inside the pool's queue array.
void ThreadPool::ThreadEntry(ThreadPool* pPool, std::size_t myIndex) {
	auto& pool = *pPool;
	auto& myQueue = pool.taskQueues[myIndex];

	// set a cutesy name
	#ifdef __linux__
	pthread_setname_np(pthread_self(), "PoolWorker");
	#endif

	// The thread loop
	while(true) {
		{
			// wait for at least a single task, or shutdown notification (one of the two)
			std::unique_lock lk(myQueue.lock);
			pool.queueCv.wait(lk, [&]() { return pool.threadsShouldShutdown || !myQueue.queue.empty(); });
		}

		// Exit if the pool is to shutdown
		if(pool.threadsShouldShutdown && pool.QueueEmpty(myIndex))
			break;

		// pop and run tasks until we run out of tasks to run
		// TODO: Work-steal from other threads.
		while(true) {
			std::function<void()> task;

			{
				// Hold the lock only long enough to take one task out of the queue.
				// Running the task under the lock would stall every producer that
				// inspects or appends to this queue for the whole duration of the task.
				std::unique_lock lk(myQueue.lock);
				if(myQueue.queue.empty())
					break;

				task = std::move(myQueue.queue.back());
				myQueue.queue.pop_back();
			}

			task();
		}
	}
}
|
||||
|
||||
void ThreadPool::launch(std::size_t nrThreads) {
|
||||
threadsShouldShutdown = false;
|
||||
this->nrThreads = nrThreads;
|
||||
|
||||
threads.resize(this->nrThreads);
|
||||
taskQueues = new TaskQueue[this->nrThreads];
|
||||
|
||||
for(std::size_t i = 0; i < this->nrThreads; ++i)
|
||||
threads.emplace_back(std::thread(&ThreadEntry, this, i));
|
||||
}
|
||||
|
||||
// Shutdown the thread pool
|
||||
void ThreadPool::shutdown() {
|
||||
if(!threadsShouldShutdown)
|
||||
threadsShouldShutdown = true;
|
||||
|
||||
queueCv.notify_all();
|
||||
|
||||
// join all the threads (if possible) to make sure they all exit
|
||||
for(auto& thread : threads)
|
||||
if(thread.joinable())
|
||||
thread.join();
|
||||
|
||||
nrThreads = 0;
|
||||
|
||||
delete[] taskQueues;
|
||||
taskQueues = nullptr;
|
||||
}
|
92
threadpool.hpp
Normal file
92
threadpool.hpp
Normal file
|
@ -0,0 +1,92 @@
|
|||
#pragma once

#include <atomic>
#include <chrono>
#include <condition_variable>
#include <cstddef>
#include <cstdlib>
#include <deque>
#include <functional>
#include <mutex>
#include <thread>
#include <utility>
#include <vector>
|
||||
|
||||
/// A simple thread pool executor.
/// Not at all optimized, and probably terrible for latency.
///
/// Every worker thread owns a private task queue; add_task() picks a worker at random and
/// appends to that worker's queue.
struct ThreadPool {
	ThreadPool() = default;

	// shorthand to call launch(nrThreads) automatically
	explicit ThreadPool(std::size_t nrThreads) { launch(nrThreads); }

	// move could be allowed, I guess
	ThreadPool(const ThreadPool&) = delete;
	ThreadPool(ThreadPool&&) = delete;

	~ThreadPool() { shutdown(); }

	/// Queues `cb` for execution on one of the workers.
	///
	/// Takes anything that is callable with a void() signature.
	/// This includes capturing lambdas, so be careful or make sure you're locking state!
	///
	/// If the chosen worker already has kMaxQueuedTasks tasks pending, the calling thread
	/// sleeps until that worker's queue is empty. If the pool has no workers (never
	/// launched, or already shut down) the task is run on the calling thread instead.
	template <class Callable>
	void add_task(Callable&& cb) {
		// No workers means there is no queue to push to (and PickWorker() would divide by zero)
		if(nrThreads == 0) {
			cb();
			return;
		}

		auto worker = PickWorker();

		//printf("picked worker %zu\n", worker);

		// N.B: These wrappers still allow the thread to progress
		if(QueueLength(worker) >= kMaxQueuedTasks) {
			//std::printf("queue for worker %zu too large. Blocking until it is empty\n", worker);
			while(!QueueEmpty(worker)) {
				std::this_thread::sleep_for(std::chrono::milliseconds(100));
			}
		}

		// add it to the task queue for that thread
		{
			std::unique_lock lk(this->taskQueues[worker].lock);
			// Forward so that an rvalue callable is moved into the queue rather than copied
			taskQueues[worker].queue.emplace_front(std::forward<Callable>(cb));
		}

		// Wake threads up if they are waiting for work
		queueCv.notify_all();
	}

	/// Spawns `nrThreads` workers.
	void launch(std::size_t nrThreads);

	// Shutdown the thread pool
	void shutdown();

   private:
	/// add_task() starts blocking once the chosen worker has this many tasks queued.
	static constexpr std::size_t kMaxQueuedTasks = 4;

	// could just use unique_ptr<T[]> for both of these,
	// or an analogue, since they will only increase or decrease in size on a call to launch()
	std::vector<std::thread> threads {}; // or analogue

	std::size_t nrThreads = 0;

	/// Per-worker queue together with the mutex that guards it.
	struct TaskQueue {
		std::mutex lock {};
		std::deque<std::function<void()>> queue {};
	};

	TaskQueue* taskQueues {};

	// Used to notify threads when work is available or to shutdown.
	// This has to be condition_variable_any: every worker waits with its own queue mutex, and
	// std::condition_variable requires all concurrent waiters to use the same mutex.
	std::condition_variable_any queueCv {};

	/// Used to notify on shutdown
	std::atomic_bool threadsShouldShutdown { false };

	// implement these out of line

	/// Number of tasks currently queued for `worker`.
	std::size_t QueueLength(std::size_t worker) const {
		std::unique_lock lk(this->taskQueues[worker].lock);
		return this->taskQueues[worker].queue.size();
	}

	/// True if `worker` has no queued tasks.
	bool QueueEmpty(std::size_t worker) const {
		std::unique_lock lk(this->taskQueues[worker].lock);
		return this->taskQueues[worker].queue.empty();
	}

	/// Picks a random worker index. Requires nrThreads != 0.
	std::size_t PickWorker() const {
		return std::rand() % nrThreads;
	}

	static void ThreadEntry(ThreadPool* pPool, std::size_t myIndex);
};
|
93
tree.hpp
Normal file
93
tree.hpp
Normal file
|
@ -0,0 +1,93 @@
|
|||
#pragma once
|
||||
#include <vector>
|
||||
|
||||
/// A simplistic N-ary/generic tree. Probably not very good for data locality.
///
/// The tree owns a root node holding a value-initialized T; every node owns its children and
/// deletes them when it is destroyed.
template <class T>
struct Tree {
	// FIXME:
	// - make T not require default constructability
	// - move instead of copy into leaf
	// - use "btree-like" representation of N-ary nodes to save memory
	struct Node {
	   protected:
		friend Tree;
		Node* parent = nullptr;
		std::vector<Node*> children {};
		T item {};

	   public:
		Node() = default;

		// A node owns its children through raw pointers, so a memberwise copy would make
		// two nodes delete the same children.
		Node(const Node&) = delete;
		Node& operator=(const Node&) = delete;

		~Node() {
			for(auto* child : children)
				delete child;
		}

		T& data() { return item; }

		const T& data() const { return item; }

		/// True if this node has no children.
		bool is_leaf() const { return children.empty(); }

		/// True if this node has no parent.
		bool is_root() const { return parent == nullptr; }

		Node* parent_node() { return parent; }

		/// Appends a new child holding a copy of `item` and returns it.
		/// The returned node is owned by this node.
		Node* create_leaf(const T& item) {
			auto* node = new Node;
			node->parent = this;
			node->item = item;
			children.push_back(node);
			return node;
		}

		/// Calls fn(node) for this node and then, depth first, for every descendant.
		template <class Fn>
		void walk(Fn&& fn) {
			fn(this);
			for(auto* child : children)
				child->walk(fn);
		}

		/// Returns the first direct child for which predicate(child) is true, or nullptr
		/// if there is none. Only direct children are searched: neither this node itself
		/// nor deeper descendants are considered.
		template <class Pred>
		Node* find_child(Pred&& predicate) {
			for(auto* child : children) {
				if(predicate(child))
					return child;
			}

			return nullptr;
		}

		/// Number of ancestors between this node and the root, inclusive (0 for the root).
		std::size_t parent_count() const {
			std::size_t count = 0;
			for(auto* ancestor = parent; ancestor != nullptr; ancestor = ancestor->parent)
				++count;
			return count;
		}
	};

	Tree() { root = new Node; }

	~Tree() { delete root; }

	// Trees are not copyable but they can move
	Tree(const Tree&) = delete;
	Tree& operator=(const Tree&) = delete;

	// The source gives up its root. A defaulted move would leave both trees pointing at the
	// same root and delete it twice. A moved-from tree has no root: root_node() returns
	// nullptr and walk()/create_leaf() must not be called on it.
	Tree(Tree&& other) noexcept : root(other.root) { other.root = nullptr; }

	Tree& operator=(Tree&& other) noexcept {
		if(this != &other) {
			delete root;
			root = other.root;
			other.root = nullptr;
		}
		return *this;
	}

	/// Calls fn(node) for every node in the tree, starting at the root.
	template <class Fn>
	void walk(Fn&& fn) {
		root->walk(fn);
	}

	/// Adds a child holding a copy of `item` directly beneath the root.
	Node* create_leaf(const T& item) { return root->create_leaf(item); }

	Node* root_node() { return root; }

   private:
	Node* root;
};
|
35
tree_test.cpp
Normal file
35
tree_test.cpp
Normal file
|
@ -0,0 +1,35 @@
|
|||
#include "tree.hpp"
|
||||
#include <string>
|
||||
|
||||
/// Builds a small tree of strings and prints it, one node per line, indented with one tab
/// per ancestor. The root is printed as "(root)".
void test_tree() {
	Tree<std::string> tree;

	auto* virus = tree.create_leaf("Virus");
	tree.create_leaf("Worm");

	auto* test = virus->create_leaf("test");

	test->create_leaf("a");
	test->create_leaf("b");
	test->create_leaf("c");
	test->create_leaf("884");

	tree.walk([](auto* node) {
		// parent_count() is unsigned, so build the indentation from it directly instead of
		// comparing it against a signed loop counter.
		const std::string indent(node->parent_count(), '\t');
		std::printf("%s", indent.c_str());

		if(node->is_root()) {
			std::printf("(root)\n");
		} else {
			std::printf("%s\n", node->data().c_str());
		}
	});
}
|
||||
|
||||
/// Runs the tree demo. The exit status is always 0.
int main() {
	test_tree();
}
|
150
vxheaven_parse.cpp
Normal file
150
vxheaven_parse.cpp
Normal file
|
@ -0,0 +1,150 @@
|
|||
#include "vxheaven_parse.hpp"
|
||||
|
||||
#include <format>
|
||||
#include <fstream>
|
||||
#include <optional>
|
||||
#include <ranges>
|
||||
#include <span>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <vector>
|
||||
|
||||
namespace vxorg {
|
||||
|
||||
/// Splits `string` at every occurrence of `delim`.
///
/// The returned views point into `string`, so make sure `string` outlives the vector (do not
/// pass a temporary). An empty input yields an empty vector; adjacent, leading or trailing
/// delimiters yield empty views.
std::vector<std::string_view> split_by(const std::string& string, char delim) {
	std::vector<std::string_view> pieces {};
	if(string.empty())
		return pieces;

	const std::string_view whole { string };
	std::size_t start = 0;

	while(true) {
		const auto cut = whole.find(delim, start);
		if(cut == std::string_view::npos) {
			// everything after the last delimiter (possibly nothing)
			pieces.push_back(whole.substr(start));
			break;
		}

		pieces.push_back(whole.substr(start, cut - start));
		start = cut + 1;
	}

	return pieces;
}
|
||||
|
||||
template<class Fn>
|
||||
void walk_parents_in_tree_order(VxHeavenTree::Node* node, Fn&& fn) {
|
||||
std::string sample_name {};
|
||||
std::vector<VxHeavenTree::Node*> parent_list {};
|
||||
vxorg::VxHeavenTree::Node* parent = node->parent_node();
|
||||
|
||||
while(parent) {
|
||||
if(parent->is_root())
|
||||
break;
|
||||
|
||||
parent_list.push_back(parent);
|
||||
parent = parent->parent_node();
|
||||
}
|
||||
|
||||
for(auto& item : std::views::reverse(parent_list)) {
|
||||
fn(item);
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the full dotted name of the sample at `node`: the names of all its non-root
/// ancestors, outermost first, then the node's own name, joined with '.'.
/// A null node yields an empty string; a node that is not a sample yields just its own name.
std::string get_sample_name(VxHeavenTree::Node* node) {
	if(node == nullptr)
		return "";

	const auto& own = node->data();
	if(!own.is_sample)
		return own.name;

	std::string dotted {};
	walk_parents_in_tree_order(node, [&dotted](auto* ancestor) {
		dotted += ancestor->data().name;
		dotted += '.';
	});
	dotted += own.name;
	return dotted;
}
|
||||
|
||||
/// Returns the relative directory for `node`: one path component per non-root ancestor,
/// outermost first. The node's own name is not part of the result. A null node yields an
/// empty path.
std::filesystem::path get_sample_path(VxHeavenTree::Node* node) {
	std::filesystem::path directory {};

	if(node != nullptr) {
		walk_parents_in_tree_order(node, [&directory](auto* ancestor) { directory /= ancestor->data().name; });
	}

	return directory;
}
|
||||
|
||||
/// Reads one dotted sample name per line from `is` and merges it into `tree`.
///
/// Each '.'-separated component becomes one tree level (type, platform, family, then any
/// number of variant components). Nodes that already exist are reused; the node reached by
/// the final component is flagged as a sample. Blank lines are skipped.
///
/// @param tree Tree to add the parsed names to.
/// @param is   Stream to read the sample list from.
void parse_into_tree(VxHeavenTree& tree, std::istream& is) {
	std::string line {};

	while(std::getline(is, line)) {
		const auto components = split_by(line, '.');

		// A blank line has no components at all; indexing into it would be out of bounds
		if(components.empty())
			continue;

		// Descend one level per component, creating whatever is missing. Handling every
		// depth with the same loop means names with any number of components are safe,
		// and a node that already exists as part of a longer name still gets flagged below.
		auto* leaf = tree.root_node();
		for(const auto& component : components) {
			auto* next = leaf->find_child([&](auto* candidate) { return candidate->data().name == component; });

			if(next == nullptr)
				next = leaf->create_leaf({ .name = std::string(component), .is_sample = false });

			leaf = next;
		}

		// The last node we visit is the sample
		leaf->data().is_sample = true;
	}
}
|
||||
} // namespace vxorg
|
23
vxheaven_parse.hpp
Normal file
23
vxheaven_parse.hpp
Normal file
|
@ -0,0 +1,23 @@
|
|||
#pragma once
|
||||
#include <string>
|
||||
|
||||
#include "tree.hpp"
|
||||
|
||||
#include <filesystem>
|
||||
|
||||
namespace vxorg {
|
||||
|
||||
/// Payload stored in every node of the sample tree.
struct VxHeavenItem {
	/// One '.'-separated component of a sample name.
	std::string name {};

	/// True if this item is also a sample.
	/// Defaults to false so a default-initialized item never holds an indeterminate value.
	bool is_sample = false;
};
|
||||
|
||||
using VxHeavenTree = Tree<VxHeavenItem>;
|
||||
|
||||
std::string get_sample_name(VxHeavenTree::Node* node);
|
||||
|
||||
std::filesystem::path get_sample_path(VxHeavenTree::Node* node);
|
||||
|
||||
void parse_into_tree(VxHeavenTree& tree, std::istream& is);
|
||||
} // namespace vxorg
|
134
vxorg.cpp
Normal file
134
vxorg.cpp
Normal file
|
@ -0,0 +1,134 @@
|
|||
#include <cstring>
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
#include <indicators/progress_bar.hpp>
|
||||
#include <indicators/terminal_size.hpp>
|
||||
#include <string>
|
||||
|
||||
#include "indicators/terminal_size.hpp"
|
||||
#include "threadpool.hpp"
|
||||
#include "tree.hpp"
|
||||
#include "vxheaven_parse.hpp"
|
||||
|
||||
namespace ind = indicators;
|
||||
|
||||
/// Entry point: `vxorg [path to list] [source path] [destination path]`.
///
/// Parses the sample list into a tree, then moves every sample file found in the flat source
/// directory to `destination / <ancestor directories> / <full sample name>`. The moves run on
/// a small thread pool while a progress bar is shown. The destination directory is created
/// if it does not exist.
///
/// @return 0 on success, 1 on a usage error, an unreadable list or a missing source path.
int main(int argc, char** argv) {
	if(argc != 4) {
		std::fprintf(stderr, "usage: %s [path to list] [source path] [destination path]\n", argv[0]);
		return 1;
	}

	vxorg::VxHeavenTree sample_tree;

	// used for os filesystem ops
	ThreadPool filesystem_threadpool(4);

	// Parse into the sample tree
	std::ifstream ifs(argv[1]);

	if(!ifs) {
		// std::strerror instead of strerror_r: the GNU variant of strerror_r returns the
		// message pointer and is free to leave the caller's buffer untouched, which
		// would print an empty reason.
		const int open_errno = errno;
		std::fprintf(stderr, "Could not open sample list \"%s\": %s\n", argv[1], std::strerror(open_errno));
		return 1;
	}

	vxorg::parse_into_tree(sample_tree, ifs);

	std::filesystem::path unorganized_source_path = argv[2];
	std::filesystem::path organized_destination_path = argv[3];

	if(!std::filesystem::exists(unorganized_source_path)) {
		std::fprintf(stderr, "Source path \"%s\" does not exist\n", argv[2]);
		return 1;
	}

	if(!std::filesystem::exists(organized_destination_path))
		std::filesystem::create_directories(organized_destination_path);

	std::size_t sampleCount = 0;

	// Walk the tree to get the amount of sample nodes
	sample_tree.walk([&](auto* node) {
		if(node->data().is_sample)
			sampleCount++;
	});

	// The width is unsigned: subtracting 64 from a narrow (or unknown) terminal width would
	// wrap around to an enormous bar, so fall back to a small fixed width instead.
	const std::size_t terminal_columns = indicators::terminal_width();
	const std::size_t bar_width = terminal_columns > 74 ? terminal_columns - 64 : 10;

	ind::ProgressBar bar { ind::option::BarWidth { bar_width },
						   ind::option::Start { "[" },
						   ind::option::Fill { "■" },
						   ind::option::Lead { "■" },
						   ind::option::Remainder { "-" },
						   ind::option::End { " ]" },
						   ind::option::ForegroundColor { ind::Color::red },
						   ind::option::FontStyles { std::vector<ind::FontStyle> { ind::FontStyle::bold } },
						   ind::option::MaxProgress { sampleCount } };

	// Walk the tree to perform the operation
	sample_tree.walk([&](auto* node) {
		auto& data = node->data();

		#if 0
		auto tabulation_level = node->parent_count();

		if(tabulation_level != 0) {
			for(auto i = 0; i < tabulation_level; ++i) {
				std::printf("\t");
			}
		}

		if(node->is_root()) {
			std::printf("(root)\n");
		} else {
			if(data.is_sample) {
				std::string sample_name = vxorg::get_sample_name(node);
				std::printf("%s (sample %s)\n", data.name.c_str(), sample_name.c_str());
			} else {
				std::printf("%s\n", data.name.c_str());
			}
		}
		#endif

		#if 1
		if(node->is_root() || !data.is_sample)
			return;

		const std::string sample_name = vxorg::get_sample_name(node);

		// paths
		auto path = organized_destination_path / vxorg::get_sample_path(node);
		auto source_path = unorganized_source_path / sample_name;

		if(!std::filesystem::exists(source_path)) {
			std::printf("WARNING: sample %s in tree (source disk file %s) does not exist\n", sample_name.c_str(),
						source_path.string().c_str());
			return;
		}

		// `bar` is captured by reference; it stays alive because the pool is shut down
		// (and all tasks finished) before main returns.
		filesystem_threadpool.add_task([path, source_path, sample_name, &bar]() {
			bar.set_option(ind::option::PostfixText { std::format("Moving {}", sample_name) });

			const auto dest_path = path / sample_name;

			// An exception escaping a worker thread terminates the whole process, so
			// report filesystem failures for this one sample and carry on.
			try {
				// create_directories() is a no-op when the directory already exists
				std::filesystem::create_directories(path);

				if(std::filesystem::exists(dest_path)) {
					std::filesystem::remove(dest_path);
				}

				std::filesystem::rename(source_path, dest_path);
			} catch(const std::filesystem::filesystem_error& err) {
				std::fprintf(stderr, "\nERROR: could not move sample %s: %s\n", sample_name.c_str(), err.what());
			}

			bar.tick();
		});
		#endif
	});

	filesystem_threadpool.shutdown();

	bar.mark_as_completed();
	std::printf("Done.\n");
	return 0;
}
|
Loading…
Reference in a new issue