libeuropa/io: Rewrite PakFile to use a sumtype to store pak file data
This allows pak writer file data to cleanly come from one of two possible sources: - A file on the filesystem (in which case the PakWriter will open the file, tee it into the package file efficiently, and then close it) - A data buffer (which functions like before). PakReader, however, will always output data buffers.
This commit is contained in:
parent
788fcd9677
commit
5060bc4fb6
9 changed files with 234 additions and 120 deletions
|
@ -11,6 +11,11 @@
|
|||
|
||||
#include <cstdint>
|
||||
#include <europa/structs/Pak.hpp>
|
||||
#include <europa/util/Overloaded.hpp>
|
||||
#include <filesystem>
|
||||
#include <stdexcept>
|
||||
#include <type_traits>
|
||||
#include <variant>
|
||||
#include <vector>
|
||||
|
||||
namespace europa::io {
|
||||
|
@ -18,14 +23,72 @@ namespace europa::io {
|
|||
struct PakReader;
|
||||
struct PakWriter;
|
||||
|
||||
/// Sum type describing where the contents of a package file come from:
/// either an in-memory byte buffer, or a path to a file on the filesystem.
struct PakFileData {
    // clang-format off
    using Variant = std::variant<
        // File data
        std::vector<std::uint8_t>,

        // Path
        std::filesystem::path
    >;
    // clang-format on

    /// Creates file data which takes ownership of the given buffer.
    static PakFileData InitAsBuffer(std::vector<std::uint8_t>&& buffer) {
        return PakFileData { Variant(std::move(buffer)) };
    }

    /// Creates file data which refers to a file by path.
    /// The path is only stored here; it is validated when GetSize() is called.
    static PakFileData InitAsPath(const std::filesystem::path& path) {
        return PakFileData { Variant(path) };
    }

    /// Returns the size of the data in bytes.
    ///
    /// For a buffer this is the buffer's size; for a path it is the size of
    /// the file the path refers to, queried from the filesystem on every call.
    ///
    /// @throws std::runtime_error if the path does not refer to an existing
    ///         regular file, or if the size does not fit in 32 bits.
    std::uint32_t GetSize() const {
        // The size is reported as a 32-bit value, so reject anything larger
        // instead of silently truncating it.
        const auto narrow = [](std::uintmax_t size) -> std::uint32_t {
            if(size > UINT32_MAX)
                throw std::runtime_error("file data is too large for a package file");
            return static_cast<std::uint32_t>(size);
        };

        // The visitor takes the alternative by const reference: this method is
        // const, so the variant (and whatever it holds) is const here as well.
        return std::visit([&](const auto& value) -> std::uint32_t {
            using T = std::decay_t<decltype(value)>;

            if constexpr(std::is_same_v<T, std::filesystem::path>) {
                // is_regular_file() is false for missing paths too, so this one
                // check rejects both nonexistent paths and non-files (e.g. directories).
                if(!std::filesystem::is_regular_file(value))
                    throw std::runtime_error("invalid path in path file");
                return narrow(std::filesystem::file_size(value));
            } else {
                return narrow(value.size());
            }
        },
                          variant_);
    }

    /// Returns a pointer to the held alternative if it is of type T,
    /// or nullptr if the variant currently holds the other alternative.
    template <class T>
    const T* GetIf() const {
        return std::get_if<T>(&variant_);
    }

    // private:
    PakFileData::Variant variant_;
};
|
||||
|
||||
/// Represents a package file.
|
||||
/// FIXME: Maybe make this not hold a buffer at some point,
|
||||
/// or a sumtype which can contain either buffer OR path to os file
|
||||
/// (which we can then efficiently tee into)
|
||||
struct PakFile {
|
||||
using DataType = std::vector<std::uint8_t>;
|
||||
using DataType = PakFileData;
|
||||
|
||||
template<class T>
|
||||
template <class T>
|
||||
void InitAs(const T& value) {
|
||||
toc = value;
|
||||
}
|
||||
|
@ -33,78 +96,88 @@ namespace europa::io {
|
|||
void InitAs(structs::PakVersion version) {
|
||||
switch(version) {
|
||||
case structs::PakVersion::Ver3:
|
||||
toc = structs::PakHeader_V3::TocEntry{};
|
||||
toc = structs::PakHeader_V3::TocEntry {};
|
||||
break;
|
||||
case structs::PakVersion::Ver4:
|
||||
toc = structs::PakHeader_V4::TocEntry{};
|
||||
toc = structs::PakHeader_V4::TocEntry {};
|
||||
break;
|
||||
case structs::PakVersion::Ver5:
|
||||
toc = structs::PakHeader_V5::TocEntry{};
|
||||
toc = structs::PakHeader_V5::TocEntry {};
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
bool HasData() const {
|
||||
return fileData.has_value();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the file data.
|
||||
*/
|
||||
[[nodiscard]] const DataType& GetData() const {
|
||||
return data;
|
||||
if(!fileData.has_value())
|
||||
throw std::runtime_error("no file data to get!");
|
||||
return fileData.value();
|
||||
}
|
||||
|
||||
/// Sets data.
|
||||
void SetData(DataType&& data) {
|
||||
this->fileData = std::move(data);
|
||||
|
||||
// Update the TOC size.
|
||||
std::visit([&](auto& entry) {
|
||||
entry.size = this->fileData.value().GetSize();
|
||||
},
|
||||
toc);
|
||||
}
|
||||
|
||||
/// Purge read file data.
|
||||
void PurgeData() {
|
||||
this->fileData = std::nullopt;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the TOC entry responsible.
|
||||
*/
|
||||
template<class T>
|
||||
template <class T>
|
||||
[[nodiscard]] const T& GetTOCEntry() const {
|
||||
return std::get<T>(toc);
|
||||
}
|
||||
|
||||
void SetData(DataType&& data) {
|
||||
this->data = std::move(data);
|
||||
|
||||
// Update the TOC size.
|
||||
std::visit([&](auto& entry) {
|
||||
entry.size = this->data.size();
|
||||
}, toc);
|
||||
}
|
||||
|
||||
std::uint32_t GetCreationUnixTime() const {
|
||||
std::uint32_t time{};
|
||||
std::uint32_t time {};
|
||||
|
||||
std::visit([&](auto& entry) {
|
||||
time = entry.creationUnixTime;
|
||||
}, toc);
|
||||
},
|
||||
toc);
|
||||
|
||||
return time;
|
||||
}
|
||||
|
||||
std::uint32_t GetOffset() const {
|
||||
std::uint32_t size{};
|
||||
std::uint32_t size {};
|
||||
|
||||
std::visit([&](auto& entry) {
|
||||
size = entry.offset;
|
||||
}, toc);
|
||||
},
|
||||
toc);
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
std::uint32_t GetSize() const {
|
||||
std::uint32_t size{};
|
||||
std::uint32_t size {};
|
||||
|
||||
std::visit([&](auto& entry) {
|
||||
size = entry.size;
|
||||
}, toc);
|
||||
},
|
||||
toc);
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
void FillTOCEntry() {
|
||||
std::visit([&](auto& entry) {
|
||||
entry.size = static_cast<std::uint32_t>(data.size());
|
||||
}, toc);
|
||||
}
|
||||
|
||||
template<class Cb>
|
||||
template <class Cb>
|
||||
void Visit(const Cb& cb) {
|
||||
std::visit(cb, toc);
|
||||
}
|
||||
|
@ -113,7 +186,7 @@ namespace europa::io {
|
|||
friend PakReader;
|
||||
friend PakWriter;
|
||||
|
||||
std::vector<std::uint8_t> data;
|
||||
std::optional<PakFileData> fileData;
|
||||
structs::PakTocEntryVariant toc;
|
||||
};
|
||||
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
//
|
||||
// (C) 2021-2022 modeco80 <lily.modeco80@protonmail.ch>
|
||||
//
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
// SPDX-License-Identifier: LGPL-3.0-or-later
|
||||
//
|
||||
|
||||
#ifndef EUROPA_UTIL_FIXEDSTRING_H
|
||||
|
|
24
include/europa/util/Overloaded.hpp
Normal file
24
include/europa/util/Overloaded.hpp
Normal file
|
@ -0,0 +1,24 @@
|
|||
//
|
||||
// EuropaTools
|
||||
//
|
||||
// (C) 2021-2025 modeco80 <lily.modeco80@protonmail.ch>
|
||||
//
|
||||
// SPDX-License-Identifier: LGPL-3.0-or-later
|
||||
//
|
||||
|
||||
#ifndef EUROPA_UTIL_OVERLOADED_HPP
|
||||
#define EUROPA_UTIL_OVERLOADED_HPP
|
||||
|
||||
namespace europa {

    /// Combines several callables into one object whose call operator is the
    /// overload set of all of theirs. Handy as a std::visit visitor.
    template <class... Callables>
    struct overloaded : Callables... {
        using Callables::operator()...;
    };

    // Supposedly this isn't needed, but the CTAD guide is required in this case.
    template <class... Callables>
    overloaded(Callables...) -> overloaded<Callables...>;

} // namespace europa
|
||||
|
||||
#endif
|
|
@ -6,15 +6,18 @@
|
|||
// SPDX-License-Identifier: LGPL-3.0-or-later
|
||||
//
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <europa/io/PakReader.hpp>
|
||||
#include <europa/structs/Pak.hpp>
|
||||
#include <stdexcept>
|
||||
|
||||
#include "europa/io/PakFile.hpp"
|
||||
#include "StreamUtils.h"
|
||||
|
||||
namespace europa::io {
|
||||
|
||||
/*
|
||||
/*
|
||||
inline std::optional<PakHeader> GetPakHeader(const PakHeader_Common& common_header) {
|
||||
switch(common_header.version) {
|
||||
case PakVersion::Ver3:
|
||||
|
@ -33,13 +36,11 @@ namespace europa::io {
|
|||
}
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/// Binds the reader to the input stream it will read from.
PakReader::PakReader(std::istream& is) : stream(is) {
}
|
||||
|
||||
template<class T>
|
||||
template <class T>
|
||||
void PakReader::ReadData_Impl() {
|
||||
auto header_type = impl::ReadStreamType<T>(stream);
|
||||
|
||||
|
@ -48,7 +49,7 @@ namespace europa::io {
|
|||
return;
|
||||
}
|
||||
|
||||
bool isStreams{false};
|
||||
bool isStreams { false };
|
||||
|
||||
if(header_type.tocOffset > 0x17000000)
|
||||
isStreams = true;
|
||||
|
@ -67,7 +68,6 @@ namespace europa::io {
|
|||
files[filename].Visit([&](auto& tocEntry) {
|
||||
tocEntry.creationUnixTime = impl::ReadStreamType<structs::u32>(stream);
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
header = header_type;
|
||||
|
@ -101,16 +101,22 @@ namespace europa::io {
|
|||
|
||||
void PakReader::ReadFile(const std::string& file) {
|
||||
auto& fileObject = files[file];
|
||||
std::vector<std::uint8_t> buffer;
|
||||
|
||||
buffer.resize(fileObject.GetSize());
|
||||
|
||||
// This file was already read in, or has data
|
||||
// the user may not want to overwrite.
|
||||
if(!fileObject.data.empty())
|
||||
if(!fileObject.HasData())
|
||||
return;
|
||||
|
||||
fileObject.data.resize(fileObject.GetSize());
|
||||
|
||||
stream.seekg(fileObject.GetOffset(), std::istream::beg);
|
||||
stream.read(reinterpret_cast<char*>(&fileObject.data[0]), fileObject.GetSize());
|
||||
stream.read(reinterpret_cast<char*>(&buffer[0]), buffer.size());
|
||||
if(!stream)
|
||||
throw std::runtime_error("Stream went bad while trying to read file");
|
||||
|
||||
auto data = PakFileData::InitAsBuffer(std::move(buffer));
|
||||
fileObject.SetData(std::move(data));
|
||||
}
|
||||
|
||||
PakReader::MapType& PakReader::GetFiles() {
|
||||
|
|
|
@ -9,10 +9,13 @@
|
|||
#include <algorithm>
|
||||
#include <europa/io/PakWriter.hpp>
|
||||
#include <europa/util/TupleElement.hpp>
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
|
||||
#include "StreamUtils.h"
|
||||
#include "europa/structs/Pak.hpp"
|
||||
#include "StreamUtils.h"
|
||||
|
||||
namespace europa::io {
|
||||
|
||||
|
@ -26,12 +29,12 @@ namespace europa::io {
|
|||
|
||||
// move to a util/ header
|
||||
|
||||
/// Rounds `value` up to the next multiple of `alignment`.
///
/// `alignment` must be a nonzero power of two. A value which is already
/// aligned is returned unchanged.
///
/// Callers seek to the result as an absolute stream position, so the aligned
/// value itself is returned, not just the amount of padding needed.
template <class T>
constexpr T AlignBy(T value, std::size_t alignment) {
    // Number of bytes missing to reach the next multiple (0 when aligned).
    const auto padding = static_cast<T>((-value) & (alignment - 1));
    return value + padding;
}
|
||||
|
||||
void PakWriter::Write(std::ostream &os, std::vector<FlattenedType> &&vec, PakProgressReportSink &sink) {
|
||||
void PakWriter::Write(std::ostream& os, std::vector<FlattenedType>&& vec, PakProgressReportSink& sink) {
|
||||
switch(version) {
|
||||
case structs::PakVersion::Ver3:
|
||||
WriteImpl<structs::PakHeader_V3>(os, std::move(vec), sink);
|
||||
|
@ -42,28 +45,28 @@ namespace europa::io {
|
|||
case structs::PakVersion::Ver5:
|
||||
WriteImpl<structs::PakHeader_V3>(os, std::move(vec), sink);
|
||||
break;
|
||||
default:
|
||||
throw std::invalid_argument("Invalid version");
|
||||
}
|
||||
}
|
||||
|
||||
template<class T>
|
||||
template <class THeader>
|
||||
void PakWriter::WriteImpl(std::ostream& os, std::vector<FlattenedType>&& vec, PakProgressReportSink& sink, bool sectorAligned) {
|
||||
|
||||
std::vector<FlattenedType> sortedFiles = std::move(vec);
|
||||
|
||||
T pakHeader{};
|
||||
THeader pakHeader {};
|
||||
|
||||
// Sort the flattened array by file size, the biggest first.
|
||||
// Doesn't seem to help (neither does name length)
|
||||
std::ranges::sort(sortedFiles, std::greater{}, [](const FlattenedType& elem) {
|
||||
// Sort the flattened array.
|
||||
std::ranges::sort(sortedFiles, std::greater {}, [](const FlattenedType& elem) {
|
||||
return elem.second.GetCreationUnixTime();
|
||||
});
|
||||
|
||||
// Leave space for the header
|
||||
os.seekp(sizeof(T), std::ostream::beg);
|
||||
os.seekp(sizeof(THeader), std::ostream::beg);
|
||||
|
||||
// Version 5 paks seem to have an additional bit of reserved data
|
||||
// (which is all zeros.)
|
||||
if(T::VERSION == structs::PakVersion::Ver5) {
|
||||
if(THeader::VERSION == structs::PakVersion::Ver5) {
|
||||
os.seekp(6, std::ostream::cur);
|
||||
}
|
||||
|
||||
|
@ -71,48 +74,48 @@ namespace europa::io {
|
|||
if(sectorAligned)
|
||||
os.seekp(
|
||||
AlignBy(os.tellp(), kCDSectorSize),
|
||||
std::istream::beg
|
||||
);
|
||||
std::istream::beg);
|
||||
|
||||
// Write file data
|
||||
// Write all the file data
|
||||
for(auto& [filename, file] : sortedFiles) {
|
||||
sink.OnEvent({
|
||||
PakProgressReportSink::FileEvent::Type::FileBeginWrite,
|
||||
filename
|
||||
});
|
||||
sink.OnEvent({ PakProgressReportSink::FileEvent::Type::FileBeginWrite,
|
||||
filename });
|
||||
|
||||
// Update the offset to where we currently are, since we will be writing the file there
|
||||
file.Visit([&](auto& tocEntry) {
|
||||
tocEntry.offset = os.tellp();
|
||||
});
|
||||
|
||||
// FIXME: Should we rely on GetSize() when writing? Honestly, it seems like a bit of a
|
||||
// mistake that caused a pretty glaring bug.
|
||||
os.write(reinterpret_cast<const char*>(file.GetData().data()), file.GetSize());
|
||||
auto& fileData = file.GetData();
|
||||
|
||||
// FIXME: use a visitor or something. For now I'm lazy and this should work
|
||||
if(auto* path = fileData.template GetIf<std::filesystem::path>(); path) {
|
||||
auto fs = std::ifstream((*path).string(), std::ifstream::binary);
|
||||
if(!fs)
|
||||
throw std::runtime_error("couldnt open input file? HOW");
|
||||
|
||||
// tee data from the file stream efficiently
|
||||
impl::TeeInOut(fs, os);
|
||||
} else if(auto* buffer = fileData.template GetIf<std::vector<std::uint8_t>>(); buffer) {
|
||||
os.write(reinterpret_cast<const char*>((*buffer).data()), file.GetSize());
|
||||
}
|
||||
|
||||
// Align to sector boundary.
|
||||
if(sectorAligned)
|
||||
os.seekp(
|
||||
AlignBy(os.tellp(), kCDSectorSize),
|
||||
std::istream::beg
|
||||
);
|
||||
std::istream::beg);
|
||||
|
||||
sink.OnEvent({
|
||||
PakProgressReportSink::FileEvent::Type::FileEndWrite,
|
||||
filename
|
||||
});
|
||||
sink.OnEvent({ PakProgressReportSink::FileEvent::Type::FileEndWrite,
|
||||
filename });
|
||||
}
|
||||
|
||||
pakHeader.tocOffset = os.tellp();
|
||||
|
||||
sink.OnEvent({
|
||||
PakProgressReportSink::PakEvent::Type::WritingToc
|
||||
});
|
||||
sink.OnEvent({ PakProgressReportSink::PakEvent::Type::WritingToc });
|
||||
|
||||
// Write the TOC
|
||||
for(auto& [filename, file] : sortedFiles) {
|
||||
file.FillTOCEntry();
|
||||
|
||||
// Write the filename Pascal string.
|
||||
os.put(static_cast<char>(filename.length() + 1));
|
||||
for(const auto c : filename)
|
||||
|
@ -124,20 +127,14 @@ namespace europa::io {
|
|||
});
|
||||
}
|
||||
|
||||
|
||||
sink.OnEvent({
|
||||
PakProgressReportSink::PakEvent::Type::FillInHeader
|
||||
});
|
||||
sink.OnEvent({ PakProgressReportSink::PakEvent::Type::FillInHeader });
|
||||
|
||||
// Fill out the rest of the header.
|
||||
pakHeader.fileCount = sortedFiles.size();
|
||||
pakHeader.tocSize = static_cast<std::uint32_t>(os.tellp()) - (pakHeader.tocOffset - 1);
|
||||
pakHeader.creationUnixTime = 132890732;
|
||||
|
||||
|
||||
sink.OnEvent({
|
||||
PakProgressReportSink::PakEvent::Type::WritingHeader
|
||||
});
|
||||
sink.OnEvent({ PakProgressReportSink::PakEvent::Type::WritingHeader });
|
||||
|
||||
// As the last step, write it.
|
||||
os.seekp(0, std::ostream::beg);
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
//
|
||||
|
||||
#include "StreamUtils.h"
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
namespace europa::io::impl {
|
||||
|
@ -65,4 +66,15 @@ namespace europa::io::impl {
|
|||
}
|
||||
}
|
||||
|
||||
/// Copies everything remaining in `is` to `os`, in chunks of up to 4096 bytes,
/// until the input stream reaches EOF or fails, or the output stream fails.
void TeeInOut(std::istream& is, std::ostream& os) {
    char buffer[4096];

    while(is && os) {
        is.read(buffer, sizeof(buffer));

        // A short final read sets eofbit/failbit on the stream but still
        // fills part of the buffer. gcount() says how much, and that tail
        // must be written out before the loop stops.
        const auto count = is.gcount();
        if(count > 0)
            os.write(buffer, count);
    }
}
|
||||
|
||||
} // namespace europa::io::impl
|
||||
|
|
|
@ -53,6 +53,9 @@ namespace europa::io::impl {
|
|||
std::string ReadZeroTerminatedString(std::istream& is);
|
||||
std::string ReadPString(std::istream& is);
|
||||
|
||||
/// Tees a input stream to an output stream until the input stream signals EOF.
|
||||
void TeeInOut(std::istream& is, std::ostream& os);
|
||||
|
||||
} // namespace europa::io::impl
|
||||
|
||||
#endif // EUROPA_TOOLS_STREAMUTILS_H
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
#include <iostream>
|
||||
#include <tasks/CreateTask.hpp>
|
||||
#include <Utils.hpp>
|
||||
#include "europa/io/PakFile.hpp"
|
||||
|
||||
namespace eupak::tasks {
|
||||
|
||||
|
@ -125,21 +126,8 @@ namespace eupak::tasks {
|
|||
|
||||
progress.set_option(indicators::option::PostfixText { relativePathName + " (" + std::to_string(currFile + 1) + '/' + std::to_string(fileCount) + ")" });
|
||||
|
||||
std::ifstream ifs(ent.path(), std::ifstream::binary);
|
||||
|
||||
if(!ifs) {
|
||||
std::cout << "Error: Couldn't open file for archive path \"" << relativePathName << "\"\n";
|
||||
return 1;
|
||||
}
|
||||
|
||||
europa::io::PakFile file;
|
||||
europa::io::PakFile::DataType pakData;
|
||||
|
||||
ifs.seekg(0, std::ifstream::end);
|
||||
pakData.resize(ifs.tellg());
|
||||
ifs.seekg(0, std::ifstream::beg);
|
||||
|
||||
ifs.read(reinterpret_cast<char*>(&pakData[0]), pakData.size());
|
||||
europa::io::PakFile::DataType pakData = europa::io::PakFileData::InitAsPath(ent.path());
|
||||
|
||||
file.InitAs(args.pakVersion);
|
||||
|
||||
|
|
|
@ -6,13 +6,13 @@
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
//
|
||||
|
||||
#include <tasks/ExtractTask.hpp>
|
||||
|
||||
#include <europa/io/PakReader.hpp>
|
||||
#include <fstream>
|
||||
#include <indicators/cursor_control.hpp>
|
||||
#include <indicators/progress_bar.hpp>
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
#include <tasks/ExtractTask.hpp>
|
||||
|
||||
// this actually is pretty fast so maybe I won't bother doing crazy thread optimizations..
|
||||
|
||||
|
@ -80,8 +80,19 @@ namespace eupak::tasks {
|
|||
std::cerr << "Extracting file \"" << filename << "\"...\n";
|
||||
}
|
||||
|
||||
ofs.write(reinterpret_cast<const char*>(file.GetData().data()), static_cast<std::streampos>(file.GetSize()));
|
||||
{
|
||||
auto& fileData = file.GetData();
|
||||
if(auto* buffer = fileData.GetIf<std::vector<std::uint8_t>>(); buffer) {
|
||||
ofs.write(reinterpret_cast<const char*>((*buffer).data()), (*buffer).size());
|
||||
ofs.flush();
|
||||
} else {
|
||||
throw std::runtime_error("???? why are we getting paths here?");
|
||||
}
|
||||
}
|
||||
|
||||
// We no longer need the file data anymore, so let's purge it to save memory
|
||||
file.PurgeData();
|
||||
|
||||
progress.tick();
|
||||
}
|
||||
|
||||
|
@ -89,4 +100,4 @@ namespace eupak::tasks {
|
|||
return 0;
|
||||
}
|
||||
|
||||
}
|
||||
} // namespace eupak::tasks
|
Loading…
Reference in a new issue