libeuropa/io: Rewrite PakFile to use a sumtype to store pak file data

This allows pak writer file data to cleanly come from one of two possible sources:

- A file on the filesystem (in which case the PakWriter will open the file, tee it into the package file efficiently, and then close it)
- A data buffer (which functions like before)

PakReader however will always output data buffers.
This commit is contained in:
Lily Tsuru 2025-01-06 17:12:58 -05:00
parent 788fcd9677
commit 5060bc4fb6
9 changed files with 234 additions and 120 deletions

View file

@ -11,6 +11,11 @@
#include <cstdint> #include <cstdint>
#include <europa/structs/Pak.hpp> #include <europa/structs/Pak.hpp>
#include <europa/util/Overloaded.hpp>
#include <filesystem>
#include <stdexcept>
#include <type_traits>
#include <variant>
#include <vector> #include <vector>
namespace europa::io { namespace europa::io {
@ -18,14 +23,72 @@ namespace europa::io {
struct PakReader; struct PakReader;
struct PakWriter; struct PakWriter;
/// Sum type describing where the contents of a package file come from.
///
/// Holds exactly one of:
/// - an in-memory byte buffer (`std::vector<std::uint8_t>`), or
/// - a path to a file on the host filesystem (`std::filesystem::path`).
struct PakFileData {
	// clang-format off
	using Variant = std::variant<
		// File data
		std::vector<std::uint8_t>,
		// Path
		std::filesystem::path
	>;
	// clang-format on

	/// Creates file data that takes ownership of the given in-memory buffer.
	static PakFileData InitAsBuffer(std::vector<std::uint8_t>&& buffer) {
		return PakFileData { Variant(std::move(buffer)) };
	}

	/// Creates file data that refers to a file on the filesystem.
	/// The path is only stored here; it is not checked until GetSize() is called.
	static PakFileData InitAsPath(const std::filesystem::path& path) {
		return PakFileData { Variant(path) };
	}

	/// Returns the size of the data in bytes.
	///
	/// For a buffer this is the buffer length; for a path this is the current
	/// size of the file on disk (queried on every call).
	///
	/// @throws std::runtime_error if the stored path is not a regular file, or if
	///         the size does not fit into the 32-bit value this function returns.
	///         The filesystem queries may additionally throw
	///         std::filesystem::filesystem_error (itself a std::runtime_error).
	std::uint32_t GetSize() const {
		std::uintmax_t size = 0;

		// This is a const member function, so the variant is only inspected
		// through pointers/references to const.
		if(const auto* buffer = std::get_if<std::vector<std::uint8_t>>(&variant_)) {
			size = buffer->size();
		} else {
			const auto& fsPath = std::get<std::filesystem::path>(variant_);

			// is_regular_file() is false for nonexistent paths as well, so this
			// one check rejects both missing paths and directories/devices/etc.
			if(!std::filesystem::is_regular_file(fsPath))
				throw std::runtime_error("invalid path in path file");

			size = std::filesystem::file_size(fsPath);
		}

		// Refuse to silently truncate; a wrapped-around size would be wrong.
		if(size > UINT32_MAX)
			throw std::runtime_error("file data is too large to be stored in a package file");

		return static_cast<std::uint32_t>(size);
	}

	/// Returns a pointer to the held alternative if it is of type T,
	/// or nullptr when the other alternative is held.
	template <class T>
	const T* GetIf() const {
		return std::get_if<T>(&variant_);
	}

	// private:
	PakFileData::Variant variant_;
};
/// Represents a package file. /// Represents a package file.
/// FIXME: Maybe make this not hold a buffer at some point, /// FIXME: Maybe make this not hold a buffer at some point,
/// or a sumtype which can contain either buffer OR path to os file /// or a sumtype which can contain either buffer OR path to os file
/// (which we can then efficiently tee into) /// (which we can then efficiently tee into)
struct PakFile { struct PakFile {
using DataType = std::vector<std::uint8_t>; using DataType = PakFileData;
template<class T> template <class T>
void InitAs(const T& value) { void InitAs(const T& value) {
toc = value; toc = value;
} }
@ -33,78 +96,88 @@ namespace europa::io {
void InitAs(structs::PakVersion version) { void InitAs(structs::PakVersion version) {
switch(version) { switch(version) {
case structs::PakVersion::Ver3: case structs::PakVersion::Ver3:
toc = structs::PakHeader_V3::TocEntry{}; toc = structs::PakHeader_V3::TocEntry {};
break; break;
case structs::PakVersion::Ver4: case structs::PakVersion::Ver4:
toc = structs::PakHeader_V4::TocEntry{}; toc = structs::PakHeader_V4::TocEntry {};
break; break;
case structs::PakVersion::Ver5: case structs::PakVersion::Ver5:
toc = structs::PakHeader_V5::TocEntry{}; toc = structs::PakHeader_V5::TocEntry {};
break; break;
} }
} }
bool HasData() const {
return fileData.has_value();
}
/** /**
* Get the file data. * Get the file data.
*/ */
[[nodiscard]] const DataType& GetData() const { [[nodiscard]] const DataType& GetData() const {
return data; if(!fileData.has_value())
throw std::runtime_error("no file data to get!");
return fileData.value();
}
/// Sets data.
void SetData(DataType&& data) {
this->fileData = std::move(data);
// Update the TOC size.
std::visit([&](auto& entry) {
entry.size = this->fileData.value().GetSize();
},
toc);
}
/// Purge read file data.
void PurgeData() {
this->fileData = std::nullopt;
} }
/** /**
* Get the TOC entry responsible. * Get the TOC entry responsible.
*/ */
template<class T> template <class T>
[[nodiscard]] const T& GetTOCEntry() const { [[nodiscard]] const T& GetTOCEntry() const {
return std::get<T>(toc); return std::get<T>(toc);
} }
void SetData(DataType&& data) {
this->data = std::move(data);
// Update the TOC size.
std::visit([&](auto& entry) {
entry.size = this->data.size();
}, toc);
}
std::uint32_t GetCreationUnixTime() const { std::uint32_t GetCreationUnixTime() const {
std::uint32_t time{}; std::uint32_t time {};
std::visit([&](auto& entry) { std::visit([&](auto& entry) {
time = entry.creationUnixTime; time = entry.creationUnixTime;
}, toc); },
toc);
return time; return time;
} }
std::uint32_t GetOffset() const { std::uint32_t GetOffset() const {
std::uint32_t size{}; std::uint32_t size {};
std::visit([&](auto& entry) { std::visit([&](auto& entry) {
size = entry.offset; size = entry.offset;
}, toc); },
toc);
return size; return size;
} }
std::uint32_t GetSize() const { std::uint32_t GetSize() const {
std::uint32_t size{}; std::uint32_t size {};
std::visit([&](auto& entry) { std::visit([&](auto& entry) {
size = entry.size; size = entry.size;
}, toc); },
toc);
return size; return size;
} }
void FillTOCEntry() { template <class Cb>
std::visit([&](auto& entry) {
entry.size = static_cast<std::uint32_t>(data.size());
}, toc);
}
template<class Cb>
void Visit(const Cb& cb) { void Visit(const Cb& cb) {
std::visit(cb, toc); std::visit(cb, toc);
} }
@ -113,7 +186,7 @@ namespace europa::io {
friend PakReader; friend PakReader;
friend PakWriter; friend PakWriter;
std::vector<std::uint8_t> data; std::optional<PakFileData> fileData;
structs::PakTocEntryVariant toc; structs::PakTocEntryVariant toc;
}; };

View file

@ -3,7 +3,7 @@
// //
// (C) 2021-2022 modeco80 <lily.modeco80@protonmail.ch> // (C) 2021-2022 modeco80 <lily.modeco80@protonmail.ch>
// //
// SPDX-License-Identifier: GPL-3.0-or-later // SPDX-License-Identifier: LGPL-3.0-or-later
// //
#ifndef EUROPA_UTIL_FIXEDSTRING_H #ifndef EUROPA_UTIL_FIXEDSTRING_H

View file

@ -0,0 +1,24 @@
//
// EuropaTools
//
// (C) 2021-2025 modeco80 <lily.modeco80@protonmail.ch>
//
// SPDX-License-Identifier: LGPL-3.0-or-later
//
#ifndef EUROPA_UTIL_OVERLOADED_HPP
#define EUROPA_UTIL_OVERLOADED_HPP
namespace europa {

	/// Builds a single callable out of several callables by inheriting from
	/// each of them and exposing every inherited call operator. Intended for
	/// use as an ad-hoc visitor (e.g. with std::visit).
	template <typename... Callables>
	struct overloaded : Callables... {
		using Callables::operator()...;
	};

	/// Explicit deduction guide so `overloaded { a, b, ... }` deduces its
	/// template arguments from the callables passed in.
	/// Supposedly this isn't needed, but the CTAD is required in this case.
	template <typename... Callables>
	overloaded(Callables...) -> overloaded<Callables...>;

} // namespace europa
#endif

View file

@ -6,40 +6,41 @@
// SPDX-License-Identifier: LGPL-3.0-or-later // SPDX-License-Identifier: LGPL-3.0-or-later
// //
#include <cstdint>
#include <cstring> #include <cstring>
#include <europa/io/PakReader.hpp> #include <europa/io/PakReader.hpp>
#include <europa/structs/Pak.hpp> #include <europa/structs/Pak.hpp>
#include <stdexcept>
#include "europa/io/PakFile.hpp"
#include "StreamUtils.h" #include "StreamUtils.h"
namespace europa::io { namespace europa::io {
/* /*
inline std::optional<PakHeader> GetPakHeader(const PakHeader_Common& common_header) { inline std::optional<PakHeader> GetPakHeader(const PakHeader_Common& common_header) {
switch(common_header.version) { switch(common_header.version) {
case PakVersion::Ver3: case PakVersion::Ver3:
return PakHeader_V3(common_header); return PakHeader_V3(common_header);
case PakVersion::Ver4: case PakVersion::Ver4:
return PakHeader_V4(common_header); return PakHeader_V4(common_header);
case PakVersion::Ver5: case PakVersion::Ver5:
return PakHeader_V5(common_header); return PakHeader_V5(common_header);
case PakVersion::Invalid: case PakVersion::Invalid:
default: default:
return std::nullopt; return std::nullopt;
}
} }
} */
*/
PakReader::PakReader(std::istream& is) PakReader::PakReader(std::istream& is)
: stream(is) { : stream(is) {
} }
template<class T> template <class T>
void PakReader::ReadData_Impl() { void PakReader::ReadData_Impl() {
auto header_type = impl::ReadStreamType<T>(stream); auto header_type = impl::ReadStreamType<T>(stream);
@ -48,7 +49,7 @@ namespace europa::io {
return; return;
} }
bool isStreams{false}; bool isStreams { false };
if(header_type.tocOffset > 0x17000000) if(header_type.tocOffset > 0x17000000)
isStreams = true; isStreams = true;
@ -67,7 +68,6 @@ namespace europa::io {
files[filename].Visit([&](auto& tocEntry) { files[filename].Visit([&](auto& tocEntry) {
tocEntry.creationUnixTime = impl::ReadStreamType<structs::u32>(stream); tocEntry.creationUnixTime = impl::ReadStreamType<structs::u32>(stream);
}); });
} }
header = header_type; header = header_type;
@ -101,16 +101,22 @@ namespace europa::io {
void PakReader::ReadFile(const std::string& file) { void PakReader::ReadFile(const std::string& file) {
auto& fileObject = files[file]; auto& fileObject = files[file];
std::vector<std::uint8_t> buffer;
buffer.resize(fileObject.GetSize());
// This file was already read in, or has data // This file was already read in, or has data
// the user may not want to overwrite. // the user may not want to overwrite.
if(!fileObject.data.empty()) if(!fileObject.HasData())
return; return;
fileObject.data.resize(fileObject.GetSize());
stream.seekg(fileObject.GetOffset(), std::istream::beg); stream.seekg(fileObject.GetOffset(), std::istream::beg);
stream.read(reinterpret_cast<char*>(&fileObject.data[0]), fileObject.GetSize()); stream.read(reinterpret_cast<char*>(&buffer[0]), buffer.size());
if(!stream)
throw std::runtime_error("Stream went bad while trying to read file");
auto data = PakFileData::InitAsBuffer(std::move(buffer));
fileObject.SetData(std::move(data));
} }
PakReader::MapType& PakReader::GetFiles() { PakReader::MapType& PakReader::GetFiles() {

View file

@ -9,10 +9,13 @@
#include <algorithm> #include <algorithm>
#include <europa/io/PakWriter.hpp> #include <europa/io/PakWriter.hpp>
#include <europa/util/TupleElement.hpp> #include <europa/util/TupleElement.hpp>
#include <filesystem>
#include <fstream>
#include <iostream> #include <iostream>
#include <stdexcept>
#include "StreamUtils.h"
#include "europa/structs/Pak.hpp" #include "europa/structs/Pak.hpp"
#include "StreamUtils.h"
namespace europa::io { namespace europa::io {
@ -26,12 +29,12 @@ namespace europa::io {
// move to a util/ header // move to a util/ header
template<class T> template <class T>
constexpr T AlignBy(T value, std::size_t alignment) { constexpr T AlignBy(T value, std::size_t alignment) {
return (-value) & alignment - 1; return (-value) & alignment - 1;
} }
void PakWriter::Write(std::ostream &os, std::vector<FlattenedType> &&vec, PakProgressReportSink &sink) { void PakWriter::Write(std::ostream& os, std::vector<FlattenedType>&& vec, PakProgressReportSink& sink) {
switch(version) { switch(version) {
case structs::PakVersion::Ver3: case structs::PakVersion::Ver3:
WriteImpl<structs::PakHeader_V3>(os, std::move(vec), sink); WriteImpl<structs::PakHeader_V3>(os, std::move(vec), sink);
@ -42,77 +45,77 @@ namespace europa::io {
case structs::PakVersion::Ver5: case structs::PakVersion::Ver5:
WriteImpl<structs::PakHeader_V3>(os, std::move(vec), sink); WriteImpl<structs::PakHeader_V3>(os, std::move(vec), sink);
break; break;
default:
throw std::invalid_argument("Invalid version");
} }
} }
template<class T> template <class THeader>
void PakWriter::WriteImpl(std::ostream& os, std::vector<FlattenedType>&& vec, PakProgressReportSink& sink, bool sectorAligned) { void PakWriter::WriteImpl(std::ostream& os, std::vector<FlattenedType>&& vec, PakProgressReportSink& sink, bool sectorAligned) {
std::vector<FlattenedType> sortedFiles = std::move(vec); std::vector<FlattenedType> sortedFiles = std::move(vec);
T pakHeader{}; THeader pakHeader {};
// Sort the flattened array by file size, the biggest first. // Sort the flattened array.
// Doesn't seem to help (neither does name length) std::ranges::sort(sortedFiles, std::greater {}, [](const FlattenedType& elem) {
std::ranges::sort(sortedFiles, std::greater{}, [](const FlattenedType& elem) {
return elem.second.GetCreationUnixTime(); return elem.second.GetCreationUnixTime();
}); });
// Leave space for the header // Leave space for the header
os.seekp(sizeof(T), std::ostream::beg); os.seekp(sizeof(THeader), std::ostream::beg);
// Version 5 paks seem to have an additional bit of reserved data // Version 5 paks seem to have an additional bit of reserved data
// (which is all zeros.) // (which is all zeros.)
if(T::VERSION == structs::PakVersion::Ver5) { if(THeader::VERSION == structs::PakVersion::Ver5) {
os.seekp(6, std::ostream::cur); os.seekp(6, std::ostream::cur);
} }
// Align first file to sector boundary. // Align first file to sector boundary.
if(sectorAligned) if(sectorAligned)
os.seekp( os.seekp(
AlignBy(os.tellp(), kCDSectorSize), AlignBy(os.tellp(), kCDSectorSize),
std::istream::beg std::istream::beg);
);
// Write file data // Write all the file data
for(auto& [filename, file] : sortedFiles) { for(auto& [filename, file] : sortedFiles) {
sink.OnEvent({ sink.OnEvent({ PakProgressReportSink::FileEvent::Type::FileBeginWrite,
PakProgressReportSink::FileEvent::Type::FileBeginWrite, filename });
filename
});
// Update the offset to where we currently are, since we will be writing the file there // Update the offset to where we currently are, since we will be writing the file there
file.Visit([&](auto& tocEntry) { file.Visit([&](auto& tocEntry) {
tocEntry.offset = os.tellp(); tocEntry.offset = os.tellp();
}); });
// FIXME: Should we rely on GetSize() when writing? Honestly, it seems like a bit of a auto& fileData = file.GetData();
// mistake that caused a pretty glaring bug.
os.write(reinterpret_cast<const char*>(file.GetData().data()), file.GetSize()); // FIXME: use a visitor or something. For now I'm lazy and this should work
if(auto* path = fileData.template GetIf<std::filesystem::path>(); path) {
auto fs = std::ifstream((*path).string(), std::ifstream::binary);
if(!fs)
throw std::runtime_error("couldnt open input file? HOW");
// tee data from the file stream efficiently
impl::TeeInOut(fs, os);
} else if(auto* buffer = fileData.template GetIf<std::vector<std::uint8_t>>(); buffer) {
os.write(reinterpret_cast<const char*>((*buffer).data()), file.GetSize());
}
// Align to sector boundary. // Align to sector boundary.
if(sectorAligned) if(sectorAligned)
os.seekp( os.seekp(
AlignBy(os.tellp(), kCDSectorSize), AlignBy(os.tellp(), kCDSectorSize),
std::istream::beg std::istream::beg);
);
sink.OnEvent({ sink.OnEvent({ PakProgressReportSink::FileEvent::Type::FileEndWrite,
PakProgressReportSink::FileEvent::Type::FileEndWrite, filename });
filename
});
} }
pakHeader.tocOffset = os.tellp(); pakHeader.tocOffset = os.tellp();
sink.OnEvent({ sink.OnEvent({ PakProgressReportSink::PakEvent::Type::WritingToc });
PakProgressReportSink::PakEvent::Type::WritingToc
});
// Write the TOC // Write the TOC
for(auto& [filename, file] : sortedFiles) { for(auto& [filename, file] : sortedFiles) {
file.FillTOCEntry();
// Write the filename Pascal string. // Write the filename Pascal string.
os.put(static_cast<char>(filename.length() + 1)); os.put(static_cast<char>(filename.length() + 1));
for(const auto c : filename) for(const auto c : filename)
@ -124,20 +127,14 @@ namespace europa::io {
}); });
} }
sink.OnEvent({ PakProgressReportSink::PakEvent::Type::FillInHeader });
sink.OnEvent({
PakProgressReportSink::PakEvent::Type::FillInHeader
});
// Fill out the rest of the header. // Fill out the rest of the header.
pakHeader.fileCount = sortedFiles.size(); pakHeader.fileCount = sortedFiles.size();
pakHeader.tocSize = static_cast<std::uint32_t>(os.tellp()) - (pakHeader.tocOffset - 1); pakHeader.tocSize = static_cast<std::uint32_t>(os.tellp()) - (pakHeader.tocOffset - 1);
pakHeader.creationUnixTime = 132890732; pakHeader.creationUnixTime = 132890732;
sink.OnEvent({ PakProgressReportSink::PakEvent::Type::WritingHeader });
sink.OnEvent({
PakProgressReportSink::PakEvent::Type::WritingHeader
});
// As the last step, write it. // As the last step, write it.
os.seekp(0, std::ostream::beg); os.seekp(0, std::ostream::beg);

View file

@ -7,6 +7,7 @@
// //
#include "StreamUtils.h" #include "StreamUtils.h"
#include <cstdint> #include <cstdint>
namespace europa::io::impl { namespace europa::io::impl {
@ -65,4 +66,15 @@ namespace europa::io::impl {
} }
} }
/// Copies everything that can still be read from `is` into `os`,
/// in chunks of up to 4096 bytes.
///
/// A read that hits end-of-file part way through a chunk marks the input
/// stream as failed, but the bytes it did deliver are still reported by
/// gcount(); those bytes are written out before the loop stops, so the
/// trailing partial chunk is not lost.
///
/// Copying stops early if the output stream enters a failed state.
///
/// @param is Stream to read from; left in EOF/fail state once exhausted.
/// @param os Stream to write to.
void TeeInOut(std::istream& is, std::ostream& os) {
	char buffer[4096];

	while(is) {
		is.read(&buffer[0], sizeof(buffer));

		// Trust gcount() rather than the stream state: the final short
		// read fails the stream but still yields valid data.
		const std::streamsize bytesRead = is.gcount();
		if(bytesRead <= 0)
			break;

		os.write(&buffer[0], bytesRead);
		if(!os)
			break;
	}
}
} // namespace europa::io::impl } // namespace europa::io::impl

View file

@ -53,6 +53,9 @@ namespace europa::io::impl {
std::string ReadZeroTerminatedString(std::istream& is); std::string ReadZeroTerminatedString(std::istream& is);
std::string ReadPString(std::istream& is); std::string ReadPString(std::istream& is);
/// Tees a input stream to an output stream until the input stream signals EOF.
void TeeInOut(std::istream& is, std::ostream& os);
} // namespace europa::io::impl } // namespace europa::io::impl
#endif // EUROPA_TOOLS_STREAMUTILS_H #endif // EUROPA_TOOLS_STREAMUTILS_H

View file

@ -14,6 +14,7 @@
#include <iostream> #include <iostream>
#include <tasks/CreateTask.hpp> #include <tasks/CreateTask.hpp>
#include <Utils.hpp> #include <Utils.hpp>
#include "europa/io/PakFile.hpp"
namespace eupak::tasks { namespace eupak::tasks {
@ -125,21 +126,8 @@ namespace eupak::tasks {
progress.set_option(indicators::option::PostfixText { relativePathName + " (" + std::to_string(currFile + 1) + '/' + std::to_string(fileCount) + ")" }); progress.set_option(indicators::option::PostfixText { relativePathName + " (" + std::to_string(currFile + 1) + '/' + std::to_string(fileCount) + ")" });
std::ifstream ifs(ent.path(), std::ifstream::binary);
if(!ifs) {
std::cout << "Error: Couldn't open file for archive path \"" << relativePathName << "\"\n";
return 1;
}
europa::io::PakFile file; europa::io::PakFile file;
europa::io::PakFile::DataType pakData; europa::io::PakFile::DataType pakData = europa::io::PakFileData::InitAsPath(ent.path());
ifs.seekg(0, std::ifstream::end);
pakData.resize(ifs.tellg());
ifs.seekg(0, std::ifstream::beg);
ifs.read(reinterpret_cast<char*>(&pakData[0]), pakData.size());
file.InitAs(args.pakVersion); file.InitAs(args.pakVersion);

View file

@ -6,13 +6,13 @@
// SPDX-License-Identifier: GPL-3.0-or-later // SPDX-License-Identifier: GPL-3.0-or-later
// //
#include <tasks/ExtractTask.hpp>
#include <europa/io/PakReader.hpp> #include <europa/io/PakReader.hpp>
#include <fstream> #include <fstream>
#include <indicators/cursor_control.hpp> #include <indicators/cursor_control.hpp>
#include <indicators/progress_bar.hpp> #include <indicators/progress_bar.hpp>
#include <iostream> #include <iostream>
#include <stdexcept>
#include <tasks/ExtractTask.hpp>
// this actually is pretty fast so maybe I won't bother doing crazy thread optimizations.. // this actually is pretty fast so maybe I won't bother doing crazy thread optimizations..
@ -80,8 +80,19 @@ namespace eupak::tasks {
std::cerr << "Extracting file \"" << filename << "\"...\n"; std::cerr << "Extracting file \"" << filename << "\"...\n";
} }
ofs.write(reinterpret_cast<const char*>(file.GetData().data()), static_cast<std::streampos>(file.GetSize())); {
ofs.flush(); auto& fileData = file.GetData();
if(auto* buffer = fileData.GetIf<std::vector<std::uint8_t>>(); buffer) {
ofs.write(reinterpret_cast<const char*>((*buffer).data()), (*buffer).size());
ofs.flush();
} else {
throw std::runtime_error("???? why are we getting paths here?");
}
}
// We no longer need the file data anymore, so let's purge it to save memory
file.PurgeData();
progress.tick(); progress.tick();
} }
@ -89,4 +100,4 @@ namespace eupak::tasks {
return 0; return 0;
} }
} } // namespace eupak::tasks