libeuropa/io: Rewrite PakFile to use a sumtype to store pak file data

This allows pak writer file data to cleanly come from one of two possible sources:

- A file on the filesystem (in which case the PakWriter will open the file, tee it into the package file efficiently, and then close it)
- A data buffer (which functions like before)

PakReader however will always output data buffers.
This commit is contained in:
Lily Tsuru 2025-01-06 17:12:58 -05:00
parent 788fcd9677
commit 5060bc4fb6
9 changed files with 234 additions and 120 deletions

View file

@ -11,6 +11,11 @@
#include <cstdint>
#include <europa/structs/Pak.hpp>
#include <europa/util/Overloaded.hpp>
#include <filesystem>
#include <stdexcept>
#include <type_traits>
#include <variant>
#include <vector>
namespace europa::io {
@ -18,14 +23,72 @@ namespace europa::io {
struct PakReader;
struct PakWriter;
/// Sum type describing where the contents of a pak file entry come from:
/// either an in-memory byte buffer, or a path to a file on the filesystem.
struct PakFileData {
	// clang-format off
	using Variant = std::variant<
		// File data
		std::vector<std::uint8_t>,
		// Path
		std::filesystem::path
	>;
	// clang-format on

	/// Creates file data which takes ownership of [buffer].
	static PakFileData InitAsBuffer(std::vector<std::uint8_t>&& buffer) {
		return PakFileData { Variant(std::move(buffer)) };
	}

	/// Creates file data which refers to the file at [path].
	/// The path is only stored here; it is not checked until GetSize() is called.
	static PakFileData InitAsPath(const std::filesystem::path& path) {
		return PakFileData { Variant(path) };
	}

	/// Returns the size of the data in bytes.
	///
	/// For a buffer this is the buffer's length. For a path this queries
	/// the filesystem for the current size of the file.
	///
	/// Throws std::runtime_error if the path does not name a regular file,
	/// or if the size does not fit into 32 bits. The std::filesystem calls
	/// may additionally throw std::filesystem::filesystem_error.
	std::uint32_t GetSize() const {
		// The visitor takes its argument by const reference: this member function
		// is const, so std::visit hands out const references to the alternatives.
		return std::visit([](const auto& source) -> std::uint32_t {
			using Source = std::decay_t<decltype(source)>;
			std::uintmax_t size {};

			if constexpr(std::is_same_v<Source, std::filesystem::path>) {
				// is_regular_file() is false for nonexistent paths as well, so this single
				// check rejects both missing files and things like directories.
				if(!std::filesystem::is_regular_file(source))
					throw std::runtime_error("invalid path in path file");
				size = std::filesystem::file_size(source);
			} else {
				size = source.size();
			}

			// Refuse to silently truncate sizes which don't fit the 32-bit return type.
			if(size > UINT32_MAX)
				throw std::runtime_error("file data is too large to store in a pak file");

			return static_cast<std::uint32_t>(size);
		},
						  variant_);
	}

	/// Returns a pointer to the held alternative if it is of type T,
	/// or nullptr if the other alternative is held.
	template <class T>
	const T* GetIf() const {
		return std::get_if<T>(&variant_);
	}

	// private:
	PakFileData::Variant variant_;
};
/// Represents a package file.
/// FIXME: Maybe make this not hold a buffer at some point,
/// or a sumtype which can contain either buffer OR path to os file
/// (which we can then efficiently tee into)
struct PakFile {
using DataType = std::vector<std::uint8_t>;
using DataType = PakFileData;
template<class T>
template <class T>
void InitAs(const T& value) {
toc = value;
}
@ -33,78 +96,88 @@ namespace europa::io {
/// Initializes the TOC entry with a value-initialized entry of the type
/// matching [version]. Any version not listed below leaves the TOC entry untouched.
void InitAs(structs::PakVersion version) {
	switch(version) {
		case structs::PakVersion::Ver3:
			toc = structs::PakHeader_V3::TocEntry {};
			break;
		case structs::PakVersion::Ver4:
			toc = structs::PakHeader_V4::TocEntry {};
			break;
		case structs::PakVersion::Ver5:
			toc = structs::PakHeader_V5::TocEntry {};
			break;
	}
}
/// Returns true if this file currently holds data (the optional is engaged).
bool HasData() const {
	return static_cast<bool>(fileData);
}
/**
 * Get the file data.
 *
 * Throws std::runtime_error if no data is currently held
 * (see HasData()).
 */
[[nodiscard]] const DataType& GetData() const {
	if(!fileData)
		throw std::runtime_error("no file data to get!");
	return *fileData;
}
/// Sets data, and updates the size stored in the TOC entry to match.
///
/// The size is computed before anything is stored, so if computing it
/// throws, neither the held data nor the TOC entry is modified.
void SetData(DataType&& data) {
	const std::uint32_t newSize = data.GetSize();

	this->fileData = std::move(data);

	// Update the TOC size.
	std::visit([newSize](auto& entry) {
		entry.size = newSize;
	},
			   toc);
}
/// Purge read file data.
void PurgeData() {
this->fileData = std::nullopt;
}
/**
* Get the TOC entry responsible.
*/
template<class T>
template <class T>
[[nodiscard]] const T& GetTOCEntry() const {
return std::get<T>(toc);
}
void SetData(DataType&& data) {
this->data = std::move(data);
// Update the TOC size.
std::visit([&](auto& entry) {
entry.size = this->data.size();
}, toc);
}
/// Returns the `creationUnixTime` field of the TOC entry.
std::uint32_t GetCreationUnixTime() const {
	return std::visit([](const auto& entry) -> std::uint32_t {
		return entry.creationUnixTime;
	},
					  toc);
}
/// Returns the `offset` field of the TOC entry.
std::uint32_t GetOffset() const {
	return std::visit([](const auto& entry) -> std::uint32_t {
		return entry.offset;
	},
					  toc);
}
/// Returns the `size` field of the TOC entry.
/// This is the size recorded in the TOC, not a fresh measurement of the held data.
std::uint32_t GetSize() const {
	return std::visit([](const auto& entry) -> std::uint32_t {
		return entry.size;
	},
					  toc);
}
void FillTOCEntry() {
std::visit([&](auto& entry) {
entry.size = static_cast<std::uint32_t>(data.size());
}, toc);
}
template<class Cb>
template <class Cb>
void Visit(const Cb& cb) {
std::visit(cb, toc);
}
@ -113,7 +186,7 @@ namespace europa::io {
friend PakReader;
friend PakWriter;
std::vector<std::uint8_t> data;
std::optional<PakFileData> fileData;
structs::PakTocEntryVariant toc;
};

View file

@ -3,7 +3,7 @@
//
// (C) 2021-2022 modeco80 <lily.modeco80@protonmail.ch>
//
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-License-Identifier: LGPL-3.0-or-later
//
#ifndef EUROPA_UTIL_FIXEDSTRING_H

View file

@ -0,0 +1,24 @@
//
// EuropaTools
//
// (C) 2021-2025 modeco80 <lily.modeco80@protonmail.ch>
//
// SPDX-License-Identifier: LGPL-3.0-or-later
//
#ifndef EUROPA_UTIL_OVERLOADED_HPP
#define EUROPA_UTIL_OVERLOADED_HPP
namespace europa {

	/// Combines several callables into a single function object whose call
	/// operator is the overload set of all of theirs. Handy as a visitor
	/// for std::visit.
	template <class... Callables>
	struct overloaded : Callables... {
		using Callables::operator()...;
	};

	// Deduction guide so the template arguments can be deduced from the callables
	// passed in. Supposedly this isn't needed, but the CTAD is required in this case.
	template <class... Callables>
	overloaded(Callables...) -> overloaded<Callables...>;

} // namespace europa
#endif

View file

@ -6,40 +6,41 @@
// SPDX-License-Identifier: LGPL-3.0-or-later
//
#include <cstdint>
#include <cstring>
#include <europa/io/PakReader.hpp>
#include <europa/structs/Pak.hpp>
#include <stdexcept>
#include "europa/io/PakFile.hpp"
#include "StreamUtils.h"
namespace europa::io {
/*
inline std::optional<PakHeader> GetPakHeader(const PakHeader_Common& common_header) {
switch(common_header.version) {
case PakVersion::Ver3:
return PakHeader_V3(common_header);
/*
inline std::optional<PakHeader> GetPakHeader(const PakHeader_Common& common_header) {
switch(common_header.version) {
case PakVersion::Ver3:
return PakHeader_V3(common_header);
case PakVersion::Ver4:
return PakHeader_V4(common_header);
case PakVersion::Ver4:
return PakHeader_V4(common_header);
case PakVersion::Ver5:
return PakHeader_V5(common_header);
case PakVersion::Ver5:
return PakHeader_V5(common_header);
case PakVersion::Invalid:
default:
return std::nullopt;
case PakVersion::Invalid:
default:
return std::nullopt;
}
}
}
*/
*/
/// Constructs a reader which reads from [is]; the `stream` member is initialized from it.
/// NOTE(review): `stream` is presumably a reference member, in which case [is]
/// must outlive this reader — confirm against the class declaration.
PakReader::PakReader(std::istream& is)
: stream(is) {
}
template<class T>
template <class T>
void PakReader::ReadData_Impl() {
auto header_type = impl::ReadStreamType<T>(stream);
@ -47,12 +48,12 @@ namespace europa::io {
invalid = true;
return;
}
bool isStreams{false};
bool isStreams { false };
if(header_type.tocOffset > 0x17000000)
isStreams = true;
// Read the archive TOC
stream.seekg(header_type.tocOffset, std::istream::beg);
for(auto i = 0; i < header_type.fileCount; ++i) {
@ -67,7 +68,6 @@ namespace europa::io {
files[filename].Visit([&](auto& tocEntry) {
tocEntry.creationUnixTime = impl::ReadStreamType<structs::u32>(stream);
});
}
header = header_type;
@ -101,16 +101,22 @@ namespace europa::io {
/// Reads the data of the file named [file] from the stream and stores it
/// on the file object as a buffer.
///
/// Does nothing if the file object already holds data.
/// Throws std::runtime_error if the stream is in a failed state after the read.
void PakReader::ReadFile(const std::string& file) {
	// NOTE(review): if MapType behaves like std::map, operator[] creates an
	// entry for a name which isn't in the archive — confirm whether that's intended.
	auto& fileObject = files[file];

	// This file was already read in, or has data
	// the user may not want to overwrite.
	if(fileObject.HasData())
		return;

	// Only allocate once we know we're actually going to read.
	std::vector<std::uint8_t> buffer(fileObject.GetSize());

	stream.seekg(fileObject.GetOffset(), std::istream::beg);
	// data() (unlike &buffer[0]) is fine to call on an empty buffer.
	stream.read(reinterpret_cast<char*>(buffer.data()), static_cast<std::streamsize>(buffer.size()));

	if(!stream)
		throw std::runtime_error("Stream went bad while trying to read file");

	fileObject.SetData(PakFileData::InitAsBuffer(std::move(buffer)));
}
PakReader::MapType& PakReader::GetFiles() {

View file

@ -9,10 +9,13 @@
#include <algorithm>
#include <europa/io/PakWriter.hpp>
#include <europa/util/TupleElement.hpp>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <stdexcept>
#include "StreamUtils.h"
#include "europa/structs/Pak.hpp"
#include "StreamUtils.h"
namespace europa::io {
@ -26,12 +29,12 @@ namespace europa::io {
// move to a util/ header
/// Rounds [value] up to the next multiple of [alignment] and returns the
/// aligned value itself, so the result can be used directly as an absolute
/// position. A value which is already aligned is returned unchanged.
///
/// [alignment] must be a power of two. [value] must be non-negative.
template <class T>
constexpr T AlignBy(T value, std::size_t alignment) {
	// Go through long long so this works both for integers and for stream
	// position types, which convert to and from a signed offset.
	const auto position = static_cast<unsigned long long>(static_cast<long long>(value));
	const auto mask = static_cast<unsigned long long>(alignment) - 1;

	return static_cast<T>(static_cast<long long>((position + mask) & ~mask));
}
void PakWriter::Write(std::ostream &os, std::vector<FlattenedType> &&vec, PakProgressReportSink &sink) {
void PakWriter::Write(std::ostream& os, std::vector<FlattenedType>&& vec, PakProgressReportSink& sink) {
switch(version) {
case structs::PakVersion::Ver3:
WriteImpl<structs::PakHeader_V3>(os, std::move(vec), sink);
@ -42,77 +45,77 @@ namespace europa::io {
case structs::PakVersion::Ver5:
WriteImpl<structs::PakHeader_V3>(os, std::move(vec), sink);
break;
default:
throw std::invalid_argument("Invalid version");
}
}
template<class T>
void PakWriter::WriteImpl(std::ostream& os, std::vector<FlattenedType>&& vec, PakProgressReportSink& sink, bool sectorAligned) {
template <class THeader>
void PakWriter::WriteImpl(std::ostream& os, std::vector<FlattenedType>&& vec, PakProgressReportSink& sink, bool sectorAligned) {
std::vector<FlattenedType> sortedFiles = std::move(vec);
T pakHeader{};
THeader pakHeader {};
// Sort the flattened array by file size, the biggest first.
// Doesn't seem to help (neither does name length)
std::ranges::sort(sortedFiles, std::greater{}, [](const FlattenedType& elem) {
// Sort the flattened array.
std::ranges::sort(sortedFiles, std::greater {}, [](const FlattenedType& elem) {
return elem.second.GetCreationUnixTime();
});
// Leave space for the header
os.seekp(sizeof(T), std::ostream::beg);
os.seekp(sizeof(THeader), std::ostream::beg);
// Version 5 paks seem to have an additional bit of reserved data
// (which is all zeros.)
if(T::VERSION == structs::PakVersion::Ver5) {
if(THeader::VERSION == structs::PakVersion::Ver5) {
os.seekp(6, std::ostream::cur);
}
// Align first file to sector boundary.
if(sectorAligned)
os.seekp(
AlignBy(os.tellp(), kCDSectorSize),
std::istream::beg
);
os.seekp(
AlignBy(os.tellp(), kCDSectorSize),
std::istream::beg);
// Write file data
// Write all the file data
for(auto& [filename, file] : sortedFiles) {
sink.OnEvent({
PakProgressReportSink::FileEvent::Type::FileBeginWrite,
filename
});
sink.OnEvent({ PakProgressReportSink::FileEvent::Type::FileBeginWrite,
filename });
// Update the offset to where we currently are, since we will be writing the file there
file.Visit([&](auto& tocEntry) {
tocEntry.offset = os.tellp();
});
// FIXME: Should we rely on GetSize() when writing? Honestly, it seems like a bit of a
// mistake that caused a pretty glaring bug.
os.write(reinterpret_cast<const char*>(file.GetData().data()), file.GetSize());
auto& fileData = file.GetData();
// FIXME: use a visitor or something. For now I'm lazy and this should work
if(auto* path = fileData.template GetIf<std::filesystem::path>(); path) {
auto fs = std::ifstream((*path).string(), std::ifstream::binary);
if(!fs)
throw std::runtime_error("couldnt open input file? HOW");
// tee data from the file stream efficiently
impl::TeeInOut(fs, os);
} else if(auto* buffer = fileData.template GetIf<std::vector<std::uint8_t>>(); buffer) {
os.write(reinterpret_cast<const char*>((*buffer).data()), file.GetSize());
}
// Align to sector boundary.
if(sectorAligned)
os.seekp(
AlignBy(os.tellp(), kCDSectorSize),
std::istream::beg
);
os.seekp(
AlignBy(os.tellp(), kCDSectorSize),
std::istream::beg);
sink.OnEvent({
PakProgressReportSink::FileEvent::Type::FileEndWrite,
filename
});
sink.OnEvent({ PakProgressReportSink::FileEvent::Type::FileEndWrite,
filename });
}
pakHeader.tocOffset = os.tellp();
sink.OnEvent({
PakProgressReportSink::PakEvent::Type::WritingToc
});
sink.OnEvent({ PakProgressReportSink::PakEvent::Type::WritingToc });
// Write the TOC
for(auto& [filename, file] : sortedFiles) {
file.FillTOCEntry();
// Write the filename Pascal string.
os.put(static_cast<char>(filename.length() + 1));
for(const auto c : filename)
@ -124,20 +127,14 @@ namespace europa::io {
});
}
sink.OnEvent({
PakProgressReportSink::PakEvent::Type::FillInHeader
});
sink.OnEvent({ PakProgressReportSink::PakEvent::Type::FillInHeader });
// Fill out the rest of the header.
pakHeader.fileCount = sortedFiles.size();
pakHeader.tocSize = static_cast<std::uint32_t>(os.tellp()) - (pakHeader.tocOffset - 1);
pakHeader.creationUnixTime = 132890732;
sink.OnEvent({
PakProgressReportSink::PakEvent::Type::WritingHeader
});
sink.OnEvent({ PakProgressReportSink::PakEvent::Type::WritingHeader });
// As the last step, write it.
os.seekp(0, std::ostream::beg);

View file

@ -7,6 +7,7 @@
//
#include "StreamUtils.h"
#include <cstdint>
namespace europa::io::impl {
@ -65,4 +66,15 @@ namespace europa::io::impl {
}
}
/// Copies everything remaining in [is] to [os], in chunks of up to 4096 bytes,
/// until the input is exhausted or either stream enters a failed state.
void TeeInOut(std::istream& is, std::ostream& os) {
	char buffer[4096];

	while(is && os) {
		// The final read usually comes up short, which makes read() set failbit
		// and eofbit. gcount() still reports how many bytes it delivered, and those
		// bytes must be written — so the stream state is only checked by the loop
		// condition, after the chunk has been written out.
		is.read(&buffer[0], sizeof(buffer));

		const std::streamsize bytesRead = is.gcount();
		if(bytesRead <= 0)
			break;

		os.write(&buffer[0], bytesRead);
	}
}
} // namespace europa::io::impl

View file

@ -53,6 +53,9 @@ namespace europa::io::impl {
std::string ReadZeroTerminatedString(std::istream& is);
std::string ReadPString(std::istream& is);
/// Tees an input stream to an output stream until the input stream signals EOF.
void TeeInOut(std::istream& is, std::ostream& os);
} // namespace europa::io::impl
#endif // EUROPA_TOOLS_STREAMUTILS_H

View file

@ -14,6 +14,7 @@
#include <iostream>
#include <tasks/CreateTask.hpp>
#include <Utils.hpp>
#include "europa/io/PakFile.hpp"
namespace eupak::tasks {
@ -125,21 +126,8 @@ namespace eupak::tasks {
progress.set_option(indicators::option::PostfixText { relativePathName + " (" + std::to_string(currFile + 1) + '/' + std::to_string(fileCount) + ")" });
std::ifstream ifs(ent.path(), std::ifstream::binary);
if(!ifs) {
std::cout << "Error: Couldn't open file for archive path \"" << relativePathName << "\"\n";
return 1;
}
europa::io::PakFile file;
europa::io::PakFile::DataType pakData;
ifs.seekg(0, std::ifstream::end);
pakData.resize(ifs.tellg());
ifs.seekg(0, std::ifstream::beg);
ifs.read(reinterpret_cast<char*>(&pakData[0]), pakData.size());
europa::io::PakFile::DataType pakData = europa::io::PakFileData::InitAsPath(ent.path());
file.InitAs(args.pakVersion);

View file

@ -6,13 +6,13 @@
// SPDX-License-Identifier: GPL-3.0-or-later
//
#include <tasks/ExtractTask.hpp>
#include <europa/io/PakReader.hpp>
#include <fstream>
#include <indicators/cursor_control.hpp>
#include <indicators/progress_bar.hpp>
#include <iostream>
#include <stdexcept>
#include <tasks/ExtractTask.hpp>
// this actually is pretty fast so maybe I won't bother doing crazy thread optimizations..
@ -80,8 +80,19 @@ namespace eupak::tasks {
std::cerr << "Extracting file \"" << filename << "\"...\n";
}
ofs.write(reinterpret_cast<const char*>(file.GetData().data()), static_cast<std::streampos>(file.GetSize()));
ofs.flush();
{
auto& fileData = file.GetData();
if(auto* buffer = fileData.GetIf<std::vector<std::uint8_t>>(); buffer) {
ofs.write(reinterpret_cast<const char*>((*buffer).data()), (*buffer).size());
ofs.flush();
} else {
throw std::runtime_error("???? why are we getting paths here?");
}
}
// We no longer need the file data anymore, so let's purge it to save memory
file.PurgeData();
progress.tick();
}
@ -89,4 +100,4 @@ namespace eupak::tasks {
return 0;
}
}
} // namespace eupak::tasks