From 5060bc4fb604423f31312de4988aad28f4a57e88 Mon Sep 17 00:00:00 2001 From: modeco80 Date: Mon, 6 Jan 2025 17:12:58 -0500 Subject: [PATCH] libeuropa/io: Rewrite PakFile to use a sumtype to store pak file data This allows pak writer file data to cleanly come from one of two possible sources: - A file on the filesystem (in which case the PakWriter will open the file, tee it into the package file efficiently, and then close it) - A data buffer (which functions like before) PakReader, however, will always output data buffers. --- include/europa/io/PakFile.hpp | 133 ++++++++++++++++++++------ include/europa/util/FixedString.hpp | 2 +- include/europa/util/Overloaded.hpp | 24 +++++ src/libeuropa/io/PakReader.cpp | 56 ++++++----- src/libeuropa/io/PakWriter.cpp | 87 ++++++++--------- src/libeuropa/io/StreamUtils.cpp | 12 +++ src/libeuropa/io/StreamUtils.h | 3 + src/tools/eupak/tasks/CreateTask.cpp | 16 +--- src/tools/eupak/tasks/ExtractTask.cpp | 21 +++- 9 files changed, 234 insertions(+), 120 deletions(-) create mode 100644 include/europa/util/Overloaded.hpp diff --git a/include/europa/io/PakFile.hpp b/include/europa/io/PakFile.hpp index 96b9d60..80efddd 100644 --- a/include/europa/io/PakFile.hpp +++ b/include/europa/io/PakFile.hpp @@ -11,6 +11,11 @@ #include #include +#include +#include +#include +#include +#include #include namespace europa::io { @@ -18,14 +23,72 @@ namespace europa::io { struct PakReader; struct PakWriter; + /// Sum type holding a pak file's data: either an in-memory buffer or a filesystem path. + struct PakFileData { + // clang-format off + using Variant = std::variant< + // File data + std::vector, + + // Path + std::filesystem::path + >; + // clang-format on + + static PakFileData InitAsBuffer(std::vector&& buffer) { + return PakFileData { + .variant_ = Variant(std::move(buffer)) + }; + } + + static PakFileData InitAsPath(const std::filesystem::path& path) { + return PakFileData { + .variant_ = Variant(path) + }; + } + + std::uint32_t GetSize() const { + // FIXME: make this just an overloaded lambda + struct SizeVisitor { +
std::uint32_t& size; + + // Overloads take const references: GetSize() is const, so it visits a const variant. + void operator()(const std::vector& buffer) { + size = static_cast(buffer.size()); + } + + void operator()(const std::filesystem::path& fsPath) { + if(!std::filesystem::exists(fsPath) || !std::filesystem::is_regular_file(fsPath)) + throw std::runtime_error("invalid path in path file"); + size = static_cast(std::filesystem::file_size(fsPath)); + } + }; + + std::uint32_t size {}; + auto visitor = SizeVisitor { size }; + + std::visit(visitor, variant_); + + return size; + } + + template + const T* GetIf() const { + return std::get_if(&variant_); + } + + // private: + PakFileData::Variant variant_; + }; + /// Repressents a package file. /// FIXME: Maybe make this not hold a buffer at some point, /// or a sumtype which can contain either buffer OR path to os file /// (which we can then efficiently tee into) struct PakFile { - using DataType = std::vector; + using DataType = PakFileData; - template + template void InitAs(const T& value) { toc = value; } @@ -33,78 +96,88 @@ namespace europa::io { void InitAs(structs::PakVersion version) { switch(version) { case structs::PakVersion::Ver3: - toc = structs::PakHeader_V3::TocEntry{}; + toc = structs::PakHeader_V3::TocEntry {}; break; case structs::PakVersion::Ver4: - toc = structs::PakHeader_V4::TocEntry{}; + toc = structs::PakHeader_V4::TocEntry {}; break; case structs::PakVersion::Ver5: - toc = structs::PakHeader_V5::TocEntry{}; + toc = structs::PakHeader_V5::TocEntry {}; break; } } + bool HasData() const { + return fileData.has_value(); + } + /** * Get the file data. */ [[nodiscard]] const DataType& GetData() const { - return data; + if(!fileData.has_value()) + throw std::runtime_error("no file data to get!"); + return fileData.value(); + } + + /// Sets data. + void SetData(DataType&& data) { + this->fileData = std::move(data); + + // Update the TOC size. + std::visit([&](auto& entry) { + entry.size = this->fileData.value().GetSize(); + }, + toc); + } + + /// Purge read file data.
+ void PurgeData() { + this->fileData = std::nullopt; } /** * Get the TOC entry responsible. */ - template + template [[nodiscard]] const T& GetTOCEntry() const { return std::get(toc); } - void SetData(DataType&& data) { - this->data = std::move(data); - - // Update the TOC size. - std::visit([&](auto& entry) { - entry.size = this->data.size(); - }, toc); - } - std::uint32_t GetCreationUnixTime() const { - std::uint32_t time{}; + std::uint32_t time {}; std::visit([&](auto& entry) { time = entry.creationUnixTime; - }, toc); + }, + toc); return time; } std::uint32_t GetOffset() const { - std::uint32_t size{}; + std::uint32_t size {}; std::visit([&](auto& entry) { size = entry.offset; - }, toc); + }, + toc); return size; } std::uint32_t GetSize() const { - std::uint32_t size{}; + std::uint32_t size {}; std::visit([&](auto& entry) { size = entry.size; - }, toc); + }, + toc); return size; } - void FillTOCEntry() { - std::visit([&](auto& entry) { - entry.size = static_cast(data.size()); - }, toc); - } - - template + template void Visit(const Cb& cb) { std::visit(cb, toc); } @@ -113,7 +186,7 @@ namespace europa::io { friend PakReader; friend PakWriter; - std::vector data; + std::optional fileData; structs::PakTocEntryVariant toc; }; diff --git a/include/europa/util/FixedString.hpp b/include/europa/util/FixedString.hpp index e2158c3..d40c4eb 100644 --- a/include/europa/util/FixedString.hpp +++ b/include/europa/util/FixedString.hpp @@ -3,7 +3,7 @@ // // (C) 2021-2022 modeco80 // -// SPDX-License-Identifier: GPL-3.0-or-later +// SPDX-License-Identifier: LGPL-3.0-or-later // #ifndef EUROPA_UTIL_FIXEDSTRING_H diff --git a/include/europa/util/Overloaded.hpp b/include/europa/util/Overloaded.hpp new file mode 100644 index 0000000..21e78e5 --- /dev/null +++ b/include/europa/util/Overloaded.hpp @@ -0,0 +1,24 @@ +// +// EuropaTools +// +// (C) 2021-2025 modeco80 +// +// SPDX-License-Identifier: LGPL-3.0-or-later +// + +#ifndef EUROPA_UTIL_OVERLOADED_HPP +#define 
EUROPA_UTIL_OVERLOADED_HPP + +namespace europa { + template + struct overloaded : Ts... { + using Ts::operator()...; + }; + + // Supposedly this isn't needed but the CTAD is required in this case + + template + overloaded(Ts...) -> overloaded; +} // namespace europa + +#endif \ No newline at end of file diff --git a/src/libeuropa/io/PakReader.cpp b/src/libeuropa/io/PakReader.cpp index 9dea982..329654b 100644 --- a/src/libeuropa/io/PakReader.cpp +++ b/src/libeuropa/io/PakReader.cpp @@ -6,40 +6,41 @@ // SPDX-License-Identifier: LGPL-3.0-or-later // +#include #include #include #include +#include +#include "europa/io/PakFile.hpp" #include "StreamUtils.h" namespace europa::io { -/* - inline std::optional GetPakHeader(const PakHeader_Common& common_header) { - switch(common_header.version) { - case PakVersion::Ver3: - return PakHeader_V3(common_header); + /* + inline std::optional GetPakHeader(const PakHeader_Common& common_header) { + switch(common_header.version) { + case PakVersion::Ver3: + return PakHeader_V3(common_header); - case PakVersion::Ver4: - return PakHeader_V4(common_header); + case PakVersion::Ver4: + return PakHeader_V4(common_header); - case PakVersion::Ver5: - return PakHeader_V5(common_header); + case PakVersion::Ver5: + return PakHeader_V5(common_header); - case PakVersion::Invalid: - default: - return std::nullopt; + case PakVersion::Invalid: + default: + return std::nullopt; + } } - } - */ - - + */ PakReader::PakReader(std::istream& is) : stream(is) { } - template + template void PakReader::ReadData_Impl() { auto header_type = impl::ReadStreamType(stream); @@ -47,12 +48,12 @@ namespace europa::io { invalid = true; return; } - - bool isStreams{false}; + + bool isStreams { false }; if(header_type.tocOffset > 0x17000000) isStreams = true; - + // Read the archive TOC stream.seekg(header_type.tocOffset, std::istream::beg); for(auto i = 0; i < header_type.fileCount; ++i) { @@ -67,7 +68,6 @@ namespace europa::io { files[filename].Visit([&](auto&
tocEntry) { tocEntry.creationUnixTime = impl::ReadStreamType(stream); }); - } header = header_type; @@ -101,16 +101,22 @@ namespace europa::io { void PakReader::ReadFile(const std::string& file) { auto& fileObject = files[file]; + std::vector buffer; + + buffer.resize(fileObject.GetSize()); // This file was already read in, or has data // the user may not want to overwrite. - if(!fileObject.data.empty()) + if(fileObject.HasData()) return; - fileObject.data.resize(fileObject.GetSize()); - stream.seekg(fileObject.GetOffset(), std::istream::beg); - stream.read(reinterpret_cast(&fileObject.data[0]), fileObject.GetSize()); + stream.read(reinterpret_cast(buffer.data()), buffer.size()); + if(!stream) + throw std::runtime_error("Stream went bad while trying to read file"); + + auto data = PakFileData::InitAsBuffer(std::move(buffer)); + fileObject.SetData(std::move(data)); } PakReader::MapType& PakReader::GetFiles() { diff --git a/src/libeuropa/io/PakWriter.cpp b/src/libeuropa/io/PakWriter.cpp index a596ff2..63fe302 100644 --- a/src/libeuropa/io/PakWriter.cpp +++ b/src/libeuropa/io/PakWriter.cpp @@ -9,10 +9,13 @@ #include #include #include +#include +#include #include +#include -#include "StreamUtils.h" #include "europa/structs/Pak.hpp" +#include "StreamUtils.h" namespace europa::io { @@ -26,12 +29,12 @@ namespace europa::io { // move to a util/ header - template + template constexpr T AlignBy(T value, std::size_t alignment) { return (-value) & alignment - 1; } - void PakWriter::Write(std::ostream &os, std::vector &&vec, PakProgressReportSink &sink) { + void PakWriter::Write(std::ostream& os, std::vector&& vec, PakProgressReportSink& sink) { switch(version) { case structs::PakVersion::Ver3: WriteImpl(os, std::move(vec), sink); @@ -42,77 +45,77 @@ namespace europa::io { case structs::PakVersion::Ver5: WriteImpl(os, std::move(vec), sink); break; + default: + throw std::invalid_argument("Invalid version"); } } - template - void PakWriter::WriteImpl(std::ostream& os,
std::vector&& vec, PakProgressReportSink& sink, bool sectorAligned) { - + template + void PakWriter::WriteImpl(std::ostream& os, std::vector&& vec, PakProgressReportSink& sink, bool sectorAligned) { std::vector sortedFiles = std::move(vec); - T pakHeader{}; + THeader pakHeader {}; - // Sort the flattened array by file size, the biggest first. - // Doesn't seem to help (neither does name length) - std::ranges::sort(sortedFiles, std::greater{}, [](const FlattenedType& elem) { + // Sort the flattened array. + std::ranges::sort(sortedFiles, std::greater {}, [](const FlattenedType& elem) { return elem.second.GetCreationUnixTime(); }); // Leave space for the header - os.seekp(sizeof(T), std::ostream::beg); + os.seekp(sizeof(THeader), std::ostream::beg); // Version 5 paks seem to have an additional bit of reserved data // (which is all zeros.) - if(T::VERSION == structs::PakVersion::Ver5) { + if(THeader::VERSION == structs::PakVersion::Ver5) { os.seekp(6, std::ostream::cur); } // Align first file to sector boundary. if(sectorAligned) - os.seekp( - AlignBy(os.tellp(), kCDSectorSize), - std::istream::beg - ); + os.seekp( + AlignBy(os.tellp(), kCDSectorSize), + std::istream::beg); - // Write file data + // Write all the file data for(auto& [filename, file] : sortedFiles) { - sink.OnEvent({ - PakProgressReportSink::FileEvent::Type::FileBeginWrite, - filename - }); + sink.OnEvent({ PakProgressReportSink::FileEvent::Type::FileBeginWrite, + filename }); // Update the offset to where we currently are, since we will be writing the file there file.Visit([&](auto& tocEntry) { tocEntry.offset = os.tellp(); }); - // FIXME: Should we rely on GetSize() when writing? Honestly, it seems like a bit of a - // mistake that caused a pretty glaring bug. - os.write(reinterpret_cast(file.GetData().data()), file.GetSize()); + auto& fileData = file.GetData(); + + // FIXME: use a visitor or something. 
For now I'm lazy and this should work + if(auto* path = fileData.template GetIf(); path) { + auto fs = std::ifstream((*path).string(), std::ifstream::binary); + if(!fs) + throw std::runtime_error("couldnt open input file? HOW"); + + // tee data from the file stream efficiently + impl::TeeInOut(fs, os); + } else if(auto* buffer = fileData.template GetIf>(); buffer) { + os.write(reinterpret_cast((*buffer).data()), file.GetSize()); + } // Align to sector boundary. if(sectorAligned) - os.seekp( - AlignBy(os.tellp(), kCDSectorSize), - std::istream::beg - ); + os.seekp( + AlignBy(os.tellp(), kCDSectorSize), + std::istream::beg); - sink.OnEvent({ - PakProgressReportSink::FileEvent::Type::FileEndWrite, - filename - }); + sink.OnEvent({ PakProgressReportSink::FileEvent::Type::FileEndWrite, + filename }); } pakHeader.tocOffset = os.tellp(); - sink.OnEvent({ - PakProgressReportSink::PakEvent::Type::WritingToc - }); + sink.OnEvent({ PakProgressReportSink::PakEvent::Type::WritingToc }); // Write the TOC for(auto& [filename, file] : sortedFiles) { - file.FillTOCEntry(); - // Write the filename Pascal string. os.put(static_cast(filename.length() + 1)); for(const auto c : filename) @@ -124,20 +127,14 @@ namespace europa::io { }); } - - sink.OnEvent({ - PakProgressReportSink::PakEvent::Type::FillInHeader - }); + sink.OnEvent({ PakProgressReportSink::PakEvent::Type::FillInHeader }); // Fill out the rest of the header. pakHeader.fileCount = sortedFiles.size(); pakHeader.tocSize = static_cast(os.tellp()) - (pakHeader.tocOffset - 1); pakHeader.creationUnixTime = 132890732; - - sink.OnEvent({ - PakProgressReportSink::PakEvent::Type::WritingHeader - }); + sink.OnEvent({ PakProgressReportSink::PakEvent::Type::WritingHeader }); // As the last step, write it. 
os.seekp(0, std::ostream::beg); diff --git a/src/libeuropa/io/StreamUtils.cpp b/src/libeuropa/io/StreamUtils.cpp index 9b20b01..b42dec4 100644 --- a/src/libeuropa/io/StreamUtils.cpp +++ b/src/libeuropa/io/StreamUtils.cpp @@ -7,6 +7,7 @@ // #include "StreamUtils.h" + #include namespace europa::io::impl { @@ -65,4 +66,15 @@ namespace europa::io::impl { } } + void TeeInOut(std::istream& is, std::ostream& os) { + std::uint8_t buffer[4096] {}; + + while(is) { + // The final read is usually short and sets failbit; gcount() still reports the bytes it got. + is.read(reinterpret_cast(&buffer[0]), sizeof(buffer)); + + os.write(reinterpret_cast(&buffer[0]), is.gcount()); + } + } + } // namespace europa::io::impl diff --git a/src/libeuropa/io/StreamUtils.h b/src/libeuropa/io/StreamUtils.h index a64357d..44ac9e2 100644 --- a/src/libeuropa/io/StreamUtils.h +++ b/src/libeuropa/io/StreamUtils.h @@ -53,6 +53,9 @@ namespace europa::io::impl { std::string ReadZeroTerminatedString(std::istream& is); std::string ReadPString(std::istream& is); + /// Tees an input stream to an output stream until the input stream signals EOF.
+ void TeeInOut(std::istream& is, std::ostream& os); + } // namespace europa::io::impl #endif // EUROPA_TOOLS_STREAMUTILS_H diff --git a/src/tools/eupak/tasks/CreateTask.cpp b/src/tools/eupak/tasks/CreateTask.cpp index f631940..a9145dc 100644 --- a/src/tools/eupak/tasks/CreateTask.cpp +++ b/src/tools/eupak/tasks/CreateTask.cpp @@ -14,6 +14,7 @@ #include #include #include +#include "europa/io/PakFile.hpp" namespace eupak::tasks { @@ -125,21 +126,8 @@ namespace eupak::tasks { progress.set_option(indicators::option::PostfixText { relativePathName + " (" + std::to_string(currFile + 1) + '/' + std::to_string(fileCount) + ")" }); - std::ifstream ifs(ent.path(), std::ifstream::binary); - - if(!ifs) { - std::cout << "Error: Couldn't open file for archive path \"" << relativePathName << "\"\n"; - return 1; - } - europa::io::PakFile file; - europa::io::PakFile::DataType pakData; - - ifs.seekg(0, std::ifstream::end); - pakData.resize(ifs.tellg()); - ifs.seekg(0, std::ifstream::beg); - - ifs.read(reinterpret_cast(&pakData[0]), pakData.size()); + europa::io::PakFile::DataType pakData = europa::io::PakFileData::InitAsPath(ent.path()); file.InitAs(args.pakVersion); diff --git a/src/tools/eupak/tasks/ExtractTask.cpp b/src/tools/eupak/tasks/ExtractTask.cpp index 9bf94b9..a815fce 100644 --- a/src/tools/eupak/tasks/ExtractTask.cpp +++ b/src/tools/eupak/tasks/ExtractTask.cpp @@ -6,13 +6,13 @@ // SPDX-License-Identifier: GPL-3.0-or-later // -#include - #include #include #include #include #include +#include +#include // this actually is pretty fast so maybe I won't bother doing crazy thread optimizations.. 
@@ -80,8 +80,19 @@ namespace eupak::tasks { std::cerr << "Extracting file \"" << filename << "\"...\n"; } - ofs.write(reinterpret_cast(file.GetData().data()), static_cast(file.GetSize())); - ofs.flush(); + { + auto& fileData = file.GetData(); + if(auto* buffer = fileData.GetIf>(); buffer) { + ofs.write(reinterpret_cast((*buffer).data()), (*buffer).size()); + ofs.flush(); + } else { + throw std::runtime_error("???? why are we getting paths here?"); + } + } + + // We no longer need the file data anymore, so let's purge it to save memory + file.PurgeData(); + progress.tick(); } @@ -89,4 +100,4 @@ namespace eupak::tasks { return 0; } -} \ No newline at end of file +} // namespace eupak::tasks \ No newline at end of file