*: Remove map flattening copy
Reduces memory usage and may significantly improve performance: the writer no longer copies its file map into a flattened vector before writing. Yay!
This commit is contained in:
parent
cf9d84cb24
commit
e698d1da3b
3 changed files with 15 additions and 61 deletions
|
@ -13,7 +13,7 @@
|
|||
#include <europa/io/PakProgressReportSink.hpp>
|
||||
#include <iosfwd>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
|
||||
namespace europa::io {
|
||||
|
||||
|
@ -21,21 +21,19 @@ namespace europa::io {
|
|||
* Writer for package files.
|
||||
*/
|
||||
struct PakWriter {
|
||||
using FlattenedType = std::pair<std::string, PakFile>;
|
||||
|
||||
void Init(structs::PakHeader::Version version);
|
||||
|
||||
// TODO: accessor for header
|
||||
// Use the flattened vector format anyhow (fewer allocations, higher performance).
|
||||
|
||||
std::unordered_map<std::string, PakFile>& GetFiles();
|
||||
const structs::PakHeader& GetHeader() const { return pakHeader; }
|
||||
|
||||
/**
|
||||
* Write the resulting archive to the given output stream.
|
||||
*/
|
||||
void Write(std::ostream& os, PakProgressReportSink& sink);
|
||||
void Write(std::ostream& os, std::vector<FlattenedType>&& vec, PakProgressReportSink& sink);
|
||||
|
||||
private:
|
||||
structs::PakHeader pakHeader {};
|
||||
std::unordered_map<std::string, PakFile> archiveFiles;
|
||||
};
|
||||
|
||||
} // namespace europa::io
|
||||
|
|
|
@ -20,10 +20,6 @@ namespace europa::io {
|
|||
pakHeader.Init(version);
|
||||
}
|
||||
|
||||
std::unordered_map<std::string, PakFile>& PakWriter::GetFiles() {
|
||||
return archiveFiles;
|
||||
}
|
||||
|
||||
// TODO: move this to a util/ header
|
||||
|
||||
template<class T>
|
||||
|
@ -31,53 +27,17 @@ namespace europa::io {
|
|||
return (-value) & alignment - 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Class functor for flattening a map.
|
||||
*/
|
||||
template<class Map>
|
||||
struct MapFlatten {
|
||||
/**
|
||||
* Storage type to store one key -> value pair.
|
||||
*/
|
||||
using FlattenedType = std::pair<typename Map::key_type, typename Map::mapped_type>;
|
||||
using ArrayType = std::vector<FlattenedType>;
|
||||
|
||||
constexpr explicit MapFlatten(Map& mapToFlatten)
|
||||
: map(mapToFlatten) {
|
||||
|
||||
}
|
||||
|
||||
ArrayType operator()() const {
|
||||
ArrayType arr;
|
||||
arr.reserve(map.size());
|
||||
|
||||
for(auto& [ key, value ] : map)
|
||||
arr.emplace_back(std::make_pair(key, value));
|
||||
|
||||
return arr;
|
||||
}
|
||||
|
||||
private:
|
||||
Map& map;
|
||||
};
|
||||
|
||||
// TODO:
|
||||
// - Composable operations (WriteTOC, WriteFile, WriteHeader)
|
||||
// - Add IProgressReportSink reporting
|
||||
|
||||
void PakWriter::Write(std::ostream& os, PakProgressReportSink& sink) {
|
||||
void PakWriter::Write(std::ostream& os, std::vector<FlattenedType>&& vec, PakProgressReportSink& sink) {
|
||||
|
||||
// This essentially converts our map we use for faster insertion
|
||||
// into a flat array we can sort easily.
|
||||
//
|
||||
// NB: this copies by value, so during this function we use 2x the ram.
|
||||
// doesn't seem to be a big problem though.
|
||||
auto sortedFiles = MapFlatten{archiveFiles}();
|
||||
std::vector<FlattenedType> sortedFiles = std::move(vec);
|
||||
|
||||
// Sort the flattened array by file size, the biggest first.
|
||||
// Doesn't seem to help (neither does name length)
|
||||
std::ranges::sort(sortedFiles, std::greater{}, [](const decltype(MapFlatten{archiveFiles})::FlattenedType& elem) {
|
||||
return std::get<1>(elem).GetTOCEntry().size;
|
||||
std::ranges::sort(sortedFiles, std::greater{}, [](const FlattenedType& elem) {
|
||||
return elem.second.GetTOCEntry().size;
|
||||
});
|
||||
|
||||
// Leave space for the header
|
||||
|
@ -91,9 +51,6 @@ namespace europa::io {
|
|||
|
||||
// Write file data
|
||||
for(auto& [filename, file] : sortedFiles) {
|
||||
//std::cout << "PakWriteFile \"" << filename << "\"\n Size " << file.GetTOCEntry().size << "\n";
|
||||
|
||||
|
||||
sink.OnEvent({
|
||||
PakProgressReportSink::FileEvent::Type::FileBeginWrite,
|
||||
filename
|
||||
|
@ -105,7 +62,6 @@ namespace europa::io {
|
|||
// Flush on file writing
|
||||
os.flush();
|
||||
|
||||
|
||||
sink.OnEvent({
|
||||
PakProgressReportSink::FileEvent::Type::FileEndWrite,
|
||||
filename
|
||||
|
@ -114,7 +70,6 @@ namespace europa::io {
|
|||
|
||||
pakHeader.tocOffset = os.tellp();
|
||||
|
||||
|
||||
sink.OnEvent({
|
||||
PakProgressReportSink::PakEvent::Type::WritingToc
|
||||
});
|
||||
|
@ -139,7 +94,7 @@ namespace europa::io {
|
|||
|
||||
|
||||
// Fill out the rest of the header.
|
||||
pakHeader.fileCount = archiveFiles.size();
|
||||
pakHeader.fileCount = sortedFiles.size();
|
||||
pakHeader.tocSize = static_cast<std::uint32_t>(os.tellp()) - (pakHeader.tocOffset - 1);
|
||||
|
||||
|
||||
|
|
|
@ -108,6 +108,9 @@ namespace eupak::tasks {
|
|||
// TODO: use time to write in the header
|
||||
// Also: is there any point to verbosity? It could report the written archive size, I guess.
|
||||
|
||||
std::vector<europa::io::PakWriter::FlattenedType> files;
|
||||
files.reserve(fileCount);
|
||||
|
||||
for(auto& ent : fs::recursive_directory_iterator(args.inputDirectory)) {
|
||||
if(ent.is_directory())
|
||||
continue;
|
||||
|
@ -144,8 +147,7 @@ namespace eupak::tasks {
|
|||
|
||||
file.GetTOCEntry().creationUnixTime = static_cast<std::uint32_t>(lastModified.time_since_epoch().count());
|
||||
|
||||
writer.GetFiles()[relativePathName] = std::move(file);
|
||||
|
||||
files.emplace_back(std::make_pair(relativePathName, std::move(file)));
|
||||
progress.tick();
|
||||
currFile++;
|
||||
}
|
||||
|
@ -159,10 +161,9 @@ namespace eupak::tasks {
|
|||
return 1;
|
||||
}
|
||||
|
||||
|
||||
CreateArchiveReportSink sink(fileCount);
|
||||
|
||||
writer.Write(ofs, sink);
|
||||
writer.Write(ofs, std::move(files), sink);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue