*: Remove map flattening copy

Decreases memory usage and possibly improves performance significantly! Yay!
This commit is contained in:
Lily Tsuru 2022-09-22 19:15:06 -05:00
parent cf9d84cb24
commit e698d1da3b
3 changed files with 15 additions and 61 deletions

View file

@ -13,7 +13,7 @@
#include <europa/io/PakProgressReportSink.hpp>
#include <iosfwd>
#include <string>
#include <unordered_map>
#include <utility>
namespace europa::io {
@ -21,21 +21,19 @@ namespace europa::io {
* Writer for package files.
*/
struct PakWriter {
// One archive entry in flat form: a file name paired with its PakFile.
// This is the element type of the vector accepted by the three-argument Write().
using FlattenedType = std::pair<std::string, PakFile>;
// NOTE(review): presumably initializes the stored header for the given
// package version — confirm against the definition.
void Init(structs::PakHeader::Version version);
// TODO: accessor for header
// use flattened vector format anyhow (less allocs, higher perf)
// Mutable access to the name -> PakFile map stored in this writer.
std::unordered_map<std::string, PakFile>& GetFiles();
// Read-only access to the header stored in this writer.
const structs::PakHeader& GetHeader() const { return pakHeader; }
/**
* Write the resulting archive to the given output stream.
*/
void Write(std::ostream& os, PakProgressReportSink& sink);
// Overload that receives the files to write as an rvalue vector of
// (name, PakFile) pairs supplied by the caller, plus the sink that
// receives progress events.
void Write(std::ostream& os, std::vector<FlattenedType>&& vec, PakProgressReportSink& sink);
private:
// Archive header; value-initialized here and exposed through GetHeader().
structs::PakHeader pakHeader {};
// Files keyed by name; exposed through GetFiles().
std::unordered_map<std::string, PakFile> archiveFiles;
};
} // namespace europa::io

View file

@ -20,10 +20,6 @@ namespace europa::io {
pakHeader.Init(version);
}
std::unordered_map<std::string, PakFile>& PakWriter::GetFiles() {
return archiveFiles;
}
// move to a util/ header
template<class T>
@ -31,53 +27,17 @@ namespace europa::io {
return (-value) & alignment - 1;
}
/**
 * Class functor for flattening a map.
 *
 * Holds a reference to the map passed at construction; calling the functor
 * copies every key -> value pair of that map into a contiguous array.
 * The referenced map must stay alive for as long as the functor is used.
 */
template<class Map>
struct MapFlatten {
	/**
	 * Storage type to store one key -> value pair.
	 */
	using FlattenedType = std::pair<typename Map::key_type, typename Map::mapped_type>;
	using ArrayType = std::vector<FlattenedType>;

	constexpr explicit MapFlatten(Map& mapToFlatten)
		: map(mapToFlatten) {
	}

	/**
	 * Copy all entries of the referenced map into a new array.
	 *
	 * Entries appear in the map's iteration order; the map itself is not modified.
	 *
	 * @return Array holding one (key, value) copy per map entry.
	 */
	ArrayType operator()() const {
		ArrayType arr;
		// One allocation up front; the element count is known.
		arr.reserve(map.size());
		for(const auto& [key, value] : map)
			// Construct the pair directly in the array's storage instead of
			// building a temporary pair and moving it in.
			arr.emplace_back(key, value);
		return arr;
	}

   private:
	Map& map;
};
// TODO:
// - Composable operations (WriteTOC, WriteFile, WriteHeader)
// - Add IProgressReportSink reporting
void PakWriter::Write(std::ostream& os, PakProgressReportSink& sink) {
void PakWriter::Write(std::ostream& os, std::vector<FlattenedType>&& vec, PakProgressReportSink& sink) {
// This essentially converts our map we use for faster insertion
// into a flat array we can sort easily.
//
// NB: this copies by value, so during this function we use 2x the ram.
// doesn't seem to be a big problem though.
auto sortedFiles = MapFlatten{archiveFiles}();
std::vector<FlattenedType> sortedFiles = std::move(vec);
// Sort the flattened array by file size, the biggest first.
// Doesn't seem to help (neither does name length)
std::ranges::sort(sortedFiles, std::greater{}, [](const decltype(MapFlatten{archiveFiles})::FlattenedType& elem) {
return std::get<1>(elem).GetTOCEntry().size;
std::ranges::sort(sortedFiles, std::greater{}, [](const FlattenedType& elem) {
return elem.second.GetTOCEntry().size;
});
// Leave space for the header
@ -91,9 +51,6 @@ namespace europa::io {
// Write file data
for(auto& [filename, file] : sortedFiles) {
//std::cout << "PakWriteFile \"" << filename << "\"\n Size " << file.GetTOCEntry().size << "\n";
sink.OnEvent({
PakProgressReportSink::FileEvent::Type::FileBeginWrite,
filename
@ -105,7 +62,6 @@ namespace europa::io {
// Flush on file writing
os.flush();
sink.OnEvent({
PakProgressReportSink::FileEvent::Type::FileEndWrite,
filename
@ -114,7 +70,6 @@ namespace europa::io {
pakHeader.tocOffset = os.tellp();
sink.OnEvent({
PakProgressReportSink::PakEvent::Type::WritingToc
});
@ -139,7 +94,7 @@ namespace europa::io {
// Fill out the rest of the header.
pakHeader.fileCount = archiveFiles.size();
pakHeader.fileCount = sortedFiles.size();
pakHeader.tocSize = static_cast<std::uint32_t>(os.tellp()) - (pakHeader.tocOffset - 1);

View file

@ -108,6 +108,9 @@ namespace eupak::tasks {
// TODO: use time to write in the header
// also: is there any point to verbosity? could add archive written size ig
std::vector<europa::io::PakWriter::FlattenedType> files;
files.reserve(fileCount);
for(auto& ent : fs::recursive_directory_iterator(args.inputDirectory)) {
if(ent.is_directory())
continue;
@ -144,8 +147,7 @@ namespace eupak::tasks {
file.GetTOCEntry().creationUnixTime = static_cast<std::uint32_t>(lastModified.time_since_epoch().count());
writer.GetFiles()[relativePathName] = std::move(file);
files.emplace_back(std::make_pair(relativePathName, std::move(file)));
progress.tick();
currFile++;
}
@ -159,10 +161,9 @@ namespace eupak::tasks {
return 1;
}
CreateArchiveReportSink sink(fileCount);
writer.Write(ofs, sink);
writer.Write(ofs, std::move(files), sink);
return 0;
}