Only copy changed tiles

This reduces the average CPU usage of the agent. Pretty cool.
This commit is contained in:
Lily Tsuru 2024-12-02 00:40:45 -05:00
parent 758df315a1
commit 5de6726140
4 changed files with 142 additions and 135 deletions

View file

@ -30,7 +30,11 @@ namespace hazelnut {
virtual bool Initialize() = 0; virtual bool Initialize() = 0;
/// Performs the capture. /// New CaptureFrame() API; now capture is copied directly to output buffer
/// instead of copy->copy.
virtual DisplayCaptureResult CaptureFrameTemp(u32* pOut) = 0;
/// Performs the capture. THIS VERSION IS DEPRECATED AND WILL BE REMOVED
virtual DisplayCaptureResult CaptureFrame() = 0; virtual DisplayCaptureResult CaptureFrame() = 0;
/// Get framebuffer information. /// Get framebuffer information.
@ -43,6 +47,7 @@ namespace hazelnut {
enum class DisplayCaptureInterface : u32 { enum class DisplayCaptureInterface : u32 {
Invalid, Invalid,
NVFBC, // NVFBC capture NVFBC, // NVFBC capture
// FIXME: DXGI support // FIXME: DXGI support
}; };

View file

@ -71,9 +71,14 @@ namespace hazelnut {
unique_buffer<u32> convertedFramebuffer; unique_buffer<u32> convertedFramebuffer;
u8* pDiffMap; u8* pDiffMap;
u32 diffmapWidth; u32 diffmapWidth;
u32 diffmapHeight; u32 diffmapHeight;
// cached
u32 tileWidth;
u32 tileHeight;
public: public:
virtual ~NVFBCDisplayCapture() { Shutdown(); } virtual ~NVFBCDisplayCapture() { Shutdown(); }
@ -164,6 +169,104 @@ namespace hazelnut {
return true; return true;
} }
/// Copies only the tiles flagged in the diffmap from the raw capture buffer into pBufferData.
///
/// Each non-zero entry of pDiffMap at (dx, dy) selects a tileWidth x tileHeight pixel
/// rectangle whose top-left corner is (dx * tileWidth, dy * tileHeight). Tiles whose
/// entry is zero are left untouched in pBufferData, so the destination must already
/// hold the previous frame for the result to be a complete image.
///
/// \param pBufferData Destination pixels, addressed with a stride of `width` pixels per row.
void PaintWithTileOptimization(u32* pBufferData) {
    // Source rows are grabInfo.dwBufferWidth pixels apart; destination rows are `width` pixels apart.
    const auto srcStride = grabInfo.dwBufferWidth;
    // Each pixel is one u32, i.e. 4 bytes.
    const auto tileRowBytes = tileWidth * 4;

    for(u32 dy = 0; dy < diffmapHeight; ++dy) {
        const u32 yOffset = dy * tileHeight;

        for(u32 dx = 0; dx < diffmapWidth; ++dx) {
            // Zero means this tile was not flagged; nothing to copy.
            if(pDiffMap[dy * diffmapWidth + dx] == 0)
                continue;

            const u32 xOffset = dx * tileWidth;

            // Copy the tile one row at a time, since source and destination strides may differ.
            for(u32 y = 0; y < tileHeight; ++y) {
                memcpy(&pBufferData[(y + yOffset) * width + xOffset],
                       &pRawFramebuffer[(y + yOffset) * srcStride + xOffset], tileRowBytes);
            }
        }
    }
}
/// Copies the whole captured frame into pBufferData, one row at a time.
///
/// Source rows are read grabInfo.dwBufferWidth pixels apart, while destination rows
/// are written `width` pixels apart; `width` pixels (4 bytes each) are copied per row.
///
/// FIXME: No channel conversion happens here. A disabled scalar conversion to BGRA
/// (swap R and B, force alpha to 0xff) used to sit in this loop; if it is needed
/// again it should be written with SIMD.
///
/// \param pBufferData Destination pixels for the full frame.
void PaintFull(u32* pBufferData) {
    u32 row = 0;
    while(row < height) {
        auto* pDstRow = &pBufferData[row * width];
        auto* pSrcRow = &pRawFramebuffer[row * grabInfo.dwBufferWidth];
        memcpy(pDstRow, pSrcRow, width * 4);
        ++row;
    }
}
/// Grabs a frame and writes it directly into pOut.
///
/// If the grab reports an invalidated session, the session is re-created and the
/// capture is retried. When the grabbed dimensions differ from the cached ones, the
/// cached size, diffmap dimensions and tile size are refreshed and the whole frame is
/// copied. Otherwise only the tiles flagged in the diffmap are copied.
///
/// \param pOut Destination pixel buffer. Presumably it must have room for a full frame
///             (width * height pixels) — confirm against the caller.
/// \return Ok for a normal capture, OkButResized when the frame dimensions changed,
///         Fail when the grab fails or the session cannot be re-created.
DisplayCaptureResult CaptureFrameTemp(u32* pOut) override {
    auto nvStatus = nvfbc->NvFBCToSysGrabFrame(&this->fbcSysGrabParams);

    switch(nvStatus) {
        case NVFBC_SUCCESS: break;

        // Need to recreate the session. If it fails then we fail too.
        case NVFBC_ERROR_INVALIDATED_SESSION:
            if(!NvfbcInitSession())
                return DisplayCaptureResult::Fail;

            // Recurse to retry the capture directly. This must go through
            // CaptureFrameTemp() and not the deprecated CaptureFrame(): the latter
            // has no access to pOut, so the caller's buffer would stay stale while
            // a successful result is reported.
            return CaptureFrameTemp(pOut);

        default: return DisplayCaptureResult::Fail;
    }

    auto result = DisplayCaptureResult::Ok;
    bool useTilePainting = true;

    if(width != grabInfo.dwWidth || height != grabInfo.dwHeight) {
        width = grabInfo.dwWidth;
        height = grabInfo.dwHeight;

        // Update diffmap dimensions and the cached tile size.
        diffmapWidth = DiffMapDimension(width, blockSize);
        diffmapHeight = DiffMapDimension(height, blockSize);

        // Guard the division: a zero diffmap dimension yields a zero tile size,
        // which makes tile painting a no-op instead of dividing by zero.
        // NOTE(review): this is a truncating division. If DiffMapDimension() rounds up,
        // tiles may not line up with the real diff blocks and the right/bottom edge may
        // never be repainted by the tile path — confirm against DiffMapDimension().
        tileWidth = (diffmapWidth != 0) ? (width / diffmapWidth) : 0;
        tileHeight = (diffmapHeight != 0) ? (height / diffmapHeight) : 0;

        result = DisplayCaptureResult::OkButResized;

        // Disable tile-painting optimization for the first frame after a resize,
        // so the output buffer receives a complete image.
        useTilePainting = false;
    }

    if(useTilePainting) [[likely]] {
        PaintWithTileOptimization(pOut);
    } else {
        PaintFull(pOut);
    }

    return result;
}
DisplayCaptureResult CaptureFrame() override { DisplayCaptureResult CaptureFrame() override {
auto nvStatus = nvfbc->NvFBCToSysGrabFrame(&this->fbcSysGrabParams); auto nvStatus = nvfbc->NvFBCToSysGrabFrame(&this->fbcSysGrabParams);
@ -225,8 +328,6 @@ namespace hazelnut {
// diffmapWidth = (u32)ceil((f32)width / 32); // diffmapWidth = (u32)ceil((f32)width / 32);
// diffmapHeight = (u32)ceil((f32)height / 32); // diffmapHeight = (u32)ceil((f32)height / 32);
diffmapWidth = DiffMapDimension(width, blockSize);
diffmapHeight = DiffMapDimension(height, blockSize);
return DiffInformation { pDiffMap, diffmapWidth, diffmapHeight }; return DiffInformation { pDiffMap, diffmapWidth, diffmapHeight };
} }
}; };

View file

@ -6,25 +6,13 @@
#include <windows.h> #include <windows.h>
#include <stdio.h> #include <stdio.h>
#include <vector>
#include "capture.hpp" #include "capture.hpp"
#include "Utils.hpp" #include "Utils.hpp"
// clang-format on // clang-format on
#include "atomic_spinlock.hpp"
#include "ivshmem_protocol.hpp" #include "ivshmem_protocol.hpp"
struct tileRect {
u32 x, y, width, height;
};
struct Test {
hazelnut::AtomicSpinlock lk {};
std::atomic<u32> sessionId {};
std::atomic<u32> pingPong {};
};
int main(int argc, char** argv) { int main(int argc, char** argv) {
hazelnut::IvshmemDevice dev; hazelnut::IvshmemDevice dev;
@ -61,142 +49,53 @@ int main(int argc, char** argv) {
printf("Successfully created a framebuffer capture interface\n"); printf("Successfully created a framebuffer capture interface\n");
bool firstFrame = true; bool firstFrame = true;
std::vector<tileRect> tiles {};
hazelnut::FramebufferInformation framebuffer {}; hazelnut::FramebufferInformation framebuffer {};
hazelnut::DiffInformation diff {}; hazelnut::DiffInformation diff {};
while(true) { while(true) {
auto result = capture->CaptureFrame(); bool changed = false;
{
auto guard = pHeader->lock.lock();
auto result = capture->CaptureFrameTemp(pFrameHeader->bits());
if(result == hazelnut::DisplayCaptureResult::Ok) { if(result == hazelnut::DisplayCaptureResult::Ok) {
tiles.clear();
#if 0
if(firstFrame == false) { if(firstFrame == false) {
for(u32 y = 0; y < diff.diffMapHeight; ++y) { for(u32 y = 0; y < diff.diffMapHeight; ++y) {
for(u32 x = 0; x < diff.diffmapWidth; ++x) { for(u32 x = 0; x < diff.diffmapWidth; ++x) {
auto& bl = diff.pDiffMap[y * diff.diffmapWidth + x]; auto& bl = diff.pDiffMap[y * diff.diffmapWidth + x];
if(bl != 0) { if(bl != 0) {
tiles.push_back(tileRect { changed = true;
x * (framebuffer.width / diff.diffmapWidth), // x
y * (framebuffer.height / diff.diffMapHeight), // y
framebuffer.width / diff.diffmapWidth, // width
framebuffer.height / diff.diffMapHeight // height
});
} }
} }
} }
if(tiles.empty()) if(!changed)
continue; continue;
} }
#endif } else if(result == hazelnut::DisplayCaptureResult::OkButResized) {
// We resized. Notify of that
framebuffer = capture->GetFramebufferInformation();
diff = capture->GetDiffInformation();
firstFrame = true;
{ } else {
auto guard = pHeader->lock.lock(); printf("Failed to capture\n");
break;
}
pFrameHeader->serial.fetch_add(1); pFrameHeader->serial.fetch_add(1);
pFrameHeader->width.store(framebuffer.width); pFrameHeader->width.store(framebuffer.width);
pFrameHeader->height.store(framebuffer.height); pFrameHeader->height.store(framebuffer.height);
if(framebuffer.pFramebuffer == nullptr) // memcpy(pFrameHeader->bits(), &framebuffer.pFramebuffer[0], (framebuffer.width * framebuffer.height) * 4);
continue;
memcpy(pFrameHeader->bits(), &framebuffer.pFramebuffer[0], (framebuffer.width * framebuffer.height) * 4);
// printf("FRAME SERIAL %u loaded\n", pFrameHeader->serial.load()); // printf("FRAME SERIAL %u loaded\n", pFrameHeader->serial.load());
}
if(firstFrame) if(firstFrame)
firstFrame = false; firstFrame = false;
} else if(result == hazelnut::DisplayCaptureResult::OkButResized) {
// We resized. Notify of that
framebuffer = capture->GetFramebufferInformation();
diff = capture->GetDiffInformation();
firstFrame = true;
} else {
printf("Failed to capture\n");
break;
} }
} }
#if 0
while(true) {
// lock
{
auto guard = pHeader->lk.lock();
Sleep(5);
}
printf("pingpong %u\n", pHeader->pingPong.load());
if(tries++ == curTries) {
tries = 0;
pHeader->pingPong.fetch_add(1);
}
}
#endif
return 0;
#if 0
printf("Hazelnut agent\n");
// Create a capture interface
auto capture = hazelnut::CreateDisplayCapture(hazelnut::GuessBestCaptureInterface());
if(!capture) {
printf("Failed to create a capture interface\n");
return 1;
}
printf("Successfully created a framebuffer capture interface\n");
bool firstFrame = true;
std::vector<tileRect> tiles {};
hazelnut::FramebufferInformation framebuffer {};
hazelnut::DiffInformation diff {};
while(true) {
auto result = capture->CaptureFrame();
if(result == hazelnut::DisplayCaptureResult::Ok) {
tiles.clear();
if(firstFrame == false) {
for(u32 y = 0; y < diff.diffMapHeight; ++y) {
for(u32 x = 0; x < diff.diffmapWidth; ++x) {
auto& bl = diff.pDiffMap[y * diff.diffmapWidth + x];
if(bl != 0) {
tiles.push_back(tileRect {
x * (framebuffer.width / diff.diffmapWidth), // x
y * (framebuffer.height / diff.diffMapHeight), // y
framebuffer.width / diff.diffmapWidth, // width
framebuffer.height / diff.diffMapHeight // height
});
}
}
}
if(tiles.empty())
continue;
}
if(firstFrame)
firstFrame = false;
} else if(result == hazelnut::DisplayCaptureResult::OkButResized) {
// We resized. Notify of that
framebuffer = capture->GetFramebufferInformation();
diff = capture->GetDiffInformation();
firstFrame = true;
} else {
printf("Failed to capture\n");
break;
}
}
#endif
return 0; return 0;
} }

View file

@ -30,8 +30,10 @@ namespace hazelnut {
void lock_manually() { void lock_manually() {
u32 expected = 0; u32 expected = 0;
while(!__lock.compare_exchange_strong(expected, 1, std::memory_order::seq_cst)) { while(!__lock.compare_exchange_strong(expected, 1, std::memory_order::seq_cst)) {
//printf("LOCK CONTENDED\n");
expected = 0; expected = 0;
#ifdef _WIN32
//Sleep(1);
#endif
} }
} }