Only copy changed tiles
This reduces the average CPU usage of the agent. Pretty cool.
This commit is contained in:
parent
758df315a1
commit
5de6726140
4 changed files with 142 additions and 135 deletions
|
@ -30,7 +30,11 @@ namespace hazelnut {
|
|||
|
||||
virtual bool Initialize() = 0;
|
||||
|
||||
/// Performs the capture.
|
||||
/// New CaptureFrame() API; the capture is now copied directly into the
|
||||
/// caller-supplied output buffer instead of going through an intermediate copy (copy->copy).
|
||||
/// @param pOut Destination buffer for the captured pixels.
///             NOTE(review): the required size is not stated here - presumably
///             at least width * height u32 pixels; confirm against the callers.
/// @return A DisplayCaptureResult describing the outcome of the capture.
virtual DisplayCaptureResult CaptureFrameTemp(u32* pOut) = 0;
|
||||
|
||||
/// Performs the capture. THIS VERSION IS DEPRECATED AND WILL BE REMOVED.
|
||||
/// Takes no output buffer; prefer CaptureFrameTemp(u32*), which writes the
/// capture into a caller-supplied buffer.
/// @return A DisplayCaptureResult describing the outcome of the capture.
virtual DisplayCaptureResult CaptureFrame() = 0;
|
||||
|
||||
/// Get framebuffer information.
|
||||
|
@ -43,6 +47,7 @@ namespace hazelnut {
|
|||
/// Identifies a display-capture backend. The underlying type is u32.
enum class DisplayCaptureInterface : u32 {
|
||||
// NOTE(review): presumably the "no usable backend" value - confirm against
// the code that creates capture interfaces.
Invalid,
|
||||
NVFBC, // NVFBC capture
|
||||
|
||||
// FIXME: DXGI support
|
||||
};
|
||||
|
||||
|
|
|
@ -71,9 +71,14 @@ namespace hazelnut {
|
|||
unique_buffer<u32> convertedFramebuffer;
|
||||
|
||||
u8* pDiffMap;
|
||||
|
||||
u32 diffmapWidth;
|
||||
u32 diffmapHeight;
|
||||
|
||||
// cached
|
||||
u32 tileWidth;
|
||||
u32 tileHeight;
|
||||
|
||||
public:
|
||||
virtual ~NVFBCDisplayCapture() { Shutdown(); }
|
||||
|
||||
|
@ -164,6 +169,104 @@ namespace hazelnut {
|
|||
return true;
|
||||
}
|
||||
|
||||
/// Copies only the tiles flagged in the difference map from the raw capture
/// buffer into pBufferData, leaving every other pixel of the destination
/// untouched.
///
/// The difference map is walked row by row; each non-zero entry selects a
/// tileWidth x tileHeight rectangle that is copied one scanline at a time.
/// The source rows are strided by grabInfo.dwBufferWidth, the destination rows
/// by width.
///
/// NOTE(review): tileWidth/tileHeight are derived as width / diffmapWidth and
/// height / diffmapHeight. If the frame size is not an exact multiple of the
/// diff block size, these tiles will not line up with the blocks the diff map
/// actually describes, and the right/bottom remainder is never copied here -
/// confirm against how the diff map is produced.
///
/// @param pBufferData Destination pixel buffer; must already hold the previous
///                    frame, since unchanged tiles are not rewritten.
void PaintWithTileOptimization(u32* pBufferData) {
	// Byte length of one tile scanline; invariant across the whole walk.
	const auto tileRowBytes = tileWidth * sizeof(u32);

	for(u32 dy = 0; dy < diffmapHeight; ++dy) {
		for(u32 dx = 0; dx < diffmapWidth; ++dx) {
			// Only non-zero diff-map entries are treated as changed tiles.
			if(pDiffMap[dy * diffmapWidth + dx] == 0)
				continue;

			const u32 xOffset = dx * tileWidth;
			const u32 yOffset = dy * tileHeight;

			for(u32 y = 0; y < tileHeight; ++y) {
				const u32 row = y + yOffset;
				memcpy(&pBufferData[row * width + xOffset],
					   &pRawFramebuffer[row * grabInfo.dwBufferWidth + xOffset],
					   tileRowBytes);
			}

			// printf("copied %ux%u @ %ux%u tile\n", tileWidth, tileHeight, xOffset, yOffset);
		}
	}
}
|
||||
|
||||
/// Copies the entire captured frame into pBufferData, one scanline at a time.
///
/// Source rows are strided by grabInfo.dwBufferWidth while destination rows are
/// strided by width, so only the first `width` pixels of each source row are
/// transferred.
///
/// A per-pixel channel swap ("Convert to BGRA") was sketched here in a disabled
/// #if 0 block and is not performed.
/// FIXME (carried over): make that conversion SIMD if it is ever reinstated.
///
/// @param pBufferData Destination pixel buffer.
void PaintFull(u32* pBufferData) {
	const auto bytesPerRow = width * 4;

	u32 row = 0;
	while(row < height) {
		auto* pDstRow = &pBufferData[(row * width)];
		const auto* pSrcRow = &pRawFramebuffer[(row * grabInfo.dwBufferWidth)];

		memcpy(pDstRow, pSrcRow, bytesPerRow);
		++row;
	}
}
|
||||
|
||||
/// Grabs a frame through NVFBC and writes it directly into pOut.
///
/// If the grab reports an invalidated session, the session is recreated and
/// the capture is retried once more through this same function. When the
/// grabbed dimensions differ from the cached width/height, the cached size,
/// diff-map dimensions and tile size are refreshed, the whole frame is copied,
/// and OkButResized is returned. Otherwise only the tiles flagged in the diff
/// map are copied.
///
/// @param pOut Destination pixel buffer that receives the frame.
/// @return DisplayCaptureResult::Ok on a normal capture, OkButResized when the
///         frame dimensions changed, Fail when the grab or session recreation
///         failed.
DisplayCaptureResult CaptureFrameTemp(u32* pOut) override {
	const auto nvStatus = nvfbc->NvFBCToSysGrabFrame(&this->fbcSysGrabParams);

	switch(nvStatus) {
		case NVFBC_SUCCESS: break;

		// Need to recreate the session. If it fails then we fail too.
		case NVFBC_ERROR_INVALIDATED_SESSION:
			if(!NvfbcInitSession())
				return DisplayCaptureResult::Fail;

			// Recurse. This looks naughty, but will allow us to directly retry
			// the capture. (Plus if this causes issues whatever has happened is probably beyond saving)
			// The retry must go through this function rather than the deprecated
			// CaptureFrame(), otherwise the retried frame is never written to pOut.
			return CaptureFrameTemp(pOut);

		default: return DisplayCaptureResult::Fail;
	}

	auto result = DisplayCaptureResult::Ok;
	bool useTilePainting = true;

	if(width != grabInfo.dwWidth || height != grabInfo.dwHeight) {
		width = grabInfo.dwWidth;
		height = grabInfo.dwHeight;

		// update diffmap stuff
		diffmapWidth = DiffMapDimension(width, blockSize);
		diffmapHeight = DiffMapDimension(height, blockSize);

		// Guard the divisions: a zero diff-map dimension yields a zero tile
		// size instead of a division by zero.
		tileWidth = (diffmapWidth != 0) ? (width / diffmapWidth) : 0;
		tileHeight = (diffmapHeight != 0) ? (height / diffmapHeight) : 0;

		result = DisplayCaptureResult::OkButResized;

		// Disable tile-painting optimization for the first frame
		// after a resize: pOut does not yet hold a frame of the new size.
		useTilePainting = false;
	}

	if(useTilePainting) [[likely]] {
		PaintWithTileOptimization(&pOut[0]);
	} else {
		PaintFull(&pOut[0]);
	}

	return result;
}
|
||||
|
||||
DisplayCaptureResult CaptureFrame() override {
|
||||
auto nvStatus = nvfbc->NvFBCToSysGrabFrame(&this->fbcSysGrabParams);
|
||||
|
||||
|
@ -225,8 +328,6 @@ namespace hazelnut {
|
|||
// diffmapWidth = (u32)ceil((f32)width / 32);
|
||||
// diffmapHeight = (u32)ceil((f32)height / 32);
|
||||
|
||||
diffmapWidth = DiffMapDimension(width, blockSize);
|
||||
diffmapHeight = DiffMapDimension(height, blockSize);
|
||||
return DiffInformation { pDiffMap, diffmapWidth, diffmapHeight };
|
||||
}
|
||||
};
|
||||
|
|
|
@ -6,25 +6,13 @@
|
|||
|
||||
#include <windows.h>
|
||||
#include <stdio.h>
|
||||
#include <vector>
|
||||
|
||||
#include "capture.hpp"
|
||||
#include "Utils.hpp"
|
||||
// clang-format on
|
||||
|
||||
#include "atomic_spinlock.hpp"
|
||||
#include "ivshmem_protocol.hpp"
|
||||
|
||||
struct tileRect {
|
||||
u32 x, y, width, height;
|
||||
};
|
||||
|
||||
// Small aggregate of a spinlock and two atomic counters.
// NOTE(review): looks like a scratch/test layout for the shared-memory header
// (the disabled loop in main touches `lk` and `pingPong` through pHeader) -
// confirm whether it is still needed.
struct Test {
|
||||
// Spinlock member, default-initialized.
hazelnut::AtomicSpinlock lk {};
|
||||
// Atomic u32, value-initialized (starts at 0).
std::atomic<u32> sessionId {};
|
||||
// Atomic u32, value-initialized (starts at 0).
std::atomic<u32> pingPong {};
|
||||
};
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
hazelnut::IvshmemDevice dev;
|
||||
|
||||
|
@ -61,142 +49,53 @@ int main(int argc, char** argv) {
|
|||
printf("Successfully created a framebuffer capture interface\n");
|
||||
|
||||
bool firstFrame = true;
|
||||
std::vector<tileRect> tiles {};
|
||||
hazelnut::FramebufferInformation framebuffer {};
|
||||
hazelnut::DiffInformation diff {};
|
||||
|
||||
while(true) {
|
||||
auto result = capture->CaptureFrame();
|
||||
if(result == hazelnut::DisplayCaptureResult::Ok) {
|
||||
tiles.clear();
|
||||
#if 0
|
||||
if(firstFrame == false) {
|
||||
for(u32 y = 0; y < diff.diffMapHeight; ++y) {
|
||||
for(u32 x = 0; x < diff.diffmapWidth; ++x) {
|
||||
auto& bl = diff.pDiffMap[y * diff.diffmapWidth + x];
|
||||
if(bl != 0) {
|
||||
tiles.push_back(tileRect {
|
||||
x * (framebuffer.width / diff.diffmapWidth), // x
|
||||
y * (framebuffer.height / diff.diffMapHeight), // y
|
||||
framebuffer.width / diff.diffmapWidth, // width
|
||||
framebuffer.height / diff.diffMapHeight // height
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(tiles.empty())
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
|
||||
{
|
||||
auto guard = pHeader->lock.lock();
|
||||
|
||||
pFrameHeader->serial.fetch_add(1);
|
||||
pFrameHeader->width.store(framebuffer.width);
|
||||
pFrameHeader->height.store(framebuffer.height);
|
||||
|
||||
if(framebuffer.pFramebuffer == nullptr)
|
||||
continue;
|
||||
|
||||
memcpy(pFrameHeader->bits(), &framebuffer.pFramebuffer[0], (framebuffer.width * framebuffer.height) * 4);
|
||||
|
||||
//printf("FRAME SERIAL %u loaded\n", pFrameHeader->serial.load());
|
||||
}
|
||||
|
||||
if(firstFrame)
|
||||
firstFrame = false;
|
||||
} else if(result == hazelnut::DisplayCaptureResult::OkButResized) {
|
||||
// We resized. Notify of that
|
||||
framebuffer = capture->GetFramebufferInformation();
|
||||
diff = capture->GetDiffInformation();
|
||||
firstFrame = true;
|
||||
|
||||
} else {
|
||||
printf("Failed to capture\n");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
while(true) {
|
||||
// lock
|
||||
bool changed = false;
|
||||
{
|
||||
auto guard = pHeader->lk.lock();
|
||||
Sleep(5);
|
||||
}
|
||||
auto guard = pHeader->lock.lock();
|
||||
|
||||
auto result = capture->CaptureFrameTemp(pFrameHeader->bits());
|
||||
|
||||
printf("pingpong %u\n", pHeader->pingPong.load());
|
||||
|
||||
if(tries++ == curTries) {
|
||||
tries = 0;
|
||||
pHeader->pingPong.fetch_add(1);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
|
||||
#if 0
|
||||
printf("Hazelnut agent\n");
|
||||
|
||||
// Create a capture interface
|
||||
auto capture = hazelnut::CreateDisplayCapture(hazelnut::GuessBestCaptureInterface());
|
||||
if(!capture) {
|
||||
printf("Failed to create a capture interface\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
printf("Successfully created a framebuffer capture interface\n");
|
||||
|
||||
|
||||
bool firstFrame = true;
|
||||
std::vector<tileRect> tiles {};
|
||||
hazelnut::FramebufferInformation framebuffer {};
|
||||
hazelnut::DiffInformation diff {};
|
||||
|
||||
while(true) {
|
||||
auto result = capture->CaptureFrame();
|
||||
if(result == hazelnut::DisplayCaptureResult::Ok) {
|
||||
tiles.clear();
|
||||
|
||||
if(firstFrame == false) {
|
||||
for(u32 y = 0; y < diff.diffMapHeight; ++y) {
|
||||
for(u32 x = 0; x < diff.diffmapWidth; ++x) {
|
||||
auto& bl = diff.pDiffMap[y * diff.diffmapWidth + x];
|
||||
if(bl != 0) {
|
||||
tiles.push_back(tileRect {
|
||||
x * (framebuffer.width / diff.diffmapWidth), // x
|
||||
y * (framebuffer.height / diff.diffMapHeight), // y
|
||||
framebuffer.width / diff.diffmapWidth, // width
|
||||
framebuffer.height / diff.diffMapHeight // height
|
||||
});
|
||||
if(result == hazelnut::DisplayCaptureResult::Ok) {
|
||||
if(firstFrame == false) {
|
||||
for(u32 y = 0; y < diff.diffMapHeight; ++y) {
|
||||
for(u32 x = 0; x < diff.diffmapWidth; ++x) {
|
||||
auto& bl = diff.pDiffMap[y * diff.diffmapWidth + x];
|
||||
if(bl != 0) {
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(tiles.empty())
|
||||
continue;
|
||||
if(!changed)
|
||||
continue;
|
||||
}
|
||||
} else if(result == hazelnut::DisplayCaptureResult::OkButResized) {
|
||||
// We resized. Notify of that
|
||||
framebuffer = capture->GetFramebufferInformation();
|
||||
diff = capture->GetDiffInformation();
|
||||
firstFrame = true;
|
||||
|
||||
} else {
|
||||
printf("Failed to capture\n");
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
pFrameHeader->serial.fetch_add(1);
|
||||
pFrameHeader->width.store(framebuffer.width);
|
||||
pFrameHeader->height.store(framebuffer.height);
|
||||
|
||||
// memcpy(pFrameHeader->bits(), &framebuffer.pFramebuffer[0], (framebuffer.width * framebuffer.height) * 4);
|
||||
|
||||
// printf("FRAME SERIAL %u loaded\n", pFrameHeader->serial.load());
|
||||
|
||||
if(firstFrame)
|
||||
firstFrame = false;
|
||||
} else if(result == hazelnut::DisplayCaptureResult::OkButResized) {
|
||||
// We resized. Notify of that
|
||||
framebuffer = capture->GetFramebufferInformation();
|
||||
diff = capture->GetDiffInformation();
|
||||
firstFrame = true;
|
||||
|
||||
} else {
|
||||
printf("Failed to capture\n");
|
||||
break;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -30,8 +30,10 @@ namespace hazelnut {
|
|||
void lock_manually() {
|
||||
u32 expected = 0;
|
||||
while(!__lock.compare_exchange_strong(expected, 1, std::memory_order::seq_cst)) {
|
||||
//printf("LOCK CONTENDED\n");
|
||||
expected = 0;
|
||||
#ifdef _WIN32
|
||||
//Sleep(1);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue