Compare commits

...

2 commits

Author SHA1 Message Date
ba5ccea2fe heavily simplified main loop 2024-12-02 01:43:14 -05:00
5de6726140 Only copy changed tiles
this reduces the average CPU usage of the agent. Pretty cool.
2024-12-02 00:40:45 -05:00
5 changed files with 139 additions and 157 deletions

View file

@ -30,7 +30,11 @@ namespace hazelnut {
virtual bool Initialize() = 0;
/// Performs the capture, writing the frame directly into the caller's buffer.
/// New CaptureFrame() API; now capture is copied directly to output buffer
/// instead of copy->copy.
/// pOut is the destination buffer for the captured pixels.
/// NOTE(review): the required size of pOut is not stated here; presumably it
/// must hold at least width * height u32 values. Confirm against implementations.
/// Returns a DisplayCaptureResult describing the outcome of the capture.
virtual DisplayCaptureResult CaptureFrameTemp(u32* pOut) = 0;
/// Performs the capture. THIS VERSION IS DEPRECATED AND WILL BE REMOVED;
/// new code should call CaptureFrameTemp(), which takes the output buffer directly.
virtual DisplayCaptureResult CaptureFrame() = 0;
/// Get framebuffer information.
@ -43,6 +47,7 @@ namespace hazelnut {
/// Identifies a display capture backend.
enum class DisplayCaptureInterface : u32 {
// NOTE(review): presumably means "no usable backend selected"; confirm with callers.
Invalid,
NVFBC, // NVFBC capture
// FIXME: DXGI support
};

View file

@ -71,9 +71,14 @@ namespace hazelnut {
unique_buffer<u32> convertedFramebuffer;
u8* pDiffMap;
u32 diffmapWidth;
u32 diffmapHeight;
// cached
u32 tileWidth;
u32 tileHeight;
public:
/// Calls Shutdown() when the capture object is destroyed.
virtual ~NVFBCDisplayCapture() { Shutdown(); }
@ -164,6 +169,104 @@ namespace hazelnut {
return true;
}
/// Copies only the tiles flagged in the diff map from the raw capture buffer
/// into pBufferData.
///
/// Every non-zero entry of pDiffMap selects one tileWidth x tileHeight pixel
/// rectangle. Tiles are copied row by row because the two buffers have
/// different row pitches: `width` pixels for the destination and
/// grabInfo.dwBufferWidth pixels for the source.
///
/// pBufferData is the destination pixel buffer, indexed with a row pitch of
/// `width` pixels.
///
/// NOTE(review): tileWidth/tileHeight are cached as width / diffmapWidth and
/// height / diffmapHeight (integer division). If the frame size is not an exact
/// multiple of the diff-map block size, these tiles may not line up with the
/// blocks the diff map describes. Verify against the NVFBC diff-map layout.
void PaintWithTileOptimization(u32* pBufferData) {
    // Both buffers are copied as 4-byte pixels.
    constexpr u32 kBytesPerPixel = 4;
    const u32 tileRowBytes = tileWidth * kBytesPerPixel;

    for(u32 dy = 0; dy < diffmapHeight; ++dy) {
        const u32 yOffset = dy * tileHeight;

        for(u32 dx = 0; dx < diffmapWidth; ++dx) {
            // Zero entries are skipped; only flagged tiles are copied.
            if(pDiffMap[dy * diffmapWidth + dx] == 0)
                continue;

            const u32 xOffset = dx * tileWidth;

            for(u32 y = 0; y < tileHeight; ++y) {
                const u32 row = y + yOffset;
                memcpy(&pBufferData[row * width + xOffset],
                       &pRawFramebuffer[row * grabInfo.dwBufferWidth + xOffset],
                       tileRowBytes);
            }
        }
    }
}
/// Copies the entire captured frame into pBufferData, one row at a time.
///
/// Rows are copied individually because the source row pitch
/// (grabInfo.dwBufferWidth pixels) can differ from the destination row pitch
/// (`width` pixels). Pixels are copied verbatim; no channel reordering is done.
///
/// pBufferData is the destination pixel buffer, indexed with a row pitch of
/// `width` pixels.
void PaintFull(u32* pBufferData) {
    // Both buffers are copied as 4-byte pixels.
    constexpr u32 kBytesPerPixel = 4;

    // FIXME: a per-pixel BGRA conversion used to be sketched here; if it is
    // ever needed again it should be done with SIMD rather than a scalar loop.
    for(u32 y = 0; y < height; ++y) {
        memcpy(&pBufferData[y * width],
               &pRawFramebuffer[y * grabInfo.dwBufferWidth],
               width * kBytesPerPixel);
    }
}
/// Grabs a frame through NVFBC and paints it directly into the caller's buffer.
///
/// pOut is the destination pixel buffer; a null pointer is rejected with Fail.
///
/// Returns:
///  - DisplayCaptureResult::Ok when the frame was grabbed and painted,
///  - DisplayCaptureResult::OkButResized when the grabbed frame's dimensions
///    differ from the cached ones (the whole frame is painted in that case),
///  - DisplayCaptureResult::Fail when the grab fails or the session cannot be
///    re-created.
DisplayCaptureResult CaptureFrameTemp(u32* pOut) override {
    // Nothing can be painted without a destination buffer.
    if(pOut == nullptr)
        return DisplayCaptureResult::Fail;

    auto nvStatus = nvfbc->NvFBCToSysGrabFrame(&this->fbcSysGrabParams);

    switch(nvStatus) {
        case NVFBC_SUCCESS: break;

        // Need to recreate the session. If it fails then we fail too.
        case NVFBC_ERROR_INVALIDATED_SESSION: {
            if(!NvfbcInitSession())
                return DisplayCaptureResult::Fail;

            // Recurse. This looks naughty, but will allow us to directly retry
            // the capture. (Plus if this causes issues whatever has happened is probably beyond saving)
            //
            // The retry has to go through this API: the deprecated CaptureFrame()
            // takes no output buffer, so it would leave pOut untouched while
            // still reporting success.
            return CaptureFrameTemp(pOut);
        }

        default: return DisplayCaptureResult::Fail;
    }

    auto result = DisplayCaptureResult::Ok;
    bool useTilePainting = true;

    if(width != grabInfo.dwWidth || height != grabInfo.dwHeight) {
        width = grabInfo.dwWidth;
        height = grabInfo.dwHeight;

        // update diffmap stuff
        diffmapWidth = DiffMapDimension(width, blockSize);
        diffmapHeight = DiffMapDimension(height, blockSize);

        tileWidth = width / diffmapWidth;
        tileHeight = height / diffmapHeight;

        result = DisplayCaptureResult::OkButResized;

        // Disable tile-painting optimization for the first frame
        useTilePainting = false;
    }

    if(useTilePainting) [[likely]] {
        PaintWithTileOptimization(&pOut[0]);
    } else {
        PaintFull(&pOut[0]);
    }

    return result;
}
DisplayCaptureResult CaptureFrame() override {
auto nvStatus = nvfbc->NvFBCToSysGrabFrame(&this->fbcSysGrabParams);
@ -225,8 +328,6 @@ namespace hazelnut {
// diffmapWidth = (u32)ceil((f32)width / 32);
// diffmapHeight = (u32)ceil((f32)height / 32);
diffmapWidth = DiffMapDimension(width, blockSize);
diffmapHeight = DiffMapDimension(height, blockSize);
return DiffInformation { pDiffMap, diffmapWidth, diffmapHeight };
}
};

View file

@ -6,28 +6,23 @@
#include <windows.h>
#include <stdio.h>
#include <vector>
#include "capture.hpp"
#include "Utils.hpp"
// clang-format on
#include "atomic_spinlock.hpp"
#include "ivshmem_protocol.hpp"
/// Rectangle describing one tile of the framebuffer, in pixels.
/// Members are zero-initialized so a default-constructed tileRect is well defined;
/// the type remains an aggregate, so `tileRect { x, y, w, h }` keeps working.
struct tileRect {
    u32 x {};      // left edge
    u32 y {};      // top edge
    u32 width {};  // horizontal extent
    u32 height {}; // vertical extent
};
// A spinlock together with two atomic counters.
// NOTE(review): the name suggests this is scratch/experimental state; confirm
// whether anything still uses it.
struct Test {
hazelnut::AtomicSpinlock lk {};
std::atomic<u32> sessionId {};
std::atomic<u32> pingPong {};
};
int main(int argc, char** argv) {
hazelnut::IvshmemDevice dev;
// Create a capture interface
auto pCaptureInterface = hazelnut::CreateDisplayCapture(hazelnut::GuessBestCaptureInterface());
if(!pCaptureInterface) {
printf("Failed to create a capture interface\n");
return 1;
}
if(!dev.Open()) {
printf("Failed to open ivshmem device\n");
return 1;
@ -41,162 +36,39 @@ int main(int argc, char** argv) {
// wipe the first 1mb
memset(&ptr[0], 0, 1 * (1024 * 1024));
printf("wiped memory\n");
// sex
// initialize laid-out IVSHMEM structs
auto* pHeader = new(&ptr[0]) hazelnut::IvshHeader {};
auto* pFrameHeader = new(&ptr[0x1000]) hazelnut::FrameHeader {};
// reset pingpong counter
// Reset session ID
pHeader->serverSessionId.store(rand());
// Create a capture interface
auto capture = hazelnut::CreateDisplayCapture(hazelnut::GuessBestCaptureInterface());
if(!capture) {
printf("Failed to create a capture interface\n");
return 1;
}
printf("Agent loop starting\n");
printf("Successfully created a framebuffer capture interface\n");
bool firstFrame = true;
std::vector<tileRect> tiles {};
hazelnut::FramebufferInformation framebuffer {};
hazelnut::DiffInformation diff {};
while(true) {
auto result = capture->CaptureFrame();
if(result == hazelnut::DisplayCaptureResult::Ok) {
tiles.clear();
#if 0
if(firstFrame == false) {
for(u32 y = 0; y < diff.diffMapHeight; ++y) {
for(u32 x = 0; x < diff.diffmapWidth; ++x) {
auto& bl = diff.pDiffMap[y * diff.diffmapWidth + x];
if(bl != 0) {
tiles.push_back(tileRect {
x * (framebuffer.width / diff.diffmapWidth), // x
y * (framebuffer.height / diff.diffMapHeight), // y
framebuffer.width / diff.diffmapWidth, // width
framebuffer.height / diff.diffMapHeight // height
});
}
}
}
if(tiles.empty())
continue;
}
#endif
{
auto guard = pHeader->lock.lock();
pFrameHeader->serial.fetch_add(1);
pFrameHeader->width.store(framebuffer.width);
pFrameHeader->height.store(framebuffer.height);
if(framebuffer.pFramebuffer == nullptr)
continue;
memcpy(pFrameHeader->bits(), &framebuffer.pFramebuffer[0], (framebuffer.width * framebuffer.height) * 4);
//printf("FRAME SERIAL %u loaded\n", pFrameHeader->serial.load());
}
if(firstFrame)
firstFrame = false;
} else if(result == hazelnut::DisplayCaptureResult::OkButResized) {
// We resized. Notify of that
framebuffer = capture->GetFramebufferInformation();
diff = capture->GetDiffInformation();
firstFrame = true;
} else {
printf("Failed to capture\n");
break;
}
}
#if 0
while(true) {
// lock
{
auto guard = pHeader->lk.lock();
Sleep(5);
}
auto guard = pHeader->lock.lock();
auto result = pCaptureInterface->CaptureFrameTemp(pFrameHeader->bits());
printf("pingpong %u\n", pHeader->pingPong.load());
if(tries++ == curTries) {
tries = 0;
pHeader->pingPong.fetch_add(1);
}
}
#endif
return 0;
#if 0
printf("Hazelnut agent\n");
// Create a capture interface
auto capture = hazelnut::CreateDisplayCapture(hazelnut::GuessBestCaptureInterface());
if(!capture) {
printf("Failed to create a capture interface\n");
return 1;
}
printf("Successfully created a framebuffer capture interface\n");
bool firstFrame = true;
std::vector<tileRect> tiles {};
hazelnut::FramebufferInformation framebuffer {};
hazelnut::DiffInformation diff {};
while(true) {
auto result = capture->CaptureFrame();
if(result == hazelnut::DisplayCaptureResult::Ok) {
tiles.clear();
if(firstFrame == false) {
for(u32 y = 0; y < diff.diffMapHeight; ++y) {
for(u32 x = 0; x < diff.diffmapWidth; ++x) {
auto& bl = diff.pDiffMap[y * diff.diffmapWidth + x];
if(bl != 0) {
tiles.push_back(tileRect {
x * (framebuffer.width / diff.diffmapWidth), // x
y * (framebuffer.height / diff.diffMapHeight), // y
framebuffer.width / diff.diffmapWidth, // width
framebuffer.height / diff.diffMapHeight // height
});
}
}
}
if(tiles.empty())
continue;
if(result == hazelnut::DisplayCaptureResult::Ok) {
// Do nothing.
} else if(result == hazelnut::DisplayCaptureResult::OkButResized) {
// We resized. Notify of that
framebuffer = pCaptureInterface->GetFramebufferInformation();
diff = pCaptureInterface->GetDiffInformation();
} else {
printf("Failed to capture\n");
break;
}
if(firstFrame)
firstFrame = false;
} else if(result == hazelnut::DisplayCaptureResult::OkButResized) {
// We resized. Notify of that
framebuffer = capture->GetFramebufferInformation();
diff = capture->GetDiffInformation();
firstFrame = true;
} else {
printf("Failed to capture\n");
break;
pFrameHeader->serial.fetch_add(1);
pFrameHeader->width.store(framebuffer.width);
pFrameHeader->height.store(framebuffer.height);
}
}
#endif
return 0;
}

View file

@ -1,3 +1,5 @@
# shared code
This is shared between the two ends. It is cross platform C++20.
This is everything shared between the agent and client ends.
It is cross-platform C++20.

View file

@ -30,8 +30,10 @@ namespace hazelnut {
/// Acquires the lock by spinning until this caller is the one that swaps the
/// lock word from 0 to 1. Never sleeps or yields between attempts.
void lock_manually() {
    for(;;) {
        // compare_exchange_strong overwrites its first argument when it fails,
        // so every attempt starts again from a fresh 0.
        u32 expected = 0;
        if(__lock.compare_exchange_strong(expected, 1, std::memory_order::seq_cst))
            return;

        // Contended: retry immediately. A "LOCK CONTENDED" printf and a
        // Sleep(1) on _WIN32 have been tried at this point and are disabled.
    }
}