diff --git a/agent/src/capture.hpp b/agent/src/capture.hpp index fa0e872..e036352 100644 --- a/agent/src/capture.hpp +++ b/agent/src/capture.hpp @@ -30,7 +30,11 @@ namespace hazelnut { virtual bool Initialize() = 0; - /// Performs the capture. + /// New CaptureFrame() API; now capture is copied directly to output buffer + /// instead of copy->copy. + virtual DisplayCaptureResult CaptureFrameTemp(u32* pOut) = 0; + + /// Performs the capture. THIS VERSION IS DEPRECATED AND WILL BE REMOVED virtual DisplayCaptureResult CaptureFrame() = 0; /// Get framebuffer information. @@ -43,6 +47,7 @@ namespace hazelnut { enum class DisplayCaptureInterface : u32 { Invalid, NVFBC, // NVFBC capture + // FIXME: DXGI support }; diff --git a/agent/src/capture_nvfbc.cpp b/agent/src/capture_nvfbc.cpp index 563f160..b0191ba 100644 --- a/agent/src/capture_nvfbc.cpp +++ b/agent/src/capture_nvfbc.cpp @@ -71,9 +71,14 @@ namespace hazelnut { unique_buffer convertedFramebuffer; u8* pDiffMap; + u32 diffmapWidth; u32 diffmapHeight; + // cached + u32 tileWidth; + u32 tileHeight; + public: virtual ~NVFBCDisplayCapture() { Shutdown(); } @@ -164,6 +169,104 @@ namespace hazelnut { return true; } + void PaintWithTileOptimization(u32* pBufferData) { + bool copiedTiles = false; + + for(u32 dy = 0; dy < diffmapHeight; ++dy) { + for(u32 dx = 0; dx < diffmapWidth; ++dx) { + auto& bl = pDiffMap[dy * diffmapWidth + dx]; + + if(bl != 0) { + copiedTiles = true; + +#if 0 + x * (framebuffer.width / diff.diffmapWidth), // x + y * (framebuffer.height / diff.diffMapHeight), // y + framebuffer.width / diff.diffmapWidth, // width + framebuffer.height / diff.diffMapHeight // height +#endif + + u32 xOffset = dx * tileWidth; + u32 yOffset = dy * tileHeight; + + for(u32 y = 0; y < tileHeight; ++y) { + memcpy(&pBufferData[((y + yOffset) * width + xOffset)], + &pRawFramebuffer[((y + yOffset) * grabInfo.dwBufferWidth + xOffset)], tileWidth * 4); + } + + // printf("copied %ux%u @ %ux%u tile\n", tileWidth, tileHeight, xOffset, 
yOffset); + } + } + } + } + + void PaintFull(u32* pBufferData) { + for(u32 y = 0; y < height; ++y) { + // Copies one row of the grabbed frame into pBufferData; the BGRA swizzle below is compiled out (#if 0), so pixels are copied verbatim + // FIXME: Make the conversion SIMD; the scalar version is disabled below +#if 0 + usize srcStart = (y * grabInfo.dwBufferWidth) * 4; + usize dstStart = (y * width) * 4; + for(u32 x = 0; x < grabInfo.dwWidth * 4; x += 4) { + pBufferData[(dstStart + x) + 0] = pSrcData[(srcStart + x) + 2]; // B + pBufferData[(dstStart + x) + 1] = pSrcData[(srcStart + x) + 1]; // G + pBufferData[(dstStart + x) + 2] = pSrcData[(srcStart + x) + 0]; // R + pBufferData[(dstStart + x) + 3] = 0xff; // A + } +#endif + + memcpy(&pBufferData[(y * width)], &pRawFramebuffer[(y * grabInfo.dwBufferWidth)], width * 4); + } + } + + DisplayCaptureResult CaptureFrameTemp(u32* pOut) override { + auto nvStatus = nvfbc->NvFBCToSysGrabFrame(&this->fbcSysGrabParams); + + switch(nvStatus) { + case NVFBC_SUCCESS: break; + + // Need to recreate the session. If it fails then we fail too. + case NVFBC_ERROR_INVALIDATED_SESSION: { + if(!NvfbcInitSession()) + return DisplayCaptureResult::Fail; + + // Recurse. This looks naughty, but will allow us to directly retry + // the capture. 
(Plus if this causes issues whatever has happened is probably beyond saving) Retries through CaptureFrameTemp, not the deprecated CaptureFrame, so the retried frame is still written to pOut. + return CaptureFrameTemp(pOut); + } break; + + default: return DisplayCaptureResult::Fail; + } + + auto result = DisplayCaptureResult::Ok; + bool useTilePainting = true; + + if(width != grabInfo.dwWidth || height != grabInfo.dwHeight) { + width = grabInfo.dwWidth; + height = grabInfo.dwHeight; + + // update diffmap dimensions and the cached per-tile pixel size for the new resolution + diffmapWidth = DiffMapDimension(width, blockSize); + diffmapHeight = DiffMapDimension(height, blockSize); + + tileWidth = width / diffmapWidth; + tileHeight = height / diffmapHeight; + + result = DisplayCaptureResult::OkButResized; + + // Disable tile-painting optimization for the first frame at a new size; PaintFull copies every row instead + useTilePainting = false; + } + + if(useTilePainting) [[likely]] { + PaintWithTileOptimization(&pOut[0]); + } else { + PaintFull(&pOut[0]); + } + + return result; + } + DisplayCaptureResult CaptureFrame() override { auto nvStatus = nvfbc->NvFBCToSysGrabFrame(&this->fbcSysGrabParams); @@ -225,8 +328,6 @@ namespace hazelnut { // diffmapWidth = (u32)ceil((f32)width / 32); // diffmapHeight = (u32)ceil((f32)height / 32); - diffmapWidth = DiffMapDimension(width, blockSize); - diffmapHeight = DiffMapDimension(height, blockSize); return DiffInformation { pDiffMap, diffmapWidth, diffmapHeight }; } }; diff --git a/agent/src/main.cpp b/agent/src/main.cpp index 5c92b35..5825508 100755 --- a/agent/src/main.cpp +++ b/agent/src/main.cpp @@ -6,25 +6,13 @@ #include #include -#include #include "capture.hpp" #include "Utils.hpp" // clang-format on -#include "atomic_spinlock.hpp" #include "ivshmem_protocol.hpp" -struct tileRect { - u32 x, y, width, height; -}; - -struct Test { - hazelnut::AtomicSpinlock lk {}; - std::atomic sessionId {}; - std::atomic pingPong {}; -}; - int main(int argc, char** argv) { hazelnut::IvshmemDevice dev; @@ -61,142 +49,53 @@ int main(int argc, char** argv) { printf("Successfully created a framebuffer capture interface\n"); bool firstFrame = true; - std::vector tiles {}; 
hazelnut::FramebufferInformation framebuffer {}; hazelnut::DiffInformation diff {}; while(true) { - auto result = capture->CaptureFrame(); - if(result == hazelnut::DisplayCaptureResult::Ok) { - tiles.clear(); -#if 0 - if(firstFrame == false) { - for(u32 y = 0; y < diff.diffMapHeight; ++y) { - for(u32 x = 0; x < diff.diffmapWidth; ++x) { - auto& bl = diff.pDiffMap[y * diff.diffmapWidth + x]; - if(bl != 0) { - tiles.push_back(tileRect { - x * (framebuffer.width / diff.diffmapWidth), // x - y * (framebuffer.height / diff.diffMapHeight), // y - framebuffer.width / diff.diffmapWidth, // width - framebuffer.height / diff.diffMapHeight // height - }); - } - } - } - - if(tiles.empty()) - continue; - } -#endif - - { - auto guard = pHeader->lock.lock(); - - pFrameHeader->serial.fetch_add(1); - pFrameHeader->width.store(framebuffer.width); - pFrameHeader->height.store(framebuffer.height); - - if(framebuffer.pFramebuffer == nullptr) - continue; - - memcpy(pFrameHeader->bits(), &framebuffer.pFramebuffer[0], (framebuffer.width * framebuffer.height) * 4); - - //printf("FRAME SERIAL %u loaded\n", pFrameHeader->serial.load()); - } - - if(firstFrame) - firstFrame = false; - } else if(result == hazelnut::DisplayCaptureResult::OkButResized) { - // We resized. 
Notify of that - framebuffer = capture->GetFramebufferInformation(); - diff = capture->GetDiffInformation(); - firstFrame = true; - - } else { - printf("Failed to capture\n"); - break; - } - } - -#if 0 - while(true) { - // lock + bool changed = false; { - auto guard = pHeader->lk.lock(); - Sleep(5); - } + auto guard = pHeader->lock.lock(); + auto result = capture->CaptureFrameTemp(pFrameHeader->bits()); - printf("pingpong %u\n", pHeader->pingPong.load()); - - if(tries++ == curTries) { - tries = 0; - pHeader->pingPong.fetch_add(1); - } - } -#endif - - return 0; - -#if 0 - printf("Hazelnut agent\n"); - - // Create a capture interface - auto capture = hazelnut::CreateDisplayCapture(hazelnut::GuessBestCaptureInterface()); - if(!capture) { - printf("Failed to create a capture interface\n"); - return 1; - } - - printf("Successfully created a framebuffer capture interface\n"); - - - bool firstFrame = true; - std::vector tiles {}; - hazelnut::FramebufferInformation framebuffer {}; - hazelnut::DiffInformation diff {}; - - while(true) { - auto result = capture->CaptureFrame(); - if(result == hazelnut::DisplayCaptureResult::Ok) { - tiles.clear(); - - if(firstFrame == false) { - for(u32 y = 0; y < diff.diffMapHeight; ++y) { - for(u32 x = 0; x < diff.diffmapWidth; ++x) { - auto& bl = diff.pDiffMap[y * diff.diffmapWidth + x]; - if(bl != 0) { - tiles.push_back(tileRect { - x * (framebuffer.width / diff.diffmapWidth), // x - y * (framebuffer.height / diff.diffMapHeight), // y - framebuffer.width / diff.diffmapWidth, // width - framebuffer.height / diff.diffMapHeight // height - }); + if(result == hazelnut::DisplayCaptureResult::Ok) { + if(firstFrame == false) { + for(u32 y = 0; y < diff.diffMapHeight; ++y) { + for(u32 x = 0; x < diff.diffmapWidth; ++x) { + auto& bl = diff.pDiffMap[y * diff.diffmapWidth + x]; + if(bl != 0) { + changed = true; + } } } - } - if(tiles.empty()) - continue; + if(!changed) + continue; + } + } else if(result == 
hazelnut::DisplayCaptureResult::OkButResized) { + // We resized. Notify of that + framebuffer = capture->GetFramebufferInformation(); + diff = capture->GetDiffInformation(); + firstFrame = true; + + } else { + printf("Failed to capture\n"); + break; } - + pFrameHeader->serial.fetch_add(1); + pFrameHeader->width.store(framebuffer.width); + pFrameHeader->height.store(framebuffer.height); + + // memcpy(pFrameHeader->bits(), &framebuffer.pFramebuffer[0], (framebuffer.width * framebuffer.height) * 4); + + // printf("FRAME SERIAL %u loaded\n", pFrameHeader->serial.load()); if(firstFrame) firstFrame = false; - } else if(result == hazelnut::DisplayCaptureResult::OkButResized) { - // We resized. Notify of that - framebuffer = capture->GetFramebufferInformation(); - diff = capture->GetDiffInformation(); - firstFrame = true; - - } else { - printf("Failed to capture\n"); - break; } } -#endif return 0; } diff --git a/shared/src/atomic_spinlock.hpp b/shared/src/atomic_spinlock.hpp index 28c753c..0aedcc1 100644 --- a/shared/src/atomic_spinlock.hpp +++ b/shared/src/atomic_spinlock.hpp @@ -30,8 +30,10 @@ namespace hazelnut { void lock_manually() { u32 expected = 0; while(!__lock.compare_exchange_strong(expected, 1, std::memory_order::seq_cst)) { - //printf("LOCK CONTENDED\n"); expected = 0; +#ifdef _WIN32 + //Sleep(1); +#endif } }