From 09142f9668d1d90e15126c3989a7a93f97fa5aa2 Mon Sep 17 00:00:00 2001 From: modeco80 Date: Mon, 14 Oct 2024 07:25:59 -0400 Subject: [PATCH] working gpu-only encode --- server/Cargo.lock | 1 + server/Cargo.toml | 1 + server/src/main.rs | 128 +++++++++---- server/src/retro_thread.rs | 283 ++++++++++++++-------------- server/src/surface.rs | 23 ++- server/src/video/cuda_gl/bindgen.sh | 27 +++ server/src/video/cuda_gl/gl.h | 1 + server/src/video/cuda_gl/mod.rs | 14 ++ server/src/video/cuda_gl/safe.rs | 135 +++++++++++++ server/src/video/cuda_gl/sys.rs | 73 +++++++ server/src/video/encoder_thread.rs | 269 +++++++++++++++++++++++++- server/src/video/h264_encoder.rs | 107 +++++++---- server/src/video/hwframe.rs | 133 +++++++------ server/src/video/mod.rs | 2 + 14 files changed, 920 insertions(+), 277 deletions(-) create mode 100755 server/src/video/cuda_gl/bindgen.sh create mode 100644 server/src/video/cuda_gl/gl.h create mode 100644 server/src/video/cuda_gl/mod.rs create mode 100644 server/src/video/cuda_gl/safe.rs create mode 100644 server/src/video/cuda_gl/sys.rs diff --git a/server/Cargo.lock b/server/Cargo.lock index 38fd3a3..adb5d8e 100644 --- a/server/Cargo.lock +++ b/server/Cargo.lock @@ -1414,6 +1414,7 @@ dependencies = [ "futures-util", "gl", "letsplay_gpu", + "libloading", "rand", "retro_frontend", "serde", diff --git a/server/Cargo.toml b/server/Cargo.toml index 11a6121..875e15a 100644 --- a/server/Cargo.toml +++ b/server/Cargo.toml @@ -32,6 +32,7 @@ tracing = "0.1.40" tracing-subscriber = "0.3.18" xkeysym = "0.2.1" async-trait = "0.1.83" +libloading = "0.8.5" [patch.crates-io] diff --git a/server/src/main.rs b/server/src/main.rs index cd5f216..ee3bc01 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -5,10 +5,15 @@ mod video; mod transport; +use anyhow::Context; use async_trait::async_trait; +use cudarc::driver::CudaDevice; +use letsplay_gpu::egl_helpers::DeviceContext; use retro_thread::{spawn_retro_thread, RetroEvent}; +use transport::websocket::WebsocketTransport; use transport::{Transport, TransportReciever}; +use video::cuda_gl::safe::GraphicsResource; use video::encoder_thread::EncodeThreadInput; use video::{encoder_thread, ffmpeg}; @@ -44,19 +49,19 @@ enum WsMessage { Json(String), } -struct AppState { +struct AppState { encoder_tx: Arc>>, inputs: Arc>>, - transport: Arc, + transport: Arc, connection_count: TokioMutex, } -impl AppState { - fn new( - encoder_tx: mpsc::Sender, - transport: Arc, - ) -> Self { +impl AppState +where + T: Transport + Send + Sync + 'static, +{ + fn new(encoder_tx: mpsc::Sender, transport: Arc) -> Self { Self { encoder_tx: Arc::new(TokioMutex::new(encoder_tx)), inputs: Arc::new(TokioMutex::new(Vec::new())), @@ -67,7 +72,10 @@ impl AppState { } #[async_trait] -impl TransportReciever for AppState { +impl TransportReciever for AppState +where + T: Transport + Send + Sync + 'static, +{ async fn on_connect(&self, username: &String) -> anyhow::Result<()> { println!("{username} joined!"); @@ -105,7 +113,6 @@ impl TransportReciever for AppState { "msg": json["msg"].as_str().unwrap() }); - self.transport .broadcast_message(transport::TransportMessage::Text( serde_json::to_string(&send).expect("oh well"), @@ -177,6 +184,23 @@ impl TransportReciever for AppState { } } +const FLIP_SRC: &str = " +extern \"C\" __global__ void flip_opengl( + const unsigned* pSrc, + unsigned* pDest, + unsigned width, + unsigned height +) { + const unsigned x = blockIdx.x * blockDim.x + threadIdx.x; + const unsigned y = blockIdx.y * blockDim.y + threadIdx.y; + + if (x < 
width && y < height) { + // let reversed_y = (size.height - 1) - y; + unsigned reversed_y = (height - 1) - y; + pDest[y * width + x] = pSrc[reversed_y * width + x]; + } +}"; + #[tokio::main(flavor = "multi_thread", worker_threads = 2)] async fn main() -> anyhow::Result<()> { // Setup a tracing subscriber @@ -186,36 +210,56 @@ async fn main() -> anyhow::Result<()> { tracing::subscriber::set_global_default(subscriber).unwrap(); - let surface = Arc::new(Mutex::new(surface::Surface::new())); - // H.264 encoder related - let frame: Arc>> = Arc::new(Mutex::new(None)); - let (mut encoder_rx, encoder_tx) = encoder_thread::encoder_thread_spawn(&frame); + let device = CudaDevice::new(0)?; - let transport = Arc::new(crate::transport::websocket::WebsocketTransport::new()); + let ptx = cudarc::nvrtc::compile_ptx(&FLIP_SRC).with_context(|| "compiling support kernel")?; + println!("compiled kernel"); + + // pop it in + device.load_ptx(ptx, "module", &["flip_opengl"])?; + + let egl_ctx = Arc::new(Mutex::new(DeviceContext::new(0))); + + let resource = Arc::new(Mutex::new(GraphicsResource::new(&device))); + + let (mut encoder_rx, encoder_tx) = encoder_thread::encoder_thread_spawn_hwframe( + &device.clone(), + &resource.clone(), + &egl_ctx.clone(), + ); + + let transport = Arc::new(WebsocketTransport::new()); let state = Arc::new(AppState::new(encoder_tx, transport.clone())); - let (mut event_rx, event_in_tx) = spawn_retro_thread(surface.clone()); + let (mut retro_event_rx, retro_input_event_tx) = + spawn_retro_thread(egl_ctx.clone(), resource.clone()); let state_clone = state.clone(); - let transport_clone = transport.clone(); - // retro event handler. drives the encoder thread too let _ = std::thread::Builder::new() .name("retro_event_rx".into()) .spawn(move || { - let surface_clone = surface.clone(); - let frame_clone = frame.clone(); + // load game + let _ = retro_input_event_tx.blocking_send(retro_thread::RetroInEvent::LoadCore( + "cores/swanstation_libretro.so".into(), + )); + let _ = retro_input_event_tx.blocking_send(retro_thread::RetroInEvent::LoadGame( + "roms/nmv1_us.cue".into(), + )); // start the libretro thread looping now that we're alive - let _ = event_in_tx.blocking_send(retro_thread::RetroInEvent::Start); + let _ = retro_input_event_tx.blocking_send(retro_thread::RetroInEvent::Start); loop { - match event_rx.try_recv() { - Ok(msg) => match msg { + match retro_event_rx.blocking_recv() { + Some(msg) => match msg { RetroEvent::Frame => { + /* + let mut same = true; + { let mut frame_locked = frame.lock().expect( "Couldn't lock frame on our end. Did the encoder thread panic?", @@ -229,6 +273,8 @@ async fn main() -> anyhow::Result<()> { let mut surf = surface_clone.lock().expect( "locking the VNC surface to paint it to the ffmpeg frame failed", ); + + let surf_buf = surf.get_buffer(); let buf_ptr = @@ -247,12 +293,27 @@ async fn main() -> anyhow::Result<()> { ) }; + // If any line differs then the frame isn't the same and should be encoded. 
+ if &surf_buf[line_stride..line_stride + width as usize] + != dest_line_slice + { + same = false; + } + dest_line_slice.copy_from_slice( &surf_buf[line_stride..line_stride + width as usize], ); } } + if !same { + let _ = state_clone + .encoder_tx + .blocking_lock() + .blocking_send(encoder_thread::EncodeThreadInput::SendFrame); + } + */ + let _ = state_clone .encoder_tx .blocking_lock() @@ -261,6 +322,12 @@ async fn main() -> anyhow::Result<()> { RetroEvent::Resize { size } => { // make a new frame for the encoder + + let _ = state_clone.encoder_tx.blocking_lock().blocking_send( + encoder_thread::EncodeThreadInput::Init { size: size.clone() }, + ); + + /* { let mut lk_frame = frame_clone.lock().expect("Couldn't lock frame"); @@ -270,30 +337,21 @@ async fn main() -> anyhow::Result<()> { size.clone().height, )); } - - let _ = state_clone.encoder_tx.blocking_lock().blocking_send( - encoder_thread::EncodeThreadInput::Init { size: size.clone() }, - ); + */ } RetroEvent::WantInputs { tx } => { let inputs = state_clone.inputs.blocking_lock(); - //tracing::info!("giving inputs {:?}", inputs); tx.send(inputs.clone()).expect("FUCK"); } }, - Err(TryRecvError::Disconnected) => break, - Err(TryRecvError::Empty) => {} + None => break, } match encoder_rx.try_recv() { Ok(msg) => match msg { encoder_thread::EncodeThreadOutput::Frame { packet } => { - // let _ = state_clone - // .websocket_broadcast_tx - // .send(WsMessage::VideoPacket { packet }); - // :( let packet_data = { let slice = packet.data().expect( @@ -301,7 +359,9 @@ async fn main() -> anyhow::Result<()> { ); slice.to_vec() }; - let _ = transport_clone.broadcast_message(transport::TransportMessage::Binary(packet_data)); + let _ = state_clone.transport.broadcast_message( + transport::TransportMessage::Binary(packet_data), + ); } }, Err(TryRecvError::Empty) => {} diff --git a/server/src/retro_thread.rs b/server/src/retro_thread.rs index 2e7cdcd..874a168 100644 --- a/server/src/retro_thread.rs +++ b/server/src/retro_thread.rs @@ -17,7 +17,7 @@ use retro_frontend::{ use gpu::egl_helpers::DeviceContext; use letsplay_gpu as gpu; -use crate::{surface::Surface, types::Size}; +use crate::{surface::Surface, types::Size, video::cuda_gl::safe::GraphicsResource}; /// Called by OpenGL. We use this to dump errors. extern "system" fn opengl_message_callback( @@ -49,30 +49,38 @@ pub struct RetroState { pad: RetroPad, // EGL state - egl_context: Option, + egl_context: Arc>, /// Locked framebuffer. - framebuffer: Arc>, + software_framebuffer: Surface, /// OpenGL FBO gl_framebuffer: gpu::GlFramebuffer, - /// Cached readback buffer. - readback_buffer: Surface, + gl_rendering: bool, + + cuda_resource: Arc>, event_tx: mpsc::Sender, } impl RetroState { - pub fn new(framebuffer: Arc>, event_tx: mpsc::Sender) -> Box { + pub fn new( + device_context: Arc>, + resource: Arc>, + event_tx: mpsc::Sender, + ) -> Box { let mut boxed = Box::new(Self { frontend: None, pad: RetroPad::new(), - egl_context: None, - framebuffer, + egl_context: device_context.clone(), + software_framebuffer: Surface::new(), gl_framebuffer: gpu::GlFramebuffer::new(), - readback_buffer: Surface::new(), + gl_rendering: false, + + cuda_resource: resource.clone(), + event_tx, }); @@ -127,14 +135,29 @@ impl RetroState { /// Initalizes the headless EGL context used for OpenGL rendering. fn hw_gl_egl_init(&mut self) { - self.egl_context = Some(DeviceContext::new(0)); + self.egl_context.lock().expect("piss").make_current(); + + unsafe { + // Load OpenGL functions using the EGL loader. 
+ gl::load_with(|s| { + let str = std::ffi::CString::new(s).expect("gl::load_with fail"); + std::mem::transmute(gpu::egl::GetProcAddress(str.as_ptr())) + }); + + // set OpenGL debug message callback + gl::Enable(gl::DEBUG_OUTPUT); + gl::DebugMessageCallback(Some(opengl_message_callback), std::ptr::null()); + } } /// Destroys OpenGL resources and the EGL context. fn hw_gl_destroy(&mut self) { - if self.egl_context.is_some() { - self.gl_framebuffer.destroy(); - self.egl_context.take().unwrap().destroy() + self.gl_framebuffer.destroy(); + + { + let mut locked = self.egl_context.lock().expect("piss"); + locked.release(); + locked.destroy(); } } @@ -143,59 +166,39 @@ impl RetroState { let step_ms = (1.0 / av_info.timing.fps) * 1000.; let step_duration = Duration::from_millis(step_ms as u64); - self.get_frontend().run_frame(); + { + let egl = (&mut self.egl_context).clone(); + let locked_egl = egl.lock().expect("fuck YOU"); + + locked_egl.make_current(); + self.get_frontend().run_frame(); + locked_egl.release(); + } + std::thread::sleep(step_duration); } /// Bleh, I don't like this is an associated fn, but whatever - fn update_impl(framebuffer: Arc>, slice: &[u32], pitch: u32, from_opengl: bool) { - let mut framebuffer_locked = framebuffer.lock().expect("could not lock framebuffer"); - - let size = framebuffer_locked.size.clone(); - let buffer = framebuffer_locked.get_buffer(); + fn update_software_framebuffer(framebuffer: &mut Surface, slice: &[u32], pitch: u32) { + let size = framebuffer.size.clone(); + let buffer = framebuffer.get_buffer(); let has_disconnected_pitch = pitch != size.width as u32; - // If this frame came from OpenGL we need to flip the image around - // so it is right side up (from our perspective). - // - // We do this in a bit of a convoluted way (but it's also zero allocation!) 
- if from_opengl { - for y in (0..size.height).rev() { - let src_line_off = (y as u32 * pitch) as usize; - let src_slice = &slice[src_line_off..src_line_off + size.width as usize]; + for y in 0..size.height { + let src_line_off = (y as u32 * pitch) as usize; + let mut dest_line_off = (y as u32 * size.width) as usize; - let reversed_y = (size.height - 1) - y; - - let src_line_off = (reversed_y as u32 * pitch) as usize; - let mut dest_line_off = src_line_off; - - // copy only - if has_disconnected_pitch { - dest_line_off = (reversed_y * pitch.min(size.width)) as usize; - } - - let dest_slice = &mut buffer[dest_line_off..dest_line_off + size.width as usize]; - - dest_slice.copy_from_slice(src_slice); + // copy only + if has_disconnected_pitch { + dest_line_off = (y * pitch) as usize; } - } else { - for y in 0..size.height { - let src_line_off = (y as u32 * pitch) as usize; - let mut dest_line_off = src_line_off; - // copy only - if has_disconnected_pitch { - dest_line_off = (y * size.width) as usize; - } + // Create slices repressenting each part + let src_slice = &slice[src_line_off..src_line_off + size.width as usize]; + let dest_slice = &mut buffer[dest_line_off..dest_line_off + size.width as usize]; - // Create slices repressenting each part - let src_slice = &slice[src_line_off..src_line_off + size.width as usize]; - let dest_slice = &mut framebuffer_locked.get_buffer() - [dest_line_off..dest_line_off + size.width as usize]; - - dest_slice.copy_from_slice(src_slice); - } + dest_slice.copy_from_slice(src_slice); } } } @@ -204,21 +207,32 @@ impl FrontendInterface for RetroState { fn video_resize(&mut self, width: u32, height: u32) { tracing::info!("Resized to {width}x{height}"); - if self.egl_context.is_some() { - self.gl_framebuffer.resize(width, height); - let raw = self.gl_framebuffer.as_raw(); + self.gl_framebuffer.resize(width, height); + let raw = self.gl_framebuffer.as_raw(); - // Notify the frontend layer about the new FBO ID - self.get_frontend().set_gl_fbo(raw); + // Notify the frontend layer about the new FBO ID + self.get_frontend().set_gl_fbo(raw); - // Resize the readback buffer - self.readback_buffer.resize(Size { width, height }); + if !self.gl_rendering { + self.software_framebuffer.resize(Size { width, height }); } - self.framebuffer - .lock() - .expect("its over?") - .resize(Size { width, height }); + unsafe { + } + + // map to cuda + { + let mut locked = self + .cuda_resource + .lock() + .expect("YOU MOTHERFUCKER PISS GO COUNT YOUR DICK"); + + locked.device().bind_to_thread().expect("fuck"); + + locked + .register(self.gl_framebuffer.texture_id(), gl::TEXTURE_2D) + .expect("you fucking asswater"); + } let _ = self.event_tx.blocking_send(RetroEvent::Resize { size: Size { width, height }, @@ -226,39 +240,29 @@ impl FrontendInterface for RetroState { } fn video_update(&mut self, slice: &[u32], pitch: u32) { - Self::update_impl(self.framebuffer.clone(), slice, pitch, false); + Self::update_software_framebuffer(&mut self.software_framebuffer, slice, pitch); + + let size = self.software_framebuffer.size.clone(); + + // upload texture to GPU + unsafe { + gl::TexImage2D( + gl::TEXTURE_2D, + 0, + gl::RGBA as i32, + size.width as i32, + size.height as i32, + 0, + gl::RGBA_INTEGER, + gl::UNSIGNED_INT_8_8_8_8, + self.software_framebuffer.get_buffer().as_mut_ptr() as *const _, + ); + } + let _ = self.event_tx.blocking_send(RetroEvent::Frame); } fn video_update_gl(&mut self) { - let dimensions = self.get_frontend().get_size(); - - // Read back the framebuffer - let slice = { - // 
lame, doesn't do bgra conversion for us - //self.gl_framebuffer.read_pixels( - // &mut self.readback_buffer.get_buffer()[..], - // dimensions.0, - // dimensions.1, - //); - - unsafe { - let _bind = self.gl_framebuffer.bind(); - gl::ReadPixels( - 0, - 0, - dimensions.0 as i32, - dimensions.1 as i32, - gl::BGRA, - gl::UNSIGNED_BYTE, - (&mut self.readback_buffer.get_buffer()).as_mut_ptr() as *mut std::ffi::c_void, - ); - } - - self.readback_buffer.get_buffer() - }; - - Self::update_impl(self.framebuffer.clone(), slice, dimensions.0, true); let _ = self.event_tx.blocking_send(RetroEvent::Frame); } @@ -270,9 +274,13 @@ impl FrontendInterface for RetroState { let (tx, rx) = oneshot::channel(); let _ = self.event_tx.blocking_send(RetroEvent::WantInputs { tx }); - let inputs = rx.blocking_recv().expect("what the FUCK are you doing"); + let inputs = rx.blocking_recv(); - for key in &inputs { + if inputs.is_err() { + return; + } + + for key in &inputs.unwrap() { use xkeysym::key as Key; match *key { @@ -349,38 +357,23 @@ impl FrontendInterface for RetroState { fn hw_gl_init(&mut self) -> Option { // Only create a new EGL/OpenGL context if we have to. - if self.egl_context.is_none() { - // Initalize EGL - self.hw_gl_egl_init(); + let context = self.egl_context.lock().expect("fuck you!"); + let extensions = gpu::egl_helpers::get_extensions(context.get_display()); - let context = self.egl_context.as_ref().unwrap(); - let extensions = gpu::egl_helpers::get_extensions(context.get_display()); + tracing::debug!("Supported EGL extensions: {:?}", extensions); - tracing::debug!("Supported EGL extensions: {:?}", extensions); - - // Check for EGL_KHR_get_all_proc_addresses, so we can use eglGetProcAddress() to load OpenGL functions - if !extensions.contains(&"EGL_KHR_get_all_proc_addresses".into()) { - tracing::error!("Your graphics driver doesn't support the EGL_KHR_get_all_proc_addresses extension."); - tracing::error!("Retrodemo currently needs this to load OpenGL functions. HW rendering will be disabled."); - return None; - } - - unsafe { - // Load OpenGL functions using the EGL loader. - gl::load_with(|s| { - let str = std::ffi::CString::new(s).expect("gl::load_with fail"); - std::mem::transmute(gpu::egl::GetProcAddress(str.as_ptr())) - }); - - // set OpenGL debug message callback - gl::Enable(gl::DEBUG_OUTPUT); - gl::DebugMessageCallback(Some(opengl_message_callback), std::ptr::null()); - } + // Check for EGL_KHR_get_all_proc_addresses, so we can use eglGetProcAddress() to load OpenGL functions + if !extensions.contains(&"EGL_KHR_get_all_proc_addresses".into()) { + tracing::error!("Your graphics driver doesn't support the EGL_KHR_get_all_proc_addresses extension."); + tracing::error!("Retrodemo currently needs this to load OpenGL functions. HW rendering will be disabled."); + return None; } - // Create the initial FBO for the core to render to - let dimensions = self.get_frontend().get_size(); - self.gl_framebuffer.resize(dimensions.0, dimensions.1); + // If we get here, we can be certain that we're no longer + // going to use software rendering. Therefore, we can + // clear (free) the software framebuffer, since it won't be of use to us anymore. 
+ self.software_framebuffer.clear(); + self.gl_rendering = true; return Some(HwGlInitData { get_proc_address: gpu::egl::GetProcAddress as *mut std::ffi::c_void, @@ -403,25 +396,34 @@ pub enum RetroEvent { pub enum RetroInEvent { Start, + LoadCore(std::path::PathBuf), + LoadGame(std::path::PathBuf), } fn retro_thread_main( - surface: Arc>, + context: &Arc>, + resource: &Arc>, event_tx: mpsc::Sender, mut event_rx: mpsc::Receiver, ) { - let mut app = RetroState::new(surface, event_tx); + let mut app = RetroState::new(context.clone(), resource.clone(), event_tx); - app.load_core("cores/swanstation_libretro.so") - .expect("failed to load core"); - app.load_game("roms/merged/nmv3/us/nmv3_us.cue") //merged/nmv1/us/nmv1_us.cue - .expect("failed to load game"); + // do EGL init first + app.hw_gl_egl_init(); - // sync + // pre-setup loop { match event_rx.blocking_recv() { None => return (), Some(msg) => match msg { + RetroInEvent::LoadCore(path) => { + app.load_core(path).expect("Failed to load core."); + } + + RetroInEvent::LoadGame(path) => { + app.load_game(path).expect("Failed to load game!"); + } + RetroInEvent::Start => break, }, } @@ -435,18 +437,23 @@ fn retro_thread_main( } pub fn spawn_retro_thread( - surface: Arc>, + context: Arc>, + resource: Arc>, ) -> (mpsc::Receiver, mpsc::Sender) { - let (event_tx, event_rx) = mpsc::channel(8); - let (event_in_tx, event_in_rx) = mpsc::channel(8); - let fb_clone = surface.clone(); + // essentially semaphores + let (event_tx, event_rx) = mpsc::channel(1); + let (event_in_tx, event_in_rx) = mpsc::channel(1); + + let cloned = resource.clone(); + let ctxcloned = context.clone(); // discard the join handle let _ = std::thread::Builder::new() .name("retro_game".into()) .spawn(move || { - retro_thread_main(fb_clone, event_tx, event_in_rx); - }).expect("failed to spawn the game thread"); + retro_thread_main(&ctxcloned, &cloned, event_tx, event_in_rx); + }) + .expect("failed to spawn the game thread"); (event_rx, event_in_tx) } diff --git a/server/src/surface.rs b/server/src/surface.rs index 8daad3d..4e1e237 100644 --- a/server/src/surface.rs +++ b/server/src/surface.rs @@ -7,7 +7,7 @@ use super::types::*; use std::alloc; -/// Allocates a boxed slice. +/// Allocates a boxed slice. /// Unlike a [Vec<_>], this can't grow, /// but is just as safe to use, and slightly more predictable. pub fn alloc_boxed_slice(len: usize) -> Box<[T]> { @@ -21,7 +21,7 @@ pub fn alloc_boxed_slice(len: usize) -> Box<[T]> { unsafe { Box::from_raw(slice) } } -/// A BGRA-format surface. +/// A BGRA (or RGBA, I'm not your dad. There are funtions that assume the former though) format surface. 
pub struct Surface { buffer: Option>, pub size: Size, @@ -38,10 +38,18 @@ impl Surface { } } + pub fn clear(&mut self) { + self.buffer = None; + self.size = Size { + width: 0, + height: 0, + }; + } + pub fn resize(&mut self, size: Size) { self.size = size; - - self.buffer = Some(alloc_boxed_slice(self.size.linear())); + + self.buffer = Some(alloc_boxed_slice(self.size.linear())); } pub fn get_buffer(&mut self) -> &mut [u32] { @@ -53,15 +61,14 @@ impl Surface { pub fn blit_buffer(&mut self, src_at: Rect, data: &[u32]) { let mut off = 0; - let buf = self.buffer.as_mut().unwrap(); - let buf_slice = &mut *buf; + let buf = self.buffer.as_mut().unwrap(); + let buf_slice = &mut *buf; for y in src_at.y..src_at.y + src_at.height { let src = &data[off..off + src_at.width as usize]; let dest_start_offset = (y as usize * self.size.width as usize) + src_at.x as usize; - let dest = - &mut buf_slice[dest_start_offset..dest_start_offset + src_at.width as usize]; + let dest = &mut buf_slice[dest_start_offset..dest_start_offset + src_at.width as usize]; // This forces alpha to always be 0xff. I *could* probably do this in a clearer way though :( for (dest, src_item) in dest.iter_mut().zip(src.iter()) { diff --git a/server/src/video/cuda_gl/bindgen.sh b/server/src/video/cuda_gl/bindgen.sh new file mode 100755 index 0000000..1f9dade --- /dev/null +++ b/server/src/video/cuda_gl/bindgen.sh @@ -0,0 +1,27 @@ +#!/bin/bash +# Does bindgen for CUDA cudaGL. (needs postprocessing) +set -exu + +# --allowlist-type="^CU.*" \ +# --allowlist-type="^cuuint(32|64)_t" \ +# --allowlist-type="^cudaError_enum" \ +# --allowlist-type="^cu.*Complex$" \ +# --allowlist-type="^cuda.*" \ +# --allowlist-type="^libraryPropertyType.*" \ +# --allowlist-var="^CU.*" \ + +echo "use cudarc::sys::*; /* Hack :3 */" > ./sys.rs + +bindgen \ + --allowlist-type="" \ + --allowlist-function="^cuGraphicsGL.*" \ + --default-enum-style=rust \ + --no-doc-comments \ + --with-derive-default \ + --with-derive-eq \ + --with-derive-hash \ + --with-derive-ord \ + --use-core \ + --dynamic-loading Lib \ + gl.h -- -I/opt/cuda/include \ + >> ./sys.rs \ No newline at end of file diff --git a/server/src/video/cuda_gl/gl.h b/server/src/video/cuda_gl/gl.h new file mode 100644 index 0000000..31e1454 --- /dev/null +++ b/server/src/video/cuda_gl/gl.h @@ -0,0 +1 @@ +#include "cudaGL.h" \ No newline at end of file diff --git a/server/src/video/cuda_gl/mod.rs b/server/src/video/cuda_gl/mod.rs new file mode 100644 index 0000000..47cc631 --- /dev/null +++ b/server/src/video/cuda_gl/mod.rs @@ -0,0 +1,14 @@ +pub mod sys; +use sys::*; + +pub mod safe; + +pub unsafe fn lib() -> &'static Lib { + static LIB: std::sync::OnceLock = std::sync::OnceLock::new(); + LIB.get_or_init(|| { + if let Ok(lib) = Lib::new(libloading::library_filename("cuda")) { + return lib; + } + panic!("cuda library doesn't exist."); + }) +} \ No newline at end of file diff --git a/server/src/video/cuda_gl/safe.rs b/server/src/video/cuda_gl/safe.rs new file mode 100644 index 0000000..11f2b0c --- /dev/null +++ b/server/src/video/cuda_gl/safe.rs @@ -0,0 +1,135 @@ +use cudarc::driver::{result as cuda_result, safe as cuda_safe, sys as cuda_sys, CudaDevice}; + +use super::sys; + +use std::sync::Arc; + +pub struct MappedGraphicsResource { + resource: cuda_sys::CUgraphicsResource, +} + +impl MappedGraphicsResource { + fn new(resource: cuda_sys::CUgraphicsResource) -> Self { + Self { resource } + } + + pub fn map(&mut self) -> Result<(), cuda_result::DriverError> { + unsafe { + cuda_sys::lib() + 
.cuGraphicsMapResources(1, &mut self.resource, std::ptr::null_mut()) + .result()?; + } + Ok(()) + } + + pub fn unmap(&mut self) -> Result<(), cuda_result::DriverError> { + unsafe { + cuda_sys::lib() + .cuGraphicsUnmapResources(1, &mut self.resource, std::ptr::null_mut()) + .result()?; + } + + Ok(()) + } + + pub fn get_mapped_array(&mut self) -> Result { + assert!( + !self.resource.is_null(), + "do not call GraphicsResource::get_mapped_array if no resource is actually registered" + ); + + let mut array: cuda_sys::CUarray = std::ptr::null_mut(); + + unsafe { + cuda_sys::lib() + .cuGraphicsSubResourceGetMappedArray(&mut array, self.resource, 0, 0) + .result()?; + } + + Ok(array) + } +} + +impl Drop for MappedGraphicsResource { + fn drop(&mut self) { + let _ = self.unmap(); + } +} + +/// Wrapper over cuGraphicsGL* apis +pub struct GraphicsResource { + context: Arc, + resource: cuda_sys::CUgraphicsResource, +} + +impl GraphicsResource { + pub fn new(device: &Arc) -> Self { + Self { + context: device.clone(), + resource: std::ptr::null_mut(), + } + } + + pub fn device(&self) -> Arc { + self.context.clone() + } + + /// Maps this resource. + pub fn map(&mut self) -> Result { + let mut res = MappedGraphicsResource::new(self.resource); + res.map()?; + + Ok(res) + } + + pub fn register( + &mut self, + texture_id: gl::types::GLuint, + texture_kind: gl::types::GLuint, + ) -> Result<(), cuda_result::DriverError> { + // better to be safe than leak memory? idk. + if !self.resource.is_null() { + self.unregister()?; + } + + unsafe { + super::lib() + .cuGraphicsGLRegisterImage(&mut self.resource, texture_id, texture_kind, 1) + .result()?; + } + + Ok(()) + } + + pub fn is_registered(&self) -> bool { + !self.resource.is_null() + } + + pub fn unregister(&mut self) -> Result<(), cuda_result::DriverError> { + assert!( + !self.resource.is_null(), + "do not call if no resource is actually registered" + ); + + unsafe { + cuda_sys::lib() + .cuGraphicsUnregisterResource(self.resource) + .result()?; + } + + self.resource = std::ptr::null_mut(); + Ok(()) + } + + +} + +impl Drop for GraphicsResource { + fn drop(&mut self) { + if self.is_registered() { + let _ = self.unregister(); + } + } +} + +unsafe impl Send for GraphicsResource {} diff --git a/server/src/video/cuda_gl/sys.rs b/server/src/video/cuda_gl/sys.rs new file mode 100644 index 0000000..f0c3313 --- /dev/null +++ b/server/src/video/cuda_gl/sys.rs @@ -0,0 +1,73 @@ +use cudarc::driver::sys::*; /* Hack :3 */ +use gl::types::{GLenum, GLuint}; +/* automatically generated by rust-bindgen 0.69.4 */ + +pub struct Lib { + __library: ::libloading::Library, + pub cuGraphicsGLRegisterBuffer: Result< + unsafe extern "C" fn( + pCudaResource: *mut CUgraphicsResource, + buffer: GLuint, + Flags: ::core::ffi::c_uint, + ) -> CUresult, + ::libloading::Error, + >, + pub cuGraphicsGLRegisterImage: Result< + unsafe extern "C" fn( + pCudaResource: *mut CUgraphicsResource, + image: GLuint, + target: GLenum, + Flags: ::core::ffi::c_uint, + ) -> CUresult, + ::libloading::Error, + >, +} +impl Lib { + pub unsafe fn new
<P>
(path: P) -> Result + where + P: AsRef<::std::ffi::OsStr>, + { + let library = ::libloading::Library::new(path)?; + Self::from_library(library) + } + pub unsafe fn from_library(library: L) -> Result + where + L: Into<::libloading::Library>, + { + let __library = library.into(); + let cuGraphicsGLRegisterBuffer = __library + .get(b"cuGraphicsGLRegisterBuffer\0") + .map(|sym| *sym); + let cuGraphicsGLRegisterImage = __library + .get(b"cuGraphicsGLRegisterImage\0") + .map(|sym| *sym); + Ok(Lib { + __library, + cuGraphicsGLRegisterBuffer, + cuGraphicsGLRegisterImage, + }) + } + pub unsafe fn cuGraphicsGLRegisterBuffer( + &self, + pCudaResource: *mut CUgraphicsResource, + buffer: GLuint, + Flags: ::core::ffi::c_uint, + ) -> CUresult { + (self + .cuGraphicsGLRegisterBuffer + .as_ref() + .expect("Expected function, got error."))(pCudaResource, buffer, Flags) + } + pub unsafe fn cuGraphicsGLRegisterImage( + &self, + pCudaResource: *mut CUgraphicsResource, + image: GLuint, + target: GLenum, + Flags: ::core::ffi::c_uint, + ) -> CUresult { + (self + .cuGraphicsGLRegisterImage + .as_ref() + .expect("Expected function, got error."))(pCudaResource, image, target, Flags) + } +} diff --git a/server/src/video/encoder_thread.rs b/server/src/video/encoder_thread.rs index d2247b2..d75ec75 100644 --- a/server/src/video/encoder_thread.rs +++ b/server/src/video/encoder_thread.rs @@ -1,11 +1,16 @@ +use cudarc::driver::{ + sys::{CUdeviceptr, CUmemorytype}, + CudaDevice, LaunchAsync, +}; +use letsplay_gpu::egl_helpers::DeviceContext; use std::{ sync::{Arc, Mutex}, time::Duration, }; use tokio::sync::mpsc::{self, error::TryRecvError}; -use super::ffmpeg; use super::h264_encoder::H264Encoder; +use super::{cuda_gl::safe::GraphicsResource, ffmpeg}; pub enum EncodeThreadInput { Init { size: crate::types::Size }, @@ -37,7 +42,7 @@ impl EncoderState { self.encoder = Some(H264Encoder::new_nvenc_swframe( size.clone(), 60, - 3 * (1024 * 1024), + 2 * (1024 * 1024), )?); // replace packet @@ -85,7 +90,76 @@ impl EncoderState { } } -fn encoder_thread_main( +struct EncoderStateHW { + encoder: Option, + frame: ffmpeg::frame::Video, + packet: ffmpeg::Packet, +} + +impl EncoderStateHW { + fn new() -> Self { + Self { + encoder: None, + frame: ffmpeg::frame::Video::empty(), + packet: ffmpeg::Packet::empty(), + } + } + + fn init(&mut self, device: &Arc, size: crate::types::Size) -> anyhow::Result<()> { + self.encoder = Some(H264Encoder::new_nvenc_hwframe( + &device, + size.clone(), + 60, + 2 * (1024 * 1024), + )?); + + // replace packet + self.packet = ffmpeg::Packet::empty(); + self.frame = self.encoder.as_mut().unwrap().create_frame()?; + + Ok(()) + } + + #[inline] + fn frame(&mut self) -> &mut ffmpeg::frame::Video { + &mut self.frame + } + + fn send_frame(&mut self, pts: u64, force_keyframe: bool) -> Option { + let frame = &mut self.frame; + let encoder = self.encoder.as_mut().unwrap(); + + // set frame metadata + unsafe { + if force_keyframe { + (*frame.as_mut_ptr()).pict_type = ffmpeg::sys::AVPictureType::AV_PICTURE_TYPE_I; + (*frame.as_mut_ptr()).flags = ffmpeg::sys::AV_FRAME_FLAG_KEY; + (*frame.as_mut_ptr()).key_frame = 1; + } else { + (*frame.as_mut_ptr()).pict_type = ffmpeg::sys::AVPictureType::AV_PICTURE_TYPE_NONE; + (*frame.as_mut_ptr()).flags = 0i32; + (*frame.as_mut_ptr()).key_frame = 0; + } + + (*frame.as_mut_ptr()).pts = pts as i64; + } + + encoder.send_frame(&*frame); + encoder + .receive_packet(&mut self.packet) + .expect("Failed to recieve packet"); + + unsafe { + if !self.packet.is_empty() { + return 
Some(self.packet.clone()); + } + } + + return None; + } +} + +fn encoder_thread_swframe_main( mut rx: mpsc::Receiver, tx: mpsc::Sender, frame: &Arc>>, @@ -143,7 +217,7 @@ fn encoder_thread_main( Ok(()) } -pub fn encoder_thread_spawn( +pub fn encoder_thread_spawn_swframe( frame: &Arc>>, ) -> ( mpsc::Receiver, @@ -154,7 +228,192 @@ pub fn encoder_thread_spawn( let clone = Arc::clone(frame); - std::thread::spawn(move || encoder_thread_main(in_rx, out_tx, &clone)); + std::thread::spawn(move || encoder_thread_swframe_main(in_rx, out_tx, &clone)); + + (out_rx, in_tx) +} + +fn encoder_thread_hwframe_main( + mut rx: mpsc::Receiver, + tx: mpsc::Sender, + + cuda_device: &Arc, + cuda_resource: &Arc>, + gl_context: &Arc>, +) -> anyhow::Result<()> { + let mut frame_number = 0u64; + let mut force_keyframe = false; + + let mut encoder = EncoderStateHW::new(); + + // :) + cuda_device.bind_to_thread()?; + + /* + + let mut memcpy = cudarc::driver::sys::CUDA_MEMCPY2D_st::default(); + // setup basic src stuff + memcpy.srcXInBytes = 0; + memcpy.srcY = 0; + memcpy.srcMemoryType = CUmemorytype::CU_MEMORYTYPE_ARRAY; + + // dest + memcpy.dstXInBytes = 0; + memcpy.dstY = 0; + memcpy.dstMemoryType = CUmemorytype::CU_MEMORYTYPE_DEVICE; + */ + + loop { + match rx.try_recv() { + Ok(msg) => match msg { + EncodeThreadInput::Init { size } => { + frame_number = 0; + + if force_keyframe { + force_keyframe = false; + } + + encoder + .init(cuda_device, size) + .expect("encoder init failed"); + } + + EncodeThreadInput::ForceKeyframe => { + force_keyframe = true; + } + + EncodeThreadInput::SendFrame => { + // copy gl frame *ON THE GPU* + { + let mut gl_ctx = gl_context.lock().expect("you dumb fuck"); + let mut gl_resource = + cuda_resource.lock().expect("couldnt lock GL resource!"); + + gl_ctx.make_current(); + + let mut mapped = gl_resource + .map() + .expect("couldnt map graphics resource. 
Its joever"); + + let array = mapped + .get_mapped_array() + .expect("well its all over anyways"); + + { + let frame = encoder.frame(); + let width = frame.width(); + let height = frame.height(); + + let launch_config = cudarc::driver::LaunchConfig { + grid_dim: (width / 16 + 1, height / 2 + 1, 0), + block_dim: (16, 2, 0), + shared_mem_bytes: 0, + }; + + let flip_opengl = cuda_device + .get_func("module", "flip_opengl") + .expect("dumb fucker"); + + unsafe { + let frame_ptr = frame.as_mut_ptr(); + + let mut params = [ + array as *mut std::ffi::c_void, + (*frame_ptr).data[0] as CUdeviceptr as *mut std::ffi::c_void, + width as *const u32 as *mut std::ffi::c_void, + height as *const u32 as *mut std::ffi::c_void, + ]; + + flip_opengl + .launch(launch_config, &mut params[..]) + .expect("oh its done"); + + + cudarc::driver::sys::lib() + .cuStreamSynchronize(std::ptr::null_mut()) + .result() + .expect("you banned"); + } + } + + /* + // setup the cuMemcpy2D + { + let frame = encoder.frame(); + + memcpy.srcArray = array; + + unsafe { + let frame_ptr = frame.as_mut_ptr(); + memcpy.dstDevice = (*frame_ptr).data[0] as CUdeviceptr; + memcpy.dstPitch = (*frame_ptr).linesize[0] as usize; + memcpy.WidthInBytes = ((*frame_ptr).width * 4) as usize; + memcpy.Height = (*frame_ptr).height as usize; + } + } + + unsafe { + cudarc::driver::sys::lib() + .cuMemcpy2DAsync_v2(&memcpy, std::ptr::null_mut()) + .result() + .expect("cuMemcpy2D fail epic"); + + cudarc::driver::sys::lib() + .cuStreamSynchronize(std::ptr::null_mut()) + .result() + .expect("you banned"); + + } */ + + mapped.unmap().expect("fuck you asshole"); + gl_ctx.release(); + } + + if let Some(pkt) = encoder.send_frame(frame_number as u64, force_keyframe) { + // A bit less clear than ::empty(), but it's "Safe" + if let Some(_) = pkt.data() { + let _ = tx.blocking_send(EncodeThreadOutput::Frame { + packet: pkt.clone(), + }); + } + + frame_number += 1; + } + + if force_keyframe { + force_keyframe = false; + } + } + }, + + Err(TryRecvError::Disconnected) => break, + Err(TryRecvError::Empty) => { + std::thread::sleep(Duration::from_millis(1)); + } + } + } + + Ok(()) +} + +pub fn encoder_thread_spawn_hwframe( + cuda_device: &Arc, + cuda_resource: &Arc>, + gl_context: &Arc>, +) -> ( + mpsc::Receiver, + mpsc::Sender, +) { + let (in_tx, in_rx) = mpsc::channel(1); + let (out_tx, out_rx) = mpsc::channel(1); + + let dev_clone = Arc::clone(cuda_device); + let rsrc_clone = Arc::clone(cuda_resource); + let gl_clone = Arc::clone(gl_context); + + std::thread::spawn(move || { + encoder_thread_hwframe_main(in_rx, out_tx, &dev_clone, &rsrc_clone, &gl_clone) + }); (out_rx, in_tx) } diff --git a/server/src/video/h264_encoder.rs b/server/src/video/h264_encoder.rs index 6c0ebd2..313fbfb 100644 --- a/server/src/video/h264_encoder.rs +++ b/server/src/video/h264_encoder.rs @@ -135,8 +135,8 @@ impl H264Encoder { video_encoder_context.set_format(ffmpeg::format::Pixel::ZRGB32); - video_encoder_context.set_qmin(38); - video_encoder_context.set_qmax(32); + video_encoder_context.set_qmin(37); + video_encoder_context.set_qmax(33); // set h264_nvenc options let mut dict = ffmpeg::Dictionary::new(); @@ -148,7 +148,7 @@ impl H264Encoder { // TODO: dict.set("rc", "vbr"); - dict.set("qp", "45"); + dict.set("qp", "35"); dict.set("forced-idr", "1"); @@ -169,9 +169,6 @@ impl H264Encoder { max_framerate: u32, bitrate: usize, ) -> anyhow::Result { - /* - (See FIXMEs above) - let cuda_device_context = super::hwdevice::CudaDeviceContextBuilder::new()? 
.set_cuda_context((*cuda_device.cu_primary_ctx()) as *mut _) .build() @@ -180,14 +177,53 @@ impl H264Encoder { let mut hw_frame_context = super::hwframe::HwFrameContextBuilder::new(cuda_device_context)? .set_width(size.width) .set_height(size.height) - .set_sw_format(ffmpeg::format::Pixel::ZRGB32) + .set_sw_format(ffmpeg::format::Pixel::ZBGR32) .set_format(ffmpeg::format::Pixel::CUDA) .build() .with_context(|| "while trying to create CUDA frame context")?; - */ + let (mut encoder, mut video_encoder_context) = + create_context_and_set_common_parameters("h264_nvenc", &size, max_framerate, bitrate) + .with_context(|| "while trying to create encoder")?; - todo!("Implement me!"); + video_encoder_context.set_format(ffmpeg::format::Pixel::CUDA); + + video_encoder_context.set_qmin(37); + video_encoder_context.set_qmax(33); + + unsafe { + // FIXME: this currently breaks the avbufferref system a bit + (*video_encoder_context.as_mut_ptr()).hw_frames_ctx = hw_frame_context.as_raw_mut(); + (*video_encoder_context.as_mut_ptr()).hw_device_ctx = + hw_frame_context.as_device_context_mut(); + } + + // set h264_nvenc options + let mut dict = ffmpeg::Dictionary::new(); + + dict.set("tune", "ull"); + dict.set("preset", "p1"); + + dict.set("profile", "main"); + + // TODO: + dict.set("rc", "vbr"); + dict.set("qp", "35"); + + dict.set("forced-idr", "1"); + + // damn you + dict.set("delay", "0"); + dict.set("zerolatency", "1"); + + let encoder = video_encoder_context + .open_as_with(encoder, dict) + .with_context(|| "While opening h264_nvenc video codec")?; + + Ok(Self::NvencHWFrame { + encoder: encoder, + hw_context: hw_frame_context, + }) } // NOTE: It's a bit pointless to have this have a mut borrow, @@ -207,46 +243,44 @@ impl H264Encoder { // } //} - pub fn create_frame(&mut self) -> ffmpeg::frame::Video { - + pub fn create_frame(&mut self) -> anyhow::Result { match self { Self::Software { encoder } | Self::NvencSWFrame { encoder } => { - return ffmpeg::frame::Video::new(encoder.format(), encoder.width(), encoder.height()); + return Ok(ffmpeg::frame::Video::new( + encoder.format(), + encoder.width(), + encoder.height(), + )); } Self::NvencHWFrame { encoder, hw_context, } => { - todo!("Implement H264Encoder::create_frame() for NvencHWFrame!"); + let mut frame = ffmpeg::frame::Video::empty(); + + unsafe { + (*frame.as_mut_ptr()).format = ffmpeg::format::Pixel::CUDA as i32; + (*frame.as_mut_ptr()).width = encoder.width() as i32; + (*frame.as_mut_ptr()).height = encoder.height() as i32; + (*frame.as_mut_ptr()).hw_frames_ctx = hw_context.as_raw_mut(); + + //ffmpeg::sys::av_frame_get_buffer(frame.as_mut_ptr(), 32); + + hw_context.get_buffer(&mut frame)?; + hw_context.get_buffer(&mut frame)?; + + (*frame.as_mut_ptr()).linesize[0] = (*frame.as_ptr()).width * 4; + + return Ok(frame); + } } } /* - unsafe { - let mut frame = ffmpeg::Frame::empty(); - (*frame.as_mut_ptr()).format = pixel_format as i32; - (*frame.as_mut_ptr()).width = width as i32; - (*frame.as_mut_ptr()).height = height as i32; - (*frame.as_mut_ptr()).hw_frames_ctx = context.as_raw_mut(); - super::check_ret(ffmpeg::sys::av_hwframe_get_buffer( - context.as_raw_mut(), - frame.as_mut_ptr(), - 0, - ))?; - super::check_ret(ffmpeg::sys::av_hwframe_get_buffer( - context.as_raw_mut(), - frame.as_mut_ptr(), - 0, - ))?; - (*frame.as_mut_ptr()).linesize[0] = (*frame.as_ptr()).width * 4; - - Ok(frame) - } - - */ + */ todo!("FIXME"); } @@ -267,7 +301,8 @@ impl H264Encoder { encoder, hw_context, } => { - todo!("Implement send_frame() for NvencHWFrame"); + 
//todo!("Implement send_frame() for NvencHWFrame"); + encoder.send_frame(frame).unwrap(); } } } diff --git a/server/src/video/hwframe.rs b/server/src/video/hwframe.rs index e58f72a..721ca5c 100644 --- a/server/src/video/hwframe.rs +++ b/server/src/video/hwframe.rs @@ -7,82 +7,103 @@ use ffmpeg::format::Pixel; use super::{check_ret, hwdevice::CudaDeviceContext}; pub struct HwFrameContext { - _cuda_device_context: CudaDeviceContext, - buffer: *mut ffmpeg::sys::AVBufferRef, + _cuda_device_context: CudaDeviceContext, + buffer: *mut ffmpeg::sys::AVBufferRef, } impl HwFrameContext { - fn new(cuda_device_context: CudaDeviceContext, buffer: *mut ffmpeg::sys::AVBufferRef) -> Self { - Self { _cuda_device_context: cuda_device_context, buffer } - } + fn new(cuda_device_context: CudaDeviceContext, buffer: *mut ffmpeg::sys::AVBufferRef) -> Self { + Self { + _cuda_device_context: cuda_device_context, + buffer, + } + } - // pub fn as_context_mut(&mut self) -> &mut ffmpeg::sys::AVHWFramesContext { - // unsafe { &mut *((*self.buffer).data as *mut ffmpeg::sys::AVHWFramesContext) } - // } + // pub fn as_context_mut(&mut self) -> &mut ffmpeg::sys::AVHWFramesContext { + // unsafe { &mut *((*self.buffer).data as *mut ffmpeg::sys::AVHWFramesContext) } + // } - // pub fn as_context(&self) -> &ffmpeg::sys::AVHWFramesContext { - // unsafe { &*((*self.buffer).data as *const ffmpeg::sys::AVHWFramesContext) } - // } + // pub fn as_context(&self) -> &ffmpeg::sys::AVHWFramesContext { + // unsafe { &*((*self.buffer).data as *const ffmpeg::sys::AVHWFramesContext) } + // } - pub fn as_raw_mut(&mut self) -> &mut ffmpeg::sys::AVBufferRef { - unsafe { &mut *self.buffer } - } + pub fn as_raw_mut(&mut self) -> &mut ffmpeg::sys::AVBufferRef { + unsafe { &mut *self.buffer } + } - // pub fn as_raw(&self) -> &ffmpeg::sys::AVBufferRef { - // unsafe { &*self.buffer } - // } + pub fn as_device_context_mut(&mut self) -> &mut ffmpeg::sys::AVBufferRef { + unsafe { + self._cuda_device_context.as_raw_mut() + } + } + + /// call once to allocate frame + pub fn get_buffer(&mut self, frame: &mut ffmpeg::frame::Video) -> Result<(), ffmpeg::Error> { + unsafe { + super::check_ret(ffmpeg::sys::av_hwframe_get_buffer(self.buffer, frame.as_mut_ptr(), 0))?; + } + + Ok(()) + } + + // pub fn as_raw(&self) -> &ffmpeg::sys::AVBufferRef { + // unsafe { &*self.buffer } + // } } -unsafe impl Send for HwFrameContext { } +unsafe impl Send for HwFrameContext {} pub struct HwFrameContextBuilder { - cuda_device_context: CudaDeviceContext, - buffer: *mut ffmpeg::sys::AVBufferRef, + cuda_device_context: CudaDeviceContext, + buffer: *mut ffmpeg::sys::AVBufferRef, } impl HwFrameContextBuilder { - pub fn new(mut cuda_device_context: CudaDeviceContext) -> anyhow::Result { - let buffer = unsafe { ffmpeg::sys::av_hwframe_ctx_alloc(cuda_device_context.as_raw_mut()) }; - if buffer.is_null() { - return Err(anyhow::anyhow!("could not allocate a hwframe context")); - } + pub fn new(mut cuda_device_context: CudaDeviceContext) -> anyhow::Result { + let buffer = unsafe { ffmpeg::sys::av_hwframe_ctx_alloc(cuda_device_context.as_raw_mut()) }; + if buffer.is_null() { + return Err(anyhow::anyhow!("could not allocate a hwframe context")); + } - Ok(Self { cuda_device_context, buffer }) - } + Ok(Self { + cuda_device_context, + buffer, + }) + } - pub fn build(mut self) -> Result { - check_ret(unsafe { ffmpeg::sys::av_hwframe_ctx_init(self.buffer) })?; - let result = Ok(HwFrameContext::new(self.cuda_device_context, self.buffer)); - self.buffer = null_mut(); + pub fn build(mut self) -> 
Result { + check_ret(unsafe { ffmpeg::sys::av_hwframe_ctx_init(self.buffer) })?; + let result = Ok(HwFrameContext::new(self.cuda_device_context, self.buffer)); + self.buffer = null_mut(); - result - } + result + } - pub fn set_width(mut self, width: u32) -> Self { - self.as_frame_mut().width = width as i32; - self - } + pub fn set_width(mut self, width: u32) -> Self { + self.as_frame_mut().width = width as i32; + self + } - pub fn set_height(mut self, height: u32) -> Self { - self.as_frame_mut().height = height as i32; - self - } + pub fn set_height(mut self, height: u32) -> Self { + self.as_frame_mut().height = height as i32; + self + } - pub fn set_sw_format(mut self, sw_format: Pixel) -> Self { - self.as_frame_mut().sw_format = sw_format.into(); - self - } + pub fn set_sw_format(mut self, sw_format: Pixel) -> Self { + self.as_frame_mut().sw_format = sw_format.into(); + self + } - pub fn set_format(mut self, format: Pixel) -> Self { - self.as_frame_mut().format = format.into(); - self - } + pub fn set_format(mut self, format: Pixel) -> Self { + self.as_frame_mut().format = format.into(); + self + } - pub fn as_frame_mut(&mut self) -> &mut ffmpeg::sys::AVHWFramesContext { - unsafe { &mut *((*self.buffer).data as *mut ffmpeg::sys::AVHWFramesContext) } - } + pub fn as_frame_mut(&mut self) -> &mut ffmpeg::sys::AVHWFramesContext { + unsafe { &mut *((*self.buffer).data as *mut ffmpeg::sys::AVHWFramesContext) } + } - // pub fn as_frame(&self) -> &ffmpeg::sys::AVHWFramesContext { - // unsafe { &*((*self.buffer).data as *const ffmpeg::sys::AVHWFramesContext) } - // } -} \ No newline at end of file + // pub fn as_frame(&self) -> &ffmpeg::sys::AVHWFramesContext { + // unsafe { &*((*self.buffer).data as *const ffmpeg::sys::AVHWFramesContext) } + // } +} diff --git a/server/src/video/mod.rs b/server/src/video/mod.rs index 1af486d..59adcdf 100644 --- a/server/src/video/mod.rs +++ b/server/src/video/mod.rs @@ -9,6 +9,8 @@ pub mod hwframe; pub mod encoder_thread; +pub mod cuda_gl; + // from hgaiser/moonshine pub fn check_ret(error_code: i32) -> Result<(), ffmpeg::Error> { if error_code != 0 {
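
For reference, the per-frame GPU path this patch wires up reduces to the sketch below. It uses only the types the patch itself introduces (GraphicsResource from video/cuda_gl/safe.rs, H264Encoder::new_nvenc_hwframe, the CUDA frame returned by create_frame()); launch_flip_kernel is a hypothetical helper standing in for the inline flip_opengl kernel launch in encoder_thread_hwframe_main, and error handling plus channel plumbing are elided. Treat it as an outline of the data flow, not a drop-in replacement for the thread loop.

use std::sync::Arc;
use cudarc::driver::CudaDevice;
use crate::video::ffmpeg;
use crate::video::cuda_gl::safe::GraphicsResource;
use crate::video::h264_encoder::H264Encoder;

fn encode_one_frame(
    device: &Arc<CudaDevice>,
    resource: &mut GraphicsResource,   // registered against the GL FBO texture in video_resize()
    encoder: &mut H264Encoder,         // built with H264Encoder::new_nvenc_hwframe()
    frame: &mut ffmpeg::frame::Video,  // CUDA frame from encoder.create_frame()
    packet: &mut ffmpeg::Packet,
) -> anyhow::Result<()> {
    // Done once per thread in encoder_thread_hwframe_main(); shown here for completeness.
    device.bind_to_thread()?;

    // 1. Map the registered GL texture and fetch the CUarray backing mip level 0.
    let mut mapped = resource.map()?;
    let array = mapped.get_mapped_array()?;

    // 2. Copy the GL image into the AVFrame's CUDA buffer, flipping it vertically.
    //    The patch does this with the NVRTC-compiled `flip_opengl` kernel; the
    //    commented-out cuMemcpy2DAsync_v2 block is the non-flipping alternative.
    //    `launch_flip_kernel` is a hypothetical wrapper around that launch.
    launch_flip_kernel(device, array, frame)?;

    // 3. Unmap before handing the frame to NVENC.
    mapped.unmap()?;

    // 4. Encode straight from device memory.
    encoder.send_frame(&*frame);
    encoder.receive_packet(packet)?;
    Ok(())
}

The point of the change is step 2: with the texture shared through cuGraphicsGLRegisterImage, the image never leaves the GPU between the core's FBO and the NVENC input buffer, replacing the old video_update_gl path that read pixels back with glReadPixels and re-uploaded them through the software-frame encoder.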