working gpu-only encode

This commit is contained in:
Lily Tsuru 2024-10-14 07:25:59 -04:00
parent 9248fe91a9
commit 09142f9668
14 changed files with 920 additions and 277 deletions

1
server/Cargo.lock generated
View file

@ -1414,6 +1414,7 @@ dependencies = [
"futures-util", "futures-util",
"gl", "gl",
"letsplay_gpu", "letsplay_gpu",
"libloading",
"rand", "rand",
"retro_frontend", "retro_frontend",
"serde", "serde",

View file

@ -32,6 +32,7 @@ tracing = "0.1.40"
tracing-subscriber = "0.3.18" tracing-subscriber = "0.3.18"
xkeysym = "0.2.1" xkeysym = "0.2.1"
async-trait = "0.1.83" async-trait = "0.1.83"
libloading = "0.8.5"
[patch.crates-io] [patch.crates-io]

View file

@ -5,10 +5,15 @@ mod video;
mod transport; mod transport;
use anyhow::Context;
use async_trait::async_trait; use async_trait::async_trait;
use cudarc::driver::CudaDevice;
use letsplay_gpu::egl_helpers::DeviceContext;
use retro_thread::{spawn_retro_thread, RetroEvent}; use retro_thread::{spawn_retro_thread, RetroEvent};
use transport::websocket::WebsocketTransport;
use transport::{Transport, TransportReciever}; use transport::{Transport, TransportReciever};
use video::cuda_gl::safe::GraphicsResource;
use video::encoder_thread::EncodeThreadInput; use video::encoder_thread::EncodeThreadInput;
use video::{encoder_thread, ffmpeg}; use video::{encoder_thread, ffmpeg};
@ -44,19 +49,19 @@ enum WsMessage {
Json(String), Json(String),
} }
struct AppState { struct AppState<T: Transport> {
encoder_tx: Arc<TokioMutex<mpsc::Sender<EncodeThreadInput>>>, encoder_tx: Arc<TokioMutex<mpsc::Sender<EncodeThreadInput>>>,
inputs: Arc<TokioMutex<Vec<u32>>>, inputs: Arc<TokioMutex<Vec<u32>>>,
transport: Arc<crate::transport::websocket::WebsocketTransport>, transport: Arc<T>,
connection_count: TokioMutex<usize>, connection_count: TokioMutex<usize>,
} }
impl AppState { impl<T> AppState<T>
fn new( where
encoder_tx: mpsc::Sender<EncodeThreadInput>, T: Transport + Send + Sync + 'static,
transport: Arc<crate::transport::websocket::WebsocketTransport>, {
) -> Self { fn new(encoder_tx: mpsc::Sender<EncodeThreadInput>, transport: Arc<T>) -> Self {
Self { Self {
encoder_tx: Arc::new(TokioMutex::new(encoder_tx)), encoder_tx: Arc::new(TokioMutex::new(encoder_tx)),
inputs: Arc::new(TokioMutex::new(Vec::new())), inputs: Arc::new(TokioMutex::new(Vec::new())),
@ -67,7 +72,10 @@ impl AppState {
} }
#[async_trait] #[async_trait]
impl TransportReciever for AppState { impl<T> TransportReciever for AppState<T>
where
T: Transport + Send + Sync + 'static,
{
async fn on_connect(&self, username: &String) -> anyhow::Result<()> { async fn on_connect(&self, username: &String) -> anyhow::Result<()> {
println!("{username} joined!"); println!("{username} joined!");
@ -105,7 +113,6 @@ impl TransportReciever for AppState {
"msg": json["msg"].as_str().unwrap() "msg": json["msg"].as_str().unwrap()
}); });
self.transport self.transport
.broadcast_message(transport::TransportMessage::Text( .broadcast_message(transport::TransportMessage::Text(
serde_json::to_string(&send).expect("oh well"), serde_json::to_string(&send).expect("oh well"),
@ -177,6 +184,23 @@ impl TransportReciever for AppState {
} }
} }
const FLIP_SRC: &str = "
extern \"C\" __global__ void flip_opengl(
const unsigned* pSrc,
unsigned* pDest,
unsigned width,
unsigned height
) {
const unsigned x = blockIdx.x * blockDim.x + threadIdx.x;
const unsigned y = blockIdx.y * blockDim.y + threadIdx.y;
if (x < width && y < height) {
// let reversed_y = (size.height - 1) - y;
unsigned reversed_y = (height - 1) - y;
pDest[y * width + x] = pSrc[reversed_y * width + x];
}
}";
#[tokio::main(flavor = "multi_thread", worker_threads = 2)] #[tokio::main(flavor = "multi_thread", worker_threads = 2)]
async fn main() -> anyhow::Result<()> { async fn main() -> anyhow::Result<()> {
// Setup a tracing subscriber // Setup a tracing subscriber
@ -186,36 +210,56 @@ async fn main() -> anyhow::Result<()> {
tracing::subscriber::set_global_default(subscriber).unwrap(); tracing::subscriber::set_global_default(subscriber).unwrap();
let surface = Arc::new(Mutex::new(surface::Surface::new()));
// H.264 encoder related // H.264 encoder related
let frame: Arc<Mutex<Option<ffmpeg::frame::Video>>> = Arc::new(Mutex::new(None)); let device = CudaDevice::new(0)?;
let (mut encoder_rx, encoder_tx) = encoder_thread::encoder_thread_spawn(&frame);
let transport = Arc::new(crate::transport::websocket::WebsocketTransport::new()); let ptx = cudarc::nvrtc::compile_ptx(&FLIP_SRC).with_context(|| "compiling support kernel")?;
println!("compiled kernel");
// pop it in
device.load_ptx(ptx, "module", &["flip_opengl"])?;
let egl_ctx = Arc::new(Mutex::new(DeviceContext::new(0)));
let resource = Arc::new(Mutex::new(GraphicsResource::new(&device)));
let (mut encoder_rx, encoder_tx) = encoder_thread::encoder_thread_spawn_hwframe(
&device.clone(),
&resource.clone(),
&egl_ctx.clone(),
);
let transport = Arc::new(WebsocketTransport::new());
let state = Arc::new(AppState::new(encoder_tx, transport.clone())); let state = Arc::new(AppState::new(encoder_tx, transport.clone()));
let (mut event_rx, event_in_tx) = spawn_retro_thread(surface.clone()); let (mut retro_event_rx, retro_input_event_tx) =
spawn_retro_thread(egl_ctx.clone(), resource.clone());
let state_clone = state.clone(); let state_clone = state.clone();
let transport_clone = transport.clone();
// retro event handler. drives the encoder thread too // retro event handler. drives the encoder thread too
let _ = std::thread::Builder::new() let _ = std::thread::Builder::new()
.name("retro_event_rx".into()) .name("retro_event_rx".into())
.spawn(move || { .spawn(move || {
let surface_clone = surface.clone(); // load game
let frame_clone = frame.clone(); let _ = retro_input_event_tx.blocking_send(retro_thread::RetroInEvent::LoadCore(
"cores/swanstation_libretro.so".into(),
));
let _ = retro_input_event_tx.blocking_send(retro_thread::RetroInEvent::LoadGame(
"roms/nmv1_us.cue".into(),
));
// start the libretro thread looping now that we're alive // start the libretro thread looping now that we're alive
let _ = event_in_tx.blocking_send(retro_thread::RetroInEvent::Start); let _ = retro_input_event_tx.blocking_send(retro_thread::RetroInEvent::Start);
loop { loop {
match event_rx.try_recv() { match retro_event_rx.blocking_recv() {
Ok(msg) => match msg { Some(msg) => match msg {
RetroEvent::Frame => { RetroEvent::Frame => {
/*
let mut same = true;
{ {
let mut frame_locked = frame.lock().expect( let mut frame_locked = frame.lock().expect(
"Couldn't lock frame on our end. Did the encoder thread panic?", "Couldn't lock frame on our end. Did the encoder thread panic?",
@ -229,6 +273,8 @@ async fn main() -> anyhow::Result<()> {
let mut surf = surface_clone.lock().expect( let mut surf = surface_clone.lock().expect(
"locking the VNC surface to paint it to the ffmpeg frame failed", "locking the VNC surface to paint it to the ffmpeg frame failed",
); );
let surf_buf = surf.get_buffer(); let surf_buf = surf.get_buffer();
let buf_ptr = let buf_ptr =
@ -247,12 +293,27 @@ async fn main() -> anyhow::Result<()> {
) )
}; };
// If any line differs then the frame isn't the same and should be encoded.
if &surf_buf[line_stride..line_stride + width as usize]
!= dest_line_slice
{
same = false;
}
dest_line_slice.copy_from_slice( dest_line_slice.copy_from_slice(
&surf_buf[line_stride..line_stride + width as usize], &surf_buf[line_stride..line_stride + width as usize],
); );
} }
} }
if !same {
let _ = state_clone
.encoder_tx
.blocking_lock()
.blocking_send(encoder_thread::EncodeThreadInput::SendFrame);
}
*/
let _ = state_clone let _ = state_clone
.encoder_tx .encoder_tx
.blocking_lock() .blocking_lock()
@ -261,6 +322,12 @@ async fn main() -> anyhow::Result<()> {
RetroEvent::Resize { size } => { RetroEvent::Resize { size } => {
// make a new frame for the encoder // make a new frame for the encoder
let _ = state_clone.encoder_tx.blocking_lock().blocking_send(
encoder_thread::EncodeThreadInput::Init { size: size.clone() },
);
/*
{ {
let mut lk_frame = frame_clone.lock().expect("Couldn't lock frame"); let mut lk_frame = frame_clone.lock().expect("Couldn't lock frame");
@ -270,30 +337,21 @@ async fn main() -> anyhow::Result<()> {
size.clone().height, size.clone().height,
)); ));
} }
*/
let _ = state_clone.encoder_tx.blocking_lock().blocking_send(
encoder_thread::EncodeThreadInput::Init { size: size.clone() },
);
} }
RetroEvent::WantInputs { tx } => { RetroEvent::WantInputs { tx } => {
let inputs = state_clone.inputs.blocking_lock(); let inputs = state_clone.inputs.blocking_lock();
//tracing::info!("giving inputs {:?}", inputs);
tx.send(inputs.clone()).expect("FUCK"); tx.send(inputs.clone()).expect("FUCK");
} }
}, },
Err(TryRecvError::Disconnected) => break, None => break,
Err(TryRecvError::Empty) => {}
} }
match encoder_rx.try_recv() { match encoder_rx.try_recv() {
Ok(msg) => match msg { Ok(msg) => match msg {
encoder_thread::EncodeThreadOutput::Frame { packet } => { encoder_thread::EncodeThreadOutput::Frame { packet } => {
// let _ = state_clone
// .websocket_broadcast_tx
// .send(WsMessage::VideoPacket { packet });
// :( // :(
let packet_data = { let packet_data = {
let slice = packet.data().expect( let slice = packet.data().expect(
@ -301,7 +359,9 @@ async fn main() -> anyhow::Result<()> {
); );
slice.to_vec() slice.to_vec()
}; };
let _ = transport_clone.broadcast_message(transport::TransportMessage::Binary(packet_data)); let _ = state_clone.transport.broadcast_message(
transport::TransportMessage::Binary(packet_data),
);
} }
}, },
Err(TryRecvError::Empty) => {} Err(TryRecvError::Empty) => {}

View file

@ -17,7 +17,7 @@ use retro_frontend::{
use gpu::egl_helpers::DeviceContext; use gpu::egl_helpers::DeviceContext;
use letsplay_gpu as gpu; use letsplay_gpu as gpu;
use crate::{surface::Surface, types::Size}; use crate::{surface::Surface, types::Size, video::cuda_gl::safe::GraphicsResource};
/// Called by OpenGL. We use this to dump errors. /// Called by OpenGL. We use this to dump errors.
extern "system" fn opengl_message_callback( extern "system" fn opengl_message_callback(
@ -49,30 +49,38 @@ pub struct RetroState {
pad: RetroPad, pad: RetroPad,
// EGL state // EGL state
egl_context: Option<DeviceContext>, egl_context: Arc<Mutex<DeviceContext>>,
/// Locked framebuffer. /// Locked framebuffer.
framebuffer: Arc<Mutex<Surface>>, software_framebuffer: Surface,
/// OpenGL FBO /// OpenGL FBO
gl_framebuffer: gpu::GlFramebuffer, gl_framebuffer: gpu::GlFramebuffer,
/// Cached readback buffer. gl_rendering: bool,
readback_buffer: Surface,
cuda_resource: Arc<Mutex<GraphicsResource>>,
event_tx: mpsc::Sender<RetroEvent>, event_tx: mpsc::Sender<RetroEvent>,
} }
impl RetroState { impl RetroState {
pub fn new(framebuffer: Arc<Mutex<Surface>>, event_tx: mpsc::Sender<RetroEvent>) -> Box<Self> { pub fn new(
device_context: Arc<Mutex<DeviceContext>>,
resource: Arc<Mutex<GraphicsResource>>,
event_tx: mpsc::Sender<RetroEvent>,
) -> Box<Self> {
let mut boxed = Box::new(Self { let mut boxed = Box::new(Self {
frontend: None, frontend: None,
pad: RetroPad::new(), pad: RetroPad::new(),
egl_context: None, egl_context: device_context.clone(),
framebuffer, software_framebuffer: Surface::new(),
gl_framebuffer: gpu::GlFramebuffer::new(), gl_framebuffer: gpu::GlFramebuffer::new(),
readback_buffer: Surface::new(), gl_rendering: false,
cuda_resource: resource.clone(),
event_tx, event_tx,
}); });
@ -127,14 +135,29 @@ impl RetroState {
/// Initalizes the headless EGL context used for OpenGL rendering. /// Initalizes the headless EGL context used for OpenGL rendering.
fn hw_gl_egl_init(&mut self) { fn hw_gl_egl_init(&mut self) {
self.egl_context = Some(DeviceContext::new(0)); self.egl_context.lock().expect("piss").make_current();
unsafe {
// Load OpenGL functions using the EGL loader.
gl::load_with(|s| {
let str = std::ffi::CString::new(s).expect("gl::load_with fail");
std::mem::transmute(gpu::egl::GetProcAddress(str.as_ptr()))
});
// set OpenGL debug message callback
gl::Enable(gl::DEBUG_OUTPUT);
gl::DebugMessageCallback(Some(opengl_message_callback), std::ptr::null());
}
} }
/// Destroys OpenGL resources and the EGL context. /// Destroys OpenGL resources and the EGL context.
fn hw_gl_destroy(&mut self) { fn hw_gl_destroy(&mut self) {
if self.egl_context.is_some() { self.gl_framebuffer.destroy();
self.gl_framebuffer.destroy();
self.egl_context.take().unwrap().destroy() {
let mut locked = self.egl_context.lock().expect("piss");
locked.release();
locked.destroy();
} }
} }
@ -143,59 +166,39 @@ impl RetroState {
let step_ms = (1.0 / av_info.timing.fps) * 1000.; let step_ms = (1.0 / av_info.timing.fps) * 1000.;
let step_duration = Duration::from_millis(step_ms as u64); let step_duration = Duration::from_millis(step_ms as u64);
self.get_frontend().run_frame(); {
let egl = (&mut self.egl_context).clone();
let locked_egl = egl.lock().expect("fuck YOU");
locked_egl.make_current();
self.get_frontend().run_frame();
locked_egl.release();
}
std::thread::sleep(step_duration); std::thread::sleep(step_duration);
} }
/// Bleh, I don't like this is an associated fn, but whatever /// Bleh, I don't like this is an associated fn, but whatever
fn update_impl(framebuffer: Arc<Mutex<Surface>>, slice: &[u32], pitch: u32, from_opengl: bool) { fn update_software_framebuffer(framebuffer: &mut Surface, slice: &[u32], pitch: u32) {
let mut framebuffer_locked = framebuffer.lock().expect("could not lock framebuffer"); let size = framebuffer.size.clone();
let buffer = framebuffer.get_buffer();
let size = framebuffer_locked.size.clone();
let buffer = framebuffer_locked.get_buffer();
let has_disconnected_pitch = pitch != size.width as u32; let has_disconnected_pitch = pitch != size.width as u32;
// If this frame came from OpenGL we need to flip the image around for y in 0..size.height {
// so it is right side up (from our perspective). let src_line_off = (y as u32 * pitch) as usize;
// let mut dest_line_off = (y as u32 * size.width) as usize;
// We do this in a bit of a convoluted way (but it's also zero allocation!)
if from_opengl {
for y in (0..size.height).rev() {
let src_line_off = (y as u32 * pitch) as usize;
let src_slice = &slice[src_line_off..src_line_off + size.width as usize];
let reversed_y = (size.height - 1) - y; // copy only
if has_disconnected_pitch {
let src_line_off = (reversed_y as u32 * pitch) as usize; dest_line_off = (y * pitch) as usize;
let mut dest_line_off = src_line_off;
// copy only
if has_disconnected_pitch {
dest_line_off = (reversed_y * pitch.min(size.width)) as usize;
}
let dest_slice = &mut buffer[dest_line_off..dest_line_off + size.width as usize];
dest_slice.copy_from_slice(src_slice);
} }
} else {
for y in 0..size.height {
let src_line_off = (y as u32 * pitch) as usize;
let mut dest_line_off = src_line_off;
// copy only // Create slices repressenting each part
if has_disconnected_pitch { let src_slice = &slice[src_line_off..src_line_off + size.width as usize];
dest_line_off = (y * size.width) as usize; let dest_slice = &mut buffer[dest_line_off..dest_line_off + size.width as usize];
}
// Create slices repressenting each part dest_slice.copy_from_slice(src_slice);
let src_slice = &slice[src_line_off..src_line_off + size.width as usize];
let dest_slice = &mut framebuffer_locked.get_buffer()
[dest_line_off..dest_line_off + size.width as usize];
dest_slice.copy_from_slice(src_slice);
}
} }
} }
} }
@ -204,21 +207,32 @@ impl FrontendInterface for RetroState {
fn video_resize(&mut self, width: u32, height: u32) { fn video_resize(&mut self, width: u32, height: u32) {
tracing::info!("Resized to {width}x{height}"); tracing::info!("Resized to {width}x{height}");
if self.egl_context.is_some() { self.gl_framebuffer.resize(width, height);
self.gl_framebuffer.resize(width, height); let raw = self.gl_framebuffer.as_raw();
let raw = self.gl_framebuffer.as_raw();
// Notify the frontend layer about the new FBO ID // Notify the frontend layer about the new FBO ID
self.get_frontend().set_gl_fbo(raw); self.get_frontend().set_gl_fbo(raw);
// Resize the readback buffer if !self.gl_rendering {
self.readback_buffer.resize(Size { width, height }); self.software_framebuffer.resize(Size { width, height });
} }
self.framebuffer unsafe {
.lock() }
.expect("its over?")
.resize(Size { width, height }); // map to cuda
{
let mut locked = self
.cuda_resource
.lock()
.expect("YOU MOTHERFUCKER PISS GO COUNT YOUR DICK");
locked.device().bind_to_thread().expect("fuck");
locked
.register(self.gl_framebuffer.texture_id(), gl::TEXTURE_2D)
.expect("you fucking asswater");
}
let _ = self.event_tx.blocking_send(RetroEvent::Resize { let _ = self.event_tx.blocking_send(RetroEvent::Resize {
size: Size { width, height }, size: Size { width, height },
@ -226,39 +240,29 @@ impl FrontendInterface for RetroState {
} }
fn video_update(&mut self, slice: &[u32], pitch: u32) { fn video_update(&mut self, slice: &[u32], pitch: u32) {
Self::update_impl(self.framebuffer.clone(), slice, pitch, false); Self::update_software_framebuffer(&mut self.software_framebuffer, slice, pitch);
let size = self.software_framebuffer.size.clone();
// upload texture to GPU
unsafe {
gl::TexImage2D(
gl::TEXTURE_2D,
0,
gl::RGBA as i32,
size.width as i32,
size.height as i32,
0,
gl::RGBA_INTEGER,
gl::UNSIGNED_INT_8_8_8_8,
self.software_framebuffer.get_buffer().as_mut_ptr() as *const _,
);
}
let _ = self.event_tx.blocking_send(RetroEvent::Frame); let _ = self.event_tx.blocking_send(RetroEvent::Frame);
} }
fn video_update_gl(&mut self) { fn video_update_gl(&mut self) {
let dimensions = self.get_frontend().get_size();
// Read back the framebuffer
let slice = {
// lame, doesn't do bgra conversion for us
//self.gl_framebuffer.read_pixels(
// &mut self.readback_buffer.get_buffer()[..],
// dimensions.0,
// dimensions.1,
//);
unsafe {
let _bind = self.gl_framebuffer.bind();
gl::ReadPixels(
0,
0,
dimensions.0 as i32,
dimensions.1 as i32,
gl::BGRA,
gl::UNSIGNED_BYTE,
(&mut self.readback_buffer.get_buffer()).as_mut_ptr() as *mut std::ffi::c_void,
);
}
self.readback_buffer.get_buffer()
};
Self::update_impl(self.framebuffer.clone(), slice, dimensions.0, true);
let _ = self.event_tx.blocking_send(RetroEvent::Frame); let _ = self.event_tx.blocking_send(RetroEvent::Frame);
} }
@ -270,9 +274,13 @@ impl FrontendInterface for RetroState {
let (tx, rx) = oneshot::channel(); let (tx, rx) = oneshot::channel();
let _ = self.event_tx.blocking_send(RetroEvent::WantInputs { tx }); let _ = self.event_tx.blocking_send(RetroEvent::WantInputs { tx });
let inputs = rx.blocking_recv().expect("what the FUCK are you doing"); let inputs = rx.blocking_recv();
for key in &inputs { if inputs.is_err() {
return;
}
for key in &inputs.unwrap() {
use xkeysym::key as Key; use xkeysym::key as Key;
match *key { match *key {
@ -349,38 +357,23 @@ impl FrontendInterface for RetroState {
fn hw_gl_init(&mut self) -> Option<HwGlInitData> { fn hw_gl_init(&mut self) -> Option<HwGlInitData> {
// Only create a new EGL/OpenGL context if we have to. // Only create a new EGL/OpenGL context if we have to.
if self.egl_context.is_none() { let context = self.egl_context.lock().expect("fuck you!");
// Initalize EGL let extensions = gpu::egl_helpers::get_extensions(context.get_display());
self.hw_gl_egl_init();
let context = self.egl_context.as_ref().unwrap(); tracing::debug!("Supported EGL extensions: {:?}", extensions);
let extensions = gpu::egl_helpers::get_extensions(context.get_display());
tracing::debug!("Supported EGL extensions: {:?}", extensions); // Check for EGL_KHR_get_all_proc_addresses, so we can use eglGetProcAddress() to load OpenGL functions
if !extensions.contains(&"EGL_KHR_get_all_proc_addresses".into()) {
// Check for EGL_KHR_get_all_proc_addresses, so we can use eglGetProcAddress() to load OpenGL functions tracing::error!("Your graphics driver doesn't support the EGL_KHR_get_all_proc_addresses extension.");
if !extensions.contains(&"EGL_KHR_get_all_proc_addresses".into()) { tracing::error!("Retrodemo currently needs this to load OpenGL functions. HW rendering will be disabled.");
tracing::error!("Your graphics driver doesn't support the EGL_KHR_get_all_proc_addresses extension."); return None;
tracing::error!("Retrodemo currently needs this to load OpenGL functions. HW rendering will be disabled.");
return None;
}
unsafe {
// Load OpenGL functions using the EGL loader.
gl::load_with(|s| {
let str = std::ffi::CString::new(s).expect("gl::load_with fail");
std::mem::transmute(gpu::egl::GetProcAddress(str.as_ptr()))
});
// set OpenGL debug message callback
gl::Enable(gl::DEBUG_OUTPUT);
gl::DebugMessageCallback(Some(opengl_message_callback), std::ptr::null());
}
} }
// Create the initial FBO for the core to render to // If we get here, we can be certain that we're no longer
let dimensions = self.get_frontend().get_size(); // going to use software rendering. Therefore, we can
self.gl_framebuffer.resize(dimensions.0, dimensions.1); // clear (free) the software framebuffer, since it won't be of use to us anymore.
self.software_framebuffer.clear();
self.gl_rendering = true;
return Some(HwGlInitData { return Some(HwGlInitData {
get_proc_address: gpu::egl::GetProcAddress as *mut std::ffi::c_void, get_proc_address: gpu::egl::GetProcAddress as *mut std::ffi::c_void,
@ -403,25 +396,34 @@ pub enum RetroEvent {
pub enum RetroInEvent { pub enum RetroInEvent {
Start, Start,
LoadCore(std::path::PathBuf),
LoadGame(std::path::PathBuf),
} }
fn retro_thread_main( fn retro_thread_main(
surface: Arc<Mutex<Surface>>, context: &Arc<Mutex<DeviceContext>>,
resource: &Arc<Mutex<GraphicsResource>>,
event_tx: mpsc::Sender<RetroEvent>, event_tx: mpsc::Sender<RetroEvent>,
mut event_rx: mpsc::Receiver<RetroInEvent>, mut event_rx: mpsc::Receiver<RetroInEvent>,
) { ) {
let mut app = RetroState::new(surface, event_tx); let mut app = RetroState::new(context.clone(), resource.clone(), event_tx);
app.load_core("cores/swanstation_libretro.so") // do EGL init first
.expect("failed to load core"); app.hw_gl_egl_init();
app.load_game("roms/merged/nmv3/us/nmv3_us.cue") //merged/nmv1/us/nmv1_us.cue
.expect("failed to load game");
// sync // pre-setup
loop { loop {
match event_rx.blocking_recv() { match event_rx.blocking_recv() {
None => return (), None => return (),
Some(msg) => match msg { Some(msg) => match msg {
RetroInEvent::LoadCore(path) => {
app.load_core(path).expect("Failed to load core.");
}
RetroInEvent::LoadGame(path) => {
app.load_game(path).expect("Failed to load game!");
}
RetroInEvent::Start => break, RetroInEvent::Start => break,
}, },
} }
@ -435,18 +437,23 @@ fn retro_thread_main(
} }
pub fn spawn_retro_thread( pub fn spawn_retro_thread(
surface: Arc<Mutex<Surface>>, context: Arc<Mutex<DeviceContext>>,
resource: Arc<Mutex<GraphicsResource>>,
) -> (mpsc::Receiver<RetroEvent>, mpsc::Sender<RetroInEvent>) { ) -> (mpsc::Receiver<RetroEvent>, mpsc::Sender<RetroInEvent>) {
let (event_tx, event_rx) = mpsc::channel(8); // essentially semaphores
let (event_in_tx, event_in_rx) = mpsc::channel(8); let (event_tx, event_rx) = mpsc::channel(1);
let fb_clone = surface.clone(); let (event_in_tx, event_in_rx) = mpsc::channel(1);
let cloned = resource.clone();
let ctxcloned = context.clone();
// discard the join handle // discard the join handle
let _ = std::thread::Builder::new() let _ = std::thread::Builder::new()
.name("retro_game".into()) .name("retro_game".into())
.spawn(move || { .spawn(move || {
retro_thread_main(fb_clone, event_tx, event_in_rx); retro_thread_main(&ctxcloned, &cloned, event_tx, event_in_rx);
}).expect("failed to spawn the game thread"); })
.expect("failed to spawn the game thread");
(event_rx, event_in_tx) (event_rx, event_in_tx)
} }

View file

@ -21,7 +21,7 @@ pub fn alloc_boxed_slice<T: Sized>(len: usize) -> Box<[T]> {
unsafe { Box::from_raw(slice) } unsafe { Box::from_raw(slice) }
} }
/// A BGRA-format surface. /// A BGRA (or RGBA, I'm not your dad. There are funtions that assume the former though) format surface.
pub struct Surface { pub struct Surface {
buffer: Option<Box<[u32]>>, buffer: Option<Box<[u32]>>,
pub size: Size, pub size: Size,
@ -38,10 +38,18 @@ impl Surface {
} }
} }
pub fn clear(&mut self) {
self.buffer = None;
self.size = Size {
width: 0,
height: 0,
};
}
pub fn resize(&mut self, size: Size) { pub fn resize(&mut self, size: Size) {
self.size = size; self.size = size;
self.buffer = Some(alloc_boxed_slice(self.size.linear())); self.buffer = Some(alloc_boxed_slice(self.size.linear()));
} }
pub fn get_buffer(&mut self) -> &mut [u32] { pub fn get_buffer(&mut self) -> &mut [u32] {
@ -53,15 +61,14 @@ impl Surface {
pub fn blit_buffer(&mut self, src_at: Rect, data: &[u32]) { pub fn blit_buffer(&mut self, src_at: Rect, data: &[u32]) {
let mut off = 0; let mut off = 0;
let buf = self.buffer.as_mut().unwrap(); let buf = self.buffer.as_mut().unwrap();
let buf_slice = &mut *buf; let buf_slice = &mut *buf;
for y in src_at.y..src_at.y + src_at.height { for y in src_at.y..src_at.y + src_at.height {
let src = &data[off..off + src_at.width as usize]; let src = &data[off..off + src_at.width as usize];
let dest_start_offset = (y as usize * self.size.width as usize) + src_at.x as usize; let dest_start_offset = (y as usize * self.size.width as usize) + src_at.x as usize;
let dest = let dest = &mut buf_slice[dest_start_offset..dest_start_offset + src_at.width as usize];
&mut buf_slice[dest_start_offset..dest_start_offset + src_at.width as usize];
// This forces alpha to always be 0xff. I *could* probably do this in a clearer way though :( // This forces alpha to always be 0xff. I *could* probably do this in a clearer way though :(
for (dest, src_item) in dest.iter_mut().zip(src.iter()) { for (dest, src_item) in dest.iter_mut().zip(src.iter()) {

View file

@ -0,0 +1,27 @@
#!/bin/bash
# Does bindgen for CUDA cudaGL. (needs postprocessing)
set -exu
# --allowlist-type="^CU.*" \
# --allowlist-type="^cuuint(32|64)_t" \
# --allowlist-type="^cudaError_enum" \
# --allowlist-type="^cu.*Complex$" \
# --allowlist-type="^cuda.*" \
# --allowlist-type="^libraryPropertyType.*" \
# --allowlist-var="^CU.*" \
echo "use cudarc::sys::*; /* Hack :3 */" > ./sys.rs
bindgen \
--allowlist-type="" \
--allowlist-function="^cuGraphicsGL.*" \
--default-enum-style=rust \
--no-doc-comments \
--with-derive-default \
--with-derive-eq \
--with-derive-hash \
--with-derive-ord \
--use-core \
--dynamic-loading Lib \
gl.h -- -I/opt/cuda/include \
>> ./sys.rs

View file

@ -0,0 +1 @@
#include "cudaGL.h"

View file

@ -0,0 +1,14 @@
pub mod sys;
use sys::*;
pub mod safe;
pub unsafe fn lib() -> &'static Lib {
static LIB: std::sync::OnceLock<Lib> = std::sync::OnceLock::new();
LIB.get_or_init(|| {
if let Ok(lib) = Lib::new(libloading::library_filename("cuda")) {
return lib;
}
panic!("cuda library doesn't exist.");
})
}

View file

@ -0,0 +1,135 @@
use cudarc::driver::{result as cuda_result, safe as cuda_safe, sys as cuda_sys, CudaDevice};
use super::sys;
use std::sync::Arc;
pub struct MappedGraphicsResource {
resource: cuda_sys::CUgraphicsResource,
}
impl MappedGraphicsResource {
fn new(resource: cuda_sys::CUgraphicsResource) -> Self {
Self { resource }
}
pub fn map(&mut self) -> Result<(), cuda_result::DriverError> {
unsafe {
cuda_sys::lib()
.cuGraphicsMapResources(1, &mut self.resource, std::ptr::null_mut())
.result()?;
}
Ok(())
}
pub fn unmap(&mut self) -> Result<(), cuda_result::DriverError> {
unsafe {
cuda_sys::lib()
.cuGraphicsUnmapResources(1, &mut self.resource, std::ptr::null_mut())
.result()?;
}
Ok(())
}
pub fn get_mapped_array(&mut self) -> Result<cuda_sys::CUarray, cuda_result::DriverError> {
assert!(
!self.resource.is_null(),
"do not call GraphicsResource::get_mapped_array if no resource is actually registered"
);
let mut array: cuda_sys::CUarray = std::ptr::null_mut();
unsafe {
cuda_sys::lib()
.cuGraphicsSubResourceGetMappedArray(&mut array, self.resource, 0, 0)
.result()?;
}
Ok(array)
}
}
impl Drop for MappedGraphicsResource {
fn drop(&mut self) {
let _ = self.unmap();
}
}
/// Wrapper over cuGraphicsGL* apis
pub struct GraphicsResource {
context: Arc<CudaDevice>,
resource: cuda_sys::CUgraphicsResource,
}
impl GraphicsResource {
pub fn new(device: &Arc<CudaDevice>) -> Self {
Self {
context: device.clone(),
resource: std::ptr::null_mut(),
}
}
pub fn device(&self) -> Arc<CudaDevice> {
self.context.clone()
}
/// Maps this resource.
pub fn map(&mut self) -> Result<MappedGraphicsResource, cuda_result::DriverError> {
let mut res = MappedGraphicsResource::new(self.resource);
res.map()?;
Ok(res)
}
pub fn register(
&mut self,
texture_id: gl::types::GLuint,
texture_kind: gl::types::GLuint,
) -> Result<(), cuda_result::DriverError> {
// better to be safe than leak memory? idk.
if !self.resource.is_null() {
self.unregister()?;
}
unsafe {
super::lib()
.cuGraphicsGLRegisterImage(&mut self.resource, texture_id, texture_kind, 1)
.result()?;
}
Ok(())
}
pub fn is_registered(&self) -> bool {
!self.resource.is_null()
}
pub fn unregister(&mut self) -> Result<(), cuda_result::DriverError> {
assert!(
!self.resource.is_null(),
"do not call if no resource is actually registered"
);
unsafe {
cuda_sys::lib()
.cuGraphicsUnregisterResource(self.resource)
.result()?;
}
self.resource = std::ptr::null_mut();
Ok(())
}
}
impl Drop for GraphicsResource {
fn drop(&mut self) {
if self.is_registered() {
let _ = self.unregister();
}
}
}
unsafe impl Send for GraphicsResource {}

View file

@ -0,0 +1,73 @@
use cudarc::driver::sys::*; /* Hack :3 */
use gl::types::{GLenum, GLuint};
/* automatically generated by rust-bindgen 0.69.4 */
pub struct Lib {
__library: ::libloading::Library,
pub cuGraphicsGLRegisterBuffer: Result<
unsafe extern "C" fn(
pCudaResource: *mut CUgraphicsResource,
buffer: GLuint,
Flags: ::core::ffi::c_uint,
) -> CUresult,
::libloading::Error,
>,
pub cuGraphicsGLRegisterImage: Result<
unsafe extern "C" fn(
pCudaResource: *mut CUgraphicsResource,
image: GLuint,
target: GLenum,
Flags: ::core::ffi::c_uint,
) -> CUresult,
::libloading::Error,
>,
}
impl Lib {
pub unsafe fn new<P>(path: P) -> Result<Self, ::libloading::Error>
where
P: AsRef<::std::ffi::OsStr>,
{
let library = ::libloading::Library::new(path)?;
Self::from_library(library)
}
pub unsafe fn from_library<L>(library: L) -> Result<Self, ::libloading::Error>
where
L: Into<::libloading::Library>,
{
let __library = library.into();
let cuGraphicsGLRegisterBuffer = __library
.get(b"cuGraphicsGLRegisterBuffer\0")
.map(|sym| *sym);
let cuGraphicsGLRegisterImage = __library
.get(b"cuGraphicsGLRegisterImage\0")
.map(|sym| *sym);
Ok(Lib {
__library,
cuGraphicsGLRegisterBuffer,
cuGraphicsGLRegisterImage,
})
}
pub unsafe fn cuGraphicsGLRegisterBuffer(
&self,
pCudaResource: *mut CUgraphicsResource,
buffer: GLuint,
Flags: ::core::ffi::c_uint,
) -> CUresult {
(self
.cuGraphicsGLRegisterBuffer
.as_ref()
.expect("Expected function, got error."))(pCudaResource, buffer, Flags)
}
pub unsafe fn cuGraphicsGLRegisterImage(
&self,
pCudaResource: *mut CUgraphicsResource,
image: GLuint,
target: GLenum,
Flags: ::core::ffi::c_uint,
) -> CUresult {
(self
.cuGraphicsGLRegisterImage
.as_ref()
.expect("Expected function, got error."))(pCudaResource, image, target, Flags)
}
}

View file

@ -1,11 +1,16 @@
use cudarc::driver::{
sys::{CUdeviceptr, CUmemorytype},
CudaDevice, LaunchAsync,
};
use letsplay_gpu::egl_helpers::DeviceContext;
use std::{ use std::{
sync::{Arc, Mutex}, sync::{Arc, Mutex},
time::Duration, time::Duration,
}; };
use tokio::sync::mpsc::{self, error::TryRecvError}; use tokio::sync::mpsc::{self, error::TryRecvError};
use super::ffmpeg;
use super::h264_encoder::H264Encoder; use super::h264_encoder::H264Encoder;
use super::{cuda_gl::safe::GraphicsResource, ffmpeg};
pub enum EncodeThreadInput { pub enum EncodeThreadInput {
Init { size: crate::types::Size }, Init { size: crate::types::Size },
@ -37,7 +42,7 @@ impl EncoderState {
self.encoder = Some(H264Encoder::new_nvenc_swframe( self.encoder = Some(H264Encoder::new_nvenc_swframe(
size.clone(), size.clone(),
60, 60,
3 * (1024 * 1024), 2 * (1024 * 1024),
)?); )?);
// replace packet // replace packet
@ -85,7 +90,76 @@ impl EncoderState {
} }
} }
fn encoder_thread_main( struct EncoderStateHW {
encoder: Option<H264Encoder>,
frame: ffmpeg::frame::Video,
packet: ffmpeg::Packet,
}
impl EncoderStateHW {
fn new() -> Self {
Self {
encoder: None,
frame: ffmpeg::frame::Video::empty(),
packet: ffmpeg::Packet::empty(),
}
}
fn init(&mut self, device: &Arc<CudaDevice>, size: crate::types::Size) -> anyhow::Result<()> {
self.encoder = Some(H264Encoder::new_nvenc_hwframe(
&device,
size.clone(),
60,
2 * (1024 * 1024),
)?);
// replace packet
self.packet = ffmpeg::Packet::empty();
self.frame = self.encoder.as_mut().unwrap().create_frame()?;
Ok(())
}
#[inline]
fn frame(&mut self) -> &mut ffmpeg::frame::Video {
&mut self.frame
}
fn send_frame(&mut self, pts: u64, force_keyframe: bool) -> Option<ffmpeg::Packet> {
let frame = &mut self.frame;
let encoder = self.encoder.as_mut().unwrap();
// set frame metadata
unsafe {
if force_keyframe {
(*frame.as_mut_ptr()).pict_type = ffmpeg::sys::AVPictureType::AV_PICTURE_TYPE_I;
(*frame.as_mut_ptr()).flags = ffmpeg::sys::AV_FRAME_FLAG_KEY;
(*frame.as_mut_ptr()).key_frame = 1;
} else {
(*frame.as_mut_ptr()).pict_type = ffmpeg::sys::AVPictureType::AV_PICTURE_TYPE_NONE;
(*frame.as_mut_ptr()).flags = 0i32;
(*frame.as_mut_ptr()).key_frame = 0;
}
(*frame.as_mut_ptr()).pts = pts as i64;
}
encoder.send_frame(&*frame);
encoder
.receive_packet(&mut self.packet)
.expect("Failed to recieve packet");
unsafe {
if !self.packet.is_empty() {
return Some(self.packet.clone());
}
}
return None;
}
}
fn encoder_thread_swframe_main(
mut rx: mpsc::Receiver<EncodeThreadInput>, mut rx: mpsc::Receiver<EncodeThreadInput>,
tx: mpsc::Sender<EncodeThreadOutput>, tx: mpsc::Sender<EncodeThreadOutput>,
frame: &Arc<Mutex<Option<ffmpeg::frame::Video>>>, frame: &Arc<Mutex<Option<ffmpeg::frame::Video>>>,
@ -143,7 +217,7 @@ fn encoder_thread_main(
Ok(()) Ok(())
} }
pub fn encoder_thread_spawn( pub fn encoder_thread_spawn_swframe(
frame: &Arc<Mutex<Option<ffmpeg::frame::Video>>>, frame: &Arc<Mutex<Option<ffmpeg::frame::Video>>>,
) -> ( ) -> (
mpsc::Receiver<EncodeThreadOutput>, mpsc::Receiver<EncodeThreadOutput>,
@ -154,7 +228,192 @@ pub fn encoder_thread_spawn(
let clone = Arc::clone(frame); let clone = Arc::clone(frame);
std::thread::spawn(move || encoder_thread_main(in_rx, out_tx, &clone)); std::thread::spawn(move || encoder_thread_swframe_main(in_rx, out_tx, &clone));
(out_rx, in_tx)
}
fn encoder_thread_hwframe_main(
mut rx: mpsc::Receiver<EncodeThreadInput>,
tx: mpsc::Sender<EncodeThreadOutput>,
cuda_device: &Arc<CudaDevice>,
cuda_resource: &Arc<Mutex<GraphicsResource>>,
gl_context: &Arc<Mutex<DeviceContext>>,
) -> anyhow::Result<()> {
let mut frame_number = 0u64;
let mut force_keyframe = false;
let mut encoder = EncoderStateHW::new();
// :)
cuda_device.bind_to_thread()?;
/*
let mut memcpy = cudarc::driver::sys::CUDA_MEMCPY2D_st::default();
// setup basic src stuff
memcpy.srcXInBytes = 0;
memcpy.srcY = 0;
memcpy.srcMemoryType = CUmemorytype::CU_MEMORYTYPE_ARRAY;
// dest
memcpy.dstXInBytes = 0;
memcpy.dstY = 0;
memcpy.dstMemoryType = CUmemorytype::CU_MEMORYTYPE_DEVICE;
*/
loop {
match rx.try_recv() {
Ok(msg) => match msg {
EncodeThreadInput::Init { size } => {
frame_number = 0;
if force_keyframe {
force_keyframe = false;
}
encoder
.init(cuda_device, size)
.expect("encoder init failed");
}
EncodeThreadInput::ForceKeyframe => {
force_keyframe = true;
}
EncodeThreadInput::SendFrame => {
// copy gl frame *ON THE GPU*
{
let mut gl_ctx = gl_context.lock().expect("you dumb fuck");
let mut gl_resource =
cuda_resource.lock().expect("couldnt lock GL resource!");
gl_ctx.make_current();
let mut mapped = gl_resource
.map()
.expect("couldnt map graphics resource. Its joever");
let array = mapped
.get_mapped_array()
.expect("well its all over anyways");
{
let frame = encoder.frame();
let width = frame.width();
let height = frame.height();
let launch_config = cudarc::driver::LaunchConfig {
grid_dim: (width / 16 + 1, height / 2 + 1, 0),
block_dim: (16, 2, 0),
shared_mem_bytes: 0,
};
let flip_opengl = cuda_device
.get_func("module", "flip_opengl")
.expect("dumb fucker");
unsafe {
let frame_ptr = frame.as_mut_ptr();
let mut params = [
array as *mut std::ffi::c_void,
(*frame_ptr).data[0] as CUdeviceptr as *mut std::ffi::c_void,
width as *const u32 as *mut std::ffi::c_void,
height as *const u32 as *mut std::ffi::c_void,
];
flip_opengl
.launch(launch_config, &mut params[..])
.expect("oh its done");
cudarc::driver::sys::lib()
.cuStreamSynchronize(std::ptr::null_mut())
.result()
.expect("you banned");
}
}
/*
// setup the cuMemcpy2D
{
let frame = encoder.frame();
memcpy.srcArray = array;
unsafe {
let frame_ptr = frame.as_mut_ptr();
memcpy.dstDevice = (*frame_ptr).data[0] as CUdeviceptr;
memcpy.dstPitch = (*frame_ptr).linesize[0] as usize;
memcpy.WidthInBytes = ((*frame_ptr).width * 4) as usize;
memcpy.Height = (*frame_ptr).height as usize;
}
}
unsafe {
cudarc::driver::sys::lib()
.cuMemcpy2DAsync_v2(&memcpy, std::ptr::null_mut())
.result()
.expect("cuMemcpy2D fail epic");
cudarc::driver::sys::lib()
.cuStreamSynchronize(std::ptr::null_mut())
.result()
.expect("you banned");
} */
mapped.unmap().expect("fuck you asshole");
gl_ctx.release();
}
if let Some(pkt) = encoder.send_frame(frame_number as u64, force_keyframe) {
// A bit less clear than ::empty(), but it's "Safe"
if let Some(_) = pkt.data() {
let _ = tx.blocking_send(EncodeThreadOutput::Frame {
packet: pkt.clone(),
});
}
frame_number += 1;
}
if force_keyframe {
force_keyframe = false;
}
}
},
Err(TryRecvError::Disconnected) => break,
Err(TryRecvError::Empty) => {
std::thread::sleep(Duration::from_millis(1));
}
}
}
Ok(())
}
pub fn encoder_thread_spawn_hwframe(
cuda_device: &Arc<CudaDevice>,
cuda_resource: &Arc<Mutex<GraphicsResource>>,
gl_context: &Arc<Mutex<DeviceContext>>,
) -> (
mpsc::Receiver<EncodeThreadOutput>,
mpsc::Sender<EncodeThreadInput>,
) {
let (in_tx, in_rx) = mpsc::channel(1);
let (out_tx, out_rx) = mpsc::channel(1);
let dev_clone = Arc::clone(cuda_device);
let rsrc_clone = Arc::clone(cuda_resource);
let gl_clone = Arc::clone(gl_context);
std::thread::spawn(move || {
encoder_thread_hwframe_main(in_rx, out_tx, &dev_clone, &rsrc_clone, &gl_clone)
});
(out_rx, in_tx) (out_rx, in_tx)
} }

View file

@ -135,8 +135,8 @@ impl H264Encoder {
video_encoder_context.set_format(ffmpeg::format::Pixel::ZRGB32); video_encoder_context.set_format(ffmpeg::format::Pixel::ZRGB32);
video_encoder_context.set_qmin(38); video_encoder_context.set_qmin(37);
video_encoder_context.set_qmax(32); video_encoder_context.set_qmax(33);
// set h264_nvenc options // set h264_nvenc options
let mut dict = ffmpeg::Dictionary::new(); let mut dict = ffmpeg::Dictionary::new();
@ -148,7 +148,7 @@ impl H264Encoder {
// TODO: // TODO:
dict.set("rc", "vbr"); dict.set("rc", "vbr");
dict.set("qp", "45"); dict.set("qp", "35");
dict.set("forced-idr", "1"); dict.set("forced-idr", "1");
@ -169,9 +169,6 @@ impl H264Encoder {
max_framerate: u32, max_framerate: u32,
bitrate: usize, bitrate: usize,
) -> anyhow::Result<Self> { ) -> anyhow::Result<Self> {
/*
(See FIXMEs above)
let cuda_device_context = super::hwdevice::CudaDeviceContextBuilder::new()? let cuda_device_context = super::hwdevice::CudaDeviceContextBuilder::new()?
.set_cuda_context((*cuda_device.cu_primary_ctx()) as *mut _) .set_cuda_context((*cuda_device.cu_primary_ctx()) as *mut _)
.build() .build()
@ -180,14 +177,53 @@ impl H264Encoder {
let mut hw_frame_context = super::hwframe::HwFrameContextBuilder::new(cuda_device_context)? let mut hw_frame_context = super::hwframe::HwFrameContextBuilder::new(cuda_device_context)?
.set_width(size.width) .set_width(size.width)
.set_height(size.height) .set_height(size.height)
.set_sw_format(ffmpeg::format::Pixel::ZRGB32) .set_sw_format(ffmpeg::format::Pixel::ZBGR32)
.set_format(ffmpeg::format::Pixel::CUDA) .set_format(ffmpeg::format::Pixel::CUDA)
.build() .build()
.with_context(|| "while trying to create CUDA frame context")?; .with_context(|| "while trying to create CUDA frame context")?;
*/ let (mut encoder, mut video_encoder_context) =
create_context_and_set_common_parameters("h264_nvenc", &size, max_framerate, bitrate)
.with_context(|| "while trying to create encoder")?;
todo!("Implement me!"); video_encoder_context.set_format(ffmpeg::format::Pixel::CUDA);
video_encoder_context.set_qmin(37);
video_encoder_context.set_qmax(33);
unsafe {
// FIXME: this currently breaks the avbufferref system a bit
(*video_encoder_context.as_mut_ptr()).hw_frames_ctx = hw_frame_context.as_raw_mut();
(*video_encoder_context.as_mut_ptr()).hw_device_ctx =
hw_frame_context.as_device_context_mut();
}
// set h264_nvenc options
let mut dict = ffmpeg::Dictionary::new();
dict.set("tune", "ull");
dict.set("preset", "p1");
dict.set("profile", "main");
// TODO:
dict.set("rc", "vbr");
dict.set("qp", "35");
dict.set("forced-idr", "1");
// damn you
dict.set("delay", "0");
dict.set("zerolatency", "1");
let encoder = video_encoder_context
.open_as_with(encoder, dict)
.with_context(|| "While opening h264_nvenc video codec")?;
Ok(Self::NvencHWFrame {
encoder: encoder,
hw_context: hw_frame_context,
})
} }
// NOTE: It's a bit pointless to have this have a mut borrow, // NOTE: It's a bit pointless to have this have a mut borrow,
@ -207,46 +243,44 @@ impl H264Encoder {
// } // }
//} //}
pub fn create_frame(&mut self) -> ffmpeg::frame::Video { pub fn create_frame(&mut self) -> anyhow::Result<ffmpeg::frame::Video> {
match self { match self {
Self::Software { encoder } | Self::NvencSWFrame { encoder } => { Self::Software { encoder } | Self::NvencSWFrame { encoder } => {
return ffmpeg::frame::Video::new(encoder.format(), encoder.width(), encoder.height()); return Ok(ffmpeg::frame::Video::new(
encoder.format(),
encoder.width(),
encoder.height(),
));
} }
Self::NvencHWFrame { Self::NvencHWFrame {
encoder, encoder,
hw_context, hw_context,
} => { } => {
todo!("Implement H264Encoder::create_frame() for NvencHWFrame!"); let mut frame = ffmpeg::frame::Video::empty();
unsafe {
(*frame.as_mut_ptr()).format = ffmpeg::format::Pixel::CUDA as i32;
(*frame.as_mut_ptr()).width = encoder.width() as i32;
(*frame.as_mut_ptr()).height = encoder.height() as i32;
(*frame.as_mut_ptr()).hw_frames_ctx = hw_context.as_raw_mut();
//ffmpeg::sys::av_frame_get_buffer(frame.as_mut_ptr(), 32);
hw_context.get_buffer(&mut frame)?;
hw_context.get_buffer(&mut frame)?;
(*frame.as_mut_ptr()).linesize[0] = (*frame.as_ptr()).width * 4;
return Ok(frame);
}
} }
} }
/* /*
unsafe {
let mut frame = ffmpeg::Frame::empty();
(*frame.as_mut_ptr()).format = pixel_format as i32;
(*frame.as_mut_ptr()).width = width as i32;
(*frame.as_mut_ptr()).height = height as i32;
(*frame.as_mut_ptr()).hw_frames_ctx = context.as_raw_mut();
super::check_ret(ffmpeg::sys::av_hwframe_get_buffer(
context.as_raw_mut(),
frame.as_mut_ptr(),
0,
))?;
super::check_ret(ffmpeg::sys::av_hwframe_get_buffer(
context.as_raw_mut(),
frame.as_mut_ptr(),
0,
))?;
(*frame.as_mut_ptr()).linesize[0] = (*frame.as_ptr()).width * 4; */
Ok(frame)
}
*/
todo!("FIXME"); todo!("FIXME");
} }
@ -267,7 +301,8 @@ impl H264Encoder {
encoder, encoder,
hw_context, hw_context,
} => { } => {
todo!("Implement send_frame() for NvencHWFrame"); //todo!("Implement send_frame() for NvencHWFrame");
encoder.send_frame(frame).unwrap();
} }
} }
} }

View file

@ -7,82 +7,103 @@ use ffmpeg::format::Pixel;
use super::{check_ret, hwdevice::CudaDeviceContext}; use super::{check_ret, hwdevice::CudaDeviceContext};
pub struct HwFrameContext { pub struct HwFrameContext {
_cuda_device_context: CudaDeviceContext, _cuda_device_context: CudaDeviceContext,
buffer: *mut ffmpeg::sys::AVBufferRef, buffer: *mut ffmpeg::sys::AVBufferRef,
} }
impl HwFrameContext { impl HwFrameContext {
fn new(cuda_device_context: CudaDeviceContext, buffer: *mut ffmpeg::sys::AVBufferRef) -> Self { fn new(cuda_device_context: CudaDeviceContext, buffer: *mut ffmpeg::sys::AVBufferRef) -> Self {
Self { _cuda_device_context: cuda_device_context, buffer } Self {
} _cuda_device_context: cuda_device_context,
buffer,
}
}
// pub fn as_context_mut(&mut self) -> &mut ffmpeg::sys::AVHWFramesContext { // pub fn as_context_mut(&mut self) -> &mut ffmpeg::sys::AVHWFramesContext {
// unsafe { &mut *((*self.buffer).data as *mut ffmpeg::sys::AVHWFramesContext) } // unsafe { &mut *((*self.buffer).data as *mut ffmpeg::sys::AVHWFramesContext) }
// } // }
// pub fn as_context(&self) -> &ffmpeg::sys::AVHWFramesContext { // pub fn as_context(&self) -> &ffmpeg::sys::AVHWFramesContext {
// unsafe { &*((*self.buffer).data as *const ffmpeg::sys::AVHWFramesContext) } // unsafe { &*((*self.buffer).data as *const ffmpeg::sys::AVHWFramesContext) }
// } // }
pub fn as_raw_mut(&mut self) -> &mut ffmpeg::sys::AVBufferRef { pub fn as_raw_mut(&mut self) -> &mut ffmpeg::sys::AVBufferRef {
unsafe { &mut *self.buffer } unsafe { &mut *self.buffer }
} }
// pub fn as_raw(&self) -> &ffmpeg::sys::AVBufferRef { pub fn as_device_context_mut(&mut self) -> &mut ffmpeg::sys::AVBufferRef {
// unsafe { &*self.buffer } unsafe {
// } self._cuda_device_context.as_raw_mut()
}
}
/// call once to allocate frame
pub fn get_buffer(&mut self, frame: &mut ffmpeg::frame::Video) -> Result<(), ffmpeg::Error> {
unsafe {
super::check_ret(ffmpeg::sys::av_hwframe_get_buffer(self.buffer, frame.as_mut_ptr(), 0))?;
}
Ok(())
}
// pub fn as_raw(&self) -> &ffmpeg::sys::AVBufferRef {
// unsafe { &*self.buffer }
// }
} }
unsafe impl Send for HwFrameContext { } unsafe impl Send for HwFrameContext {}
pub struct HwFrameContextBuilder { pub struct HwFrameContextBuilder {
cuda_device_context: CudaDeviceContext, cuda_device_context: CudaDeviceContext,
buffer: *mut ffmpeg::sys::AVBufferRef, buffer: *mut ffmpeg::sys::AVBufferRef,
} }
impl HwFrameContextBuilder { impl HwFrameContextBuilder {
pub fn new(mut cuda_device_context: CudaDeviceContext) -> anyhow::Result<Self> { pub fn new(mut cuda_device_context: CudaDeviceContext) -> anyhow::Result<Self> {
let buffer = unsafe { ffmpeg::sys::av_hwframe_ctx_alloc(cuda_device_context.as_raw_mut()) }; let buffer = unsafe { ffmpeg::sys::av_hwframe_ctx_alloc(cuda_device_context.as_raw_mut()) };
if buffer.is_null() { if buffer.is_null() {
return Err(anyhow::anyhow!("could not allocate a hwframe context")); return Err(anyhow::anyhow!("could not allocate a hwframe context"));
} }
Ok(Self { cuda_device_context, buffer }) Ok(Self {
} cuda_device_context,
buffer,
})
}
pub fn build(mut self) -> Result<HwFrameContext, ffmpeg::Error> { pub fn build(mut self) -> Result<HwFrameContext, ffmpeg::Error> {
check_ret(unsafe { ffmpeg::sys::av_hwframe_ctx_init(self.buffer) })?; check_ret(unsafe { ffmpeg::sys::av_hwframe_ctx_init(self.buffer) })?;
let result = Ok(HwFrameContext::new(self.cuda_device_context, self.buffer)); let result = Ok(HwFrameContext::new(self.cuda_device_context, self.buffer));
self.buffer = null_mut(); self.buffer = null_mut();
result result
} }
pub fn set_width(mut self, width: u32) -> Self { pub fn set_width(mut self, width: u32) -> Self {
self.as_frame_mut().width = width as i32; self.as_frame_mut().width = width as i32;
self self
} }
pub fn set_height(mut self, height: u32) -> Self { pub fn set_height(mut self, height: u32) -> Self {
self.as_frame_mut().height = height as i32; self.as_frame_mut().height = height as i32;
self self
} }
pub fn set_sw_format(mut self, sw_format: Pixel) -> Self { pub fn set_sw_format(mut self, sw_format: Pixel) -> Self {
self.as_frame_mut().sw_format = sw_format.into(); self.as_frame_mut().sw_format = sw_format.into();
self self
} }
pub fn set_format(mut self, format: Pixel) -> Self { pub fn set_format(mut self, format: Pixel) -> Self {
self.as_frame_mut().format = format.into(); self.as_frame_mut().format = format.into();
self self
} }
pub fn as_frame_mut(&mut self) -> &mut ffmpeg::sys::AVHWFramesContext { pub fn as_frame_mut(&mut self) -> &mut ffmpeg::sys::AVHWFramesContext {
unsafe { &mut *((*self.buffer).data as *mut ffmpeg::sys::AVHWFramesContext) } unsafe { &mut *((*self.buffer).data as *mut ffmpeg::sys::AVHWFramesContext) }
} }
// pub fn as_frame(&self) -> &ffmpeg::sys::AVHWFramesContext { // pub fn as_frame(&self) -> &ffmpeg::sys::AVHWFramesContext {
// unsafe { &*((*self.buffer).data as *const ffmpeg::sys::AVHWFramesContext) } // unsafe { &*((*self.buffer).data as *const ffmpeg::sys::AVHWFramesContext) }
// } // }
} }

View file

@ -9,6 +9,8 @@ pub mod hwframe;
pub mod encoder_thread; pub mod encoder_thread;
pub mod cuda_gl;
// from hgaiser/moonshine // from hgaiser/moonshine
pub fn check_ret(error_code: i32) -> Result<(), ffmpeg::Error> { pub fn check_ret(error_code: i32) -> Result<(), ffmpeg::Error> {
if error_code != 0 { if error_code != 0 {