working gpu-only encode
This commit is contained in:
parent
9248fe91a9
commit
09142f9668
14 changed files with 920 additions and 277 deletions
1
server/Cargo.lock
generated
1
server/Cargo.lock
generated
|
@ -1414,6 +1414,7 @@ dependencies = [
|
|||
"futures-util",
|
||||
"gl",
|
||||
"letsplay_gpu",
|
||||
"libloading",
|
||||
"rand",
|
||||
"retro_frontend",
|
||||
"serde",
|
||||
|
|
|
@ -32,6 +32,7 @@ tracing = "0.1.40"
|
|||
tracing-subscriber = "0.3.18"
|
||||
xkeysym = "0.2.1"
|
||||
async-trait = "0.1.83"
|
||||
libloading = "0.8.5"
|
||||
|
||||
|
||||
[patch.crates-io]
|
||||
|
|
|
@ -5,10 +5,15 @@ mod video;
|
|||
|
||||
mod transport;
|
||||
|
||||
use anyhow::Context;
|
||||
use async_trait::async_trait;
|
||||
|
||||
use cudarc::driver::CudaDevice;
|
||||
use letsplay_gpu::egl_helpers::DeviceContext;
|
||||
use retro_thread::{spawn_retro_thread, RetroEvent};
|
||||
use transport::websocket::WebsocketTransport;
|
||||
use transport::{Transport, TransportReciever};
|
||||
use video::cuda_gl::safe::GraphicsResource;
|
||||
use video::encoder_thread::EncodeThreadInput;
|
||||
use video::{encoder_thread, ffmpeg};
|
||||
|
||||
|
@ -44,19 +49,19 @@ enum WsMessage {
|
|||
Json(String),
|
||||
}
|
||||
|
||||
struct AppState {
|
||||
struct AppState<T: Transport> {
|
||||
encoder_tx: Arc<TokioMutex<mpsc::Sender<EncodeThreadInput>>>,
|
||||
inputs: Arc<TokioMutex<Vec<u32>>>,
|
||||
|
||||
transport: Arc<crate::transport::websocket::WebsocketTransport>,
|
||||
transport: Arc<T>,
|
||||
connection_count: TokioMutex<usize>,
|
||||
}
|
||||
|
||||
impl AppState {
|
||||
fn new(
|
||||
encoder_tx: mpsc::Sender<EncodeThreadInput>,
|
||||
transport: Arc<crate::transport::websocket::WebsocketTransport>,
|
||||
) -> Self {
|
||||
impl<T> AppState<T>
|
||||
where
|
||||
T: Transport + Send + Sync + 'static,
|
||||
{
|
||||
fn new(encoder_tx: mpsc::Sender<EncodeThreadInput>, transport: Arc<T>) -> Self {
|
||||
Self {
|
||||
encoder_tx: Arc::new(TokioMutex::new(encoder_tx)),
|
||||
inputs: Arc::new(TokioMutex::new(Vec::new())),
|
||||
|
@ -67,7 +72,10 @@ impl AppState {
|
|||
}
|
||||
|
||||
#[async_trait]
|
||||
impl TransportReciever for AppState {
|
||||
impl<T> TransportReciever for AppState<T>
|
||||
where
|
||||
T: Transport + Send + Sync + 'static,
|
||||
{
|
||||
async fn on_connect(&self, username: &String) -> anyhow::Result<()> {
|
||||
println!("{username} joined!");
|
||||
|
||||
|
@ -105,7 +113,6 @@ impl TransportReciever for AppState {
|
|||
"msg": json["msg"].as_str().unwrap()
|
||||
});
|
||||
|
||||
|
||||
self.transport
|
||||
.broadcast_message(transport::TransportMessage::Text(
|
||||
serde_json::to_string(&send).expect("oh well"),
|
||||
|
@ -177,6 +184,23 @@ impl TransportReciever for AppState {
|
|||
}
|
||||
}
|
||||
|
||||
const FLIP_SRC: &str = "
|
||||
extern \"C\" __global__ void flip_opengl(
|
||||
const unsigned* pSrc,
|
||||
unsigned* pDest,
|
||||
unsigned width,
|
||||
unsigned height
|
||||
) {
|
||||
const unsigned x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
const unsigned y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
if (x < width && y < height) {
|
||||
// let reversed_y = (size.height - 1) - y;
|
||||
unsigned reversed_y = (height - 1) - y;
|
||||
pDest[y * width + x] = pSrc[reversed_y * width + x];
|
||||
}
|
||||
}";
|
||||
|
||||
#[tokio::main(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
// Setup a tracing subscriber
|
||||
|
@ -186,36 +210,56 @@ async fn main() -> anyhow::Result<()> {
|
|||
|
||||
tracing::subscriber::set_global_default(subscriber).unwrap();
|
||||
|
||||
let surface = Arc::new(Mutex::new(surface::Surface::new()));
|
||||
|
||||
// H.264 encoder related
|
||||
let frame: Arc<Mutex<Option<ffmpeg::frame::Video>>> = Arc::new(Mutex::new(None));
|
||||
let (mut encoder_rx, encoder_tx) = encoder_thread::encoder_thread_spawn(&frame);
|
||||
let device = CudaDevice::new(0)?;
|
||||
|
||||
let transport = Arc::new(crate::transport::websocket::WebsocketTransport::new());
|
||||
let ptx = cudarc::nvrtc::compile_ptx(&FLIP_SRC).with_context(|| "compiling support kernel")?;
|
||||
|
||||
println!("compiled kernel");
|
||||
|
||||
// pop it in
|
||||
device.load_ptx(ptx, "module", &["flip_opengl"])?;
|
||||
|
||||
let egl_ctx = Arc::new(Mutex::new(DeviceContext::new(0)));
|
||||
|
||||
let resource = Arc::new(Mutex::new(GraphicsResource::new(&device)));
|
||||
|
||||
let (mut encoder_rx, encoder_tx) = encoder_thread::encoder_thread_spawn_hwframe(
|
||||
&device.clone(),
|
||||
&resource.clone(),
|
||||
&egl_ctx.clone(),
|
||||
);
|
||||
|
||||
let transport = Arc::new(WebsocketTransport::new());
|
||||
let state = Arc::new(AppState::new(encoder_tx, transport.clone()));
|
||||
|
||||
let (mut event_rx, event_in_tx) = spawn_retro_thread(surface.clone());
|
||||
let (mut retro_event_rx, retro_input_event_tx) =
|
||||
spawn_retro_thread(egl_ctx.clone(), resource.clone());
|
||||
|
||||
let state_clone = state.clone();
|
||||
|
||||
let transport_clone = transport.clone();
|
||||
|
||||
// retro event handler. drives the encoder thread too
|
||||
let _ = std::thread::Builder::new()
|
||||
.name("retro_event_rx".into())
|
||||
.spawn(move || {
|
||||
let surface_clone = surface.clone();
|
||||
let frame_clone = frame.clone();
|
||||
// load game
|
||||
let _ = retro_input_event_tx.blocking_send(retro_thread::RetroInEvent::LoadCore(
|
||||
"cores/swanstation_libretro.so".into(),
|
||||
));
|
||||
let _ = retro_input_event_tx.blocking_send(retro_thread::RetroInEvent::LoadGame(
|
||||
"roms/nmv1_us.cue".into(),
|
||||
));
|
||||
|
||||
// start the libretro thread looping now that we're alive
|
||||
let _ = event_in_tx.blocking_send(retro_thread::RetroInEvent::Start);
|
||||
let _ = retro_input_event_tx.blocking_send(retro_thread::RetroInEvent::Start);
|
||||
|
||||
loop {
|
||||
match event_rx.try_recv() {
|
||||
Ok(msg) => match msg {
|
||||
match retro_event_rx.blocking_recv() {
|
||||
Some(msg) => match msg {
|
||||
RetroEvent::Frame => {
|
||||
/*
|
||||
let mut same = true;
|
||||
|
||||
{
|
||||
let mut frame_locked = frame.lock().expect(
|
||||
"Couldn't lock frame on our end. Did the encoder thread panic?",
|
||||
|
@ -229,6 +273,8 @@ async fn main() -> anyhow::Result<()> {
|
|||
let mut surf = surface_clone.lock().expect(
|
||||
"locking the VNC surface to paint it to the ffmpeg frame failed",
|
||||
);
|
||||
|
||||
|
||||
let surf_buf = surf.get_buffer();
|
||||
|
||||
let buf_ptr =
|
||||
|
@ -247,12 +293,27 @@ async fn main() -> anyhow::Result<()> {
|
|||
)
|
||||
};
|
||||
|
||||
// If any line differs then the frame isn't the same and should be encoded.
|
||||
if &surf_buf[line_stride..line_stride + width as usize]
|
||||
!= dest_line_slice
|
||||
{
|
||||
same = false;
|
||||
}
|
||||
|
||||
dest_line_slice.copy_from_slice(
|
||||
&surf_buf[line_stride..line_stride + width as usize],
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if !same {
|
||||
let _ = state_clone
|
||||
.encoder_tx
|
||||
.blocking_lock()
|
||||
.blocking_send(encoder_thread::EncodeThreadInput::SendFrame);
|
||||
}
|
||||
*/
|
||||
|
||||
let _ = state_clone
|
||||
.encoder_tx
|
||||
.blocking_lock()
|
||||
|
@ -261,6 +322,12 @@ async fn main() -> anyhow::Result<()> {
|
|||
|
||||
RetroEvent::Resize { size } => {
|
||||
// make a new frame for the encoder
|
||||
|
||||
let _ = state_clone.encoder_tx.blocking_lock().blocking_send(
|
||||
encoder_thread::EncodeThreadInput::Init { size: size.clone() },
|
||||
);
|
||||
|
||||
/*
|
||||
{
|
||||
let mut lk_frame = frame_clone.lock().expect("Couldn't lock frame");
|
||||
|
||||
|
@ -270,30 +337,21 @@ async fn main() -> anyhow::Result<()> {
|
|||
size.clone().height,
|
||||
));
|
||||
}
|
||||
|
||||
let _ = state_clone.encoder_tx.blocking_lock().blocking_send(
|
||||
encoder_thread::EncodeThreadInput::Init { size: size.clone() },
|
||||
);
|
||||
*/
|
||||
}
|
||||
|
||||
RetroEvent::WantInputs { tx } => {
|
||||
let inputs = state_clone.inputs.blocking_lock();
|
||||
//tracing::info!("giving inputs {:?}", inputs);
|
||||
tx.send(inputs.clone()).expect("FUCK");
|
||||
}
|
||||
},
|
||||
|
||||
Err(TryRecvError::Disconnected) => break,
|
||||
Err(TryRecvError::Empty) => {}
|
||||
None => break,
|
||||
}
|
||||
|
||||
match encoder_rx.try_recv() {
|
||||
Ok(msg) => match msg {
|
||||
encoder_thread::EncodeThreadOutput::Frame { packet } => {
|
||||
// let _ = state_clone
|
||||
// .websocket_broadcast_tx
|
||||
// .send(WsMessage::VideoPacket { packet });
|
||||
|
||||
// :(
|
||||
let packet_data = {
|
||||
let slice = packet.data().expect(
|
||||
|
@ -301,7 +359,9 @@ async fn main() -> anyhow::Result<()> {
|
|||
);
|
||||
slice.to_vec()
|
||||
};
|
||||
let _ = transport_clone.broadcast_message(transport::TransportMessage::Binary(packet_data));
|
||||
let _ = state_clone.transport.broadcast_message(
|
||||
transport::TransportMessage::Binary(packet_data),
|
||||
);
|
||||
}
|
||||
},
|
||||
Err(TryRecvError::Empty) => {}
|
||||
|
|
|
@ -17,7 +17,7 @@ use retro_frontend::{
|
|||
use gpu::egl_helpers::DeviceContext;
|
||||
use letsplay_gpu as gpu;
|
||||
|
||||
use crate::{surface::Surface, types::Size};
|
||||
use crate::{surface::Surface, types::Size, video::cuda_gl::safe::GraphicsResource};
|
||||
|
||||
/// Called by OpenGL. We use this to dump errors.
|
||||
extern "system" fn opengl_message_callback(
|
||||
|
@ -49,30 +49,38 @@ pub struct RetroState {
|
|||
pad: RetroPad,
|
||||
|
||||
// EGL state
|
||||
egl_context: Option<DeviceContext>,
|
||||
egl_context: Arc<Mutex<DeviceContext>>,
|
||||
|
||||
/// Locked framebuffer.
|
||||
framebuffer: Arc<Mutex<Surface>>,
|
||||
software_framebuffer: Surface,
|
||||
|
||||
/// OpenGL FBO
|
||||
gl_framebuffer: gpu::GlFramebuffer,
|
||||
|
||||
/// Cached readback buffer.
|
||||
readback_buffer: Surface,
|
||||
gl_rendering: bool,
|
||||
|
||||
cuda_resource: Arc<Mutex<GraphicsResource>>,
|
||||
|
||||
event_tx: mpsc::Sender<RetroEvent>,
|
||||
}
|
||||
|
||||
impl RetroState {
|
||||
pub fn new(framebuffer: Arc<Mutex<Surface>>, event_tx: mpsc::Sender<RetroEvent>) -> Box<Self> {
|
||||
pub fn new(
|
||||
device_context: Arc<Mutex<DeviceContext>>,
|
||||
resource: Arc<Mutex<GraphicsResource>>,
|
||||
event_tx: mpsc::Sender<RetroEvent>,
|
||||
) -> Box<Self> {
|
||||
let mut boxed = Box::new(Self {
|
||||
frontend: None,
|
||||
pad: RetroPad::new(),
|
||||
|
||||
egl_context: None,
|
||||
framebuffer,
|
||||
egl_context: device_context.clone(),
|
||||
software_framebuffer: Surface::new(),
|
||||
gl_framebuffer: gpu::GlFramebuffer::new(),
|
||||
readback_buffer: Surface::new(),
|
||||
gl_rendering: false,
|
||||
|
||||
cuda_resource: resource.clone(),
|
||||
|
||||
event_tx,
|
||||
});
|
||||
|
||||
|
@ -127,14 +135,29 @@ impl RetroState {
|
|||
|
||||
/// Initalizes the headless EGL context used for OpenGL rendering.
|
||||
fn hw_gl_egl_init(&mut self) {
|
||||
self.egl_context = Some(DeviceContext::new(0));
|
||||
self.egl_context.lock().expect("piss").make_current();
|
||||
|
||||
unsafe {
|
||||
// Load OpenGL functions using the EGL loader.
|
||||
gl::load_with(|s| {
|
||||
let str = std::ffi::CString::new(s).expect("gl::load_with fail");
|
||||
std::mem::transmute(gpu::egl::GetProcAddress(str.as_ptr()))
|
||||
});
|
||||
|
||||
// set OpenGL debug message callback
|
||||
gl::Enable(gl::DEBUG_OUTPUT);
|
||||
gl::DebugMessageCallback(Some(opengl_message_callback), std::ptr::null());
|
||||
}
|
||||
}
|
||||
|
||||
/// Destroys OpenGL resources and the EGL context.
|
||||
fn hw_gl_destroy(&mut self) {
|
||||
if self.egl_context.is_some() {
|
||||
self.gl_framebuffer.destroy();
|
||||
self.egl_context.take().unwrap().destroy()
|
||||
self.gl_framebuffer.destroy();
|
||||
|
||||
{
|
||||
let mut locked = self.egl_context.lock().expect("piss");
|
||||
locked.release();
|
||||
locked.destroy();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -143,59 +166,39 @@ impl RetroState {
|
|||
let step_ms = (1.0 / av_info.timing.fps) * 1000.;
|
||||
let step_duration = Duration::from_millis(step_ms as u64);
|
||||
|
||||
self.get_frontend().run_frame();
|
||||
{
|
||||
let egl = (&mut self.egl_context).clone();
|
||||
let locked_egl = egl.lock().expect("fuck YOU");
|
||||
|
||||
locked_egl.make_current();
|
||||
self.get_frontend().run_frame();
|
||||
locked_egl.release();
|
||||
}
|
||||
|
||||
std::thread::sleep(step_duration);
|
||||
}
|
||||
|
||||
/// Bleh, I don't like this is an associated fn, but whatever
|
||||
fn update_impl(framebuffer: Arc<Mutex<Surface>>, slice: &[u32], pitch: u32, from_opengl: bool) {
|
||||
let mut framebuffer_locked = framebuffer.lock().expect("could not lock framebuffer");
|
||||
|
||||
let size = framebuffer_locked.size.clone();
|
||||
let buffer = framebuffer_locked.get_buffer();
|
||||
fn update_software_framebuffer(framebuffer: &mut Surface, slice: &[u32], pitch: u32) {
|
||||
let size = framebuffer.size.clone();
|
||||
let buffer = framebuffer.get_buffer();
|
||||
|
||||
let has_disconnected_pitch = pitch != size.width as u32;
|
||||
|
||||
// If this frame came from OpenGL we need to flip the image around
|
||||
// so it is right side up (from our perspective).
|
||||
//
|
||||
// We do this in a bit of a convoluted way (but it's also zero allocation!)
|
||||
if from_opengl {
|
||||
for y in (0..size.height).rev() {
|
||||
let src_line_off = (y as u32 * pitch) as usize;
|
||||
let src_slice = &slice[src_line_off..src_line_off + size.width as usize];
|
||||
for y in 0..size.height {
|
||||
let src_line_off = (y as u32 * pitch) as usize;
|
||||
let mut dest_line_off = (y as u32 * size.width) as usize;
|
||||
|
||||
let reversed_y = (size.height - 1) - y;
|
||||
|
||||
let src_line_off = (reversed_y as u32 * pitch) as usize;
|
||||
let mut dest_line_off = src_line_off;
|
||||
|
||||
// copy only
|
||||
if has_disconnected_pitch {
|
||||
dest_line_off = (reversed_y * pitch.min(size.width)) as usize;
|
||||
}
|
||||
|
||||
let dest_slice = &mut buffer[dest_line_off..dest_line_off + size.width as usize];
|
||||
|
||||
dest_slice.copy_from_slice(src_slice);
|
||||
// copy only
|
||||
if has_disconnected_pitch {
|
||||
dest_line_off = (y * pitch) as usize;
|
||||
}
|
||||
} else {
|
||||
for y in 0..size.height {
|
||||
let src_line_off = (y as u32 * pitch) as usize;
|
||||
let mut dest_line_off = src_line_off;
|
||||
|
||||
// copy only
|
||||
if has_disconnected_pitch {
|
||||
dest_line_off = (y * size.width) as usize;
|
||||
}
|
||||
// Create slices repressenting each part
|
||||
let src_slice = &slice[src_line_off..src_line_off + size.width as usize];
|
||||
let dest_slice = &mut buffer[dest_line_off..dest_line_off + size.width as usize];
|
||||
|
||||
// Create slices repressenting each part
|
||||
let src_slice = &slice[src_line_off..src_line_off + size.width as usize];
|
||||
let dest_slice = &mut framebuffer_locked.get_buffer()
|
||||
[dest_line_off..dest_line_off + size.width as usize];
|
||||
|
||||
dest_slice.copy_from_slice(src_slice);
|
||||
}
|
||||
dest_slice.copy_from_slice(src_slice);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -204,21 +207,32 @@ impl FrontendInterface for RetroState {
|
|||
fn video_resize(&mut self, width: u32, height: u32) {
|
||||
tracing::info!("Resized to {width}x{height}");
|
||||
|
||||
if self.egl_context.is_some() {
|
||||
self.gl_framebuffer.resize(width, height);
|
||||
let raw = self.gl_framebuffer.as_raw();
|
||||
self.gl_framebuffer.resize(width, height);
|
||||
let raw = self.gl_framebuffer.as_raw();
|
||||
|
||||
// Notify the frontend layer about the new FBO ID
|
||||
self.get_frontend().set_gl_fbo(raw);
|
||||
// Notify the frontend layer about the new FBO ID
|
||||
self.get_frontend().set_gl_fbo(raw);
|
||||
|
||||
// Resize the readback buffer
|
||||
self.readback_buffer.resize(Size { width, height });
|
||||
if !self.gl_rendering {
|
||||
self.software_framebuffer.resize(Size { width, height });
|
||||
}
|
||||
|
||||
self.framebuffer
|
||||
.lock()
|
||||
.expect("its over?")
|
||||
.resize(Size { width, height });
|
||||
unsafe {
|
||||
}
|
||||
|
||||
// map to cuda
|
||||
{
|
||||
let mut locked = self
|
||||
.cuda_resource
|
||||
.lock()
|
||||
.expect("YOU MOTHERFUCKER PISS GO COUNT YOUR DICK");
|
||||
|
||||
locked.device().bind_to_thread().expect("fuck");
|
||||
|
||||
locked
|
||||
.register(self.gl_framebuffer.texture_id(), gl::TEXTURE_2D)
|
||||
.expect("you fucking asswater");
|
||||
}
|
||||
|
||||
let _ = self.event_tx.blocking_send(RetroEvent::Resize {
|
||||
size: Size { width, height },
|
||||
|
@ -226,39 +240,29 @@ impl FrontendInterface for RetroState {
|
|||
}
|
||||
|
||||
fn video_update(&mut self, slice: &[u32], pitch: u32) {
|
||||
Self::update_impl(self.framebuffer.clone(), slice, pitch, false);
|
||||
Self::update_software_framebuffer(&mut self.software_framebuffer, slice, pitch);
|
||||
|
||||
let size = self.software_framebuffer.size.clone();
|
||||
|
||||
// upload texture to GPU
|
||||
unsafe {
|
||||
gl::TexImage2D(
|
||||
gl::TEXTURE_2D,
|
||||
0,
|
||||
gl::RGBA as i32,
|
||||
size.width as i32,
|
||||
size.height as i32,
|
||||
0,
|
||||
gl::RGBA_INTEGER,
|
||||
gl::UNSIGNED_INT_8_8_8_8,
|
||||
self.software_framebuffer.get_buffer().as_mut_ptr() as *const _,
|
||||
);
|
||||
}
|
||||
|
||||
let _ = self.event_tx.blocking_send(RetroEvent::Frame);
|
||||
}
|
||||
|
||||
fn video_update_gl(&mut self) {
|
||||
let dimensions = self.get_frontend().get_size();
|
||||
|
||||
// Read back the framebuffer
|
||||
let slice = {
|
||||
// lame, doesn't do bgra conversion for us
|
||||
//self.gl_framebuffer.read_pixels(
|
||||
// &mut self.readback_buffer.get_buffer()[..],
|
||||
// dimensions.0,
|
||||
// dimensions.1,
|
||||
//);
|
||||
|
||||
unsafe {
|
||||
let _bind = self.gl_framebuffer.bind();
|
||||
gl::ReadPixels(
|
||||
0,
|
||||
0,
|
||||
dimensions.0 as i32,
|
||||
dimensions.1 as i32,
|
||||
gl::BGRA,
|
||||
gl::UNSIGNED_BYTE,
|
||||
(&mut self.readback_buffer.get_buffer()).as_mut_ptr() as *mut std::ffi::c_void,
|
||||
);
|
||||
}
|
||||
|
||||
self.readback_buffer.get_buffer()
|
||||
};
|
||||
|
||||
Self::update_impl(self.framebuffer.clone(), slice, dimensions.0, true);
|
||||
let _ = self.event_tx.blocking_send(RetroEvent::Frame);
|
||||
}
|
||||
|
||||
|
@ -270,9 +274,13 @@ impl FrontendInterface for RetroState {
|
|||
let (tx, rx) = oneshot::channel();
|
||||
let _ = self.event_tx.blocking_send(RetroEvent::WantInputs { tx });
|
||||
|
||||
let inputs = rx.blocking_recv().expect("what the FUCK are you doing");
|
||||
let inputs = rx.blocking_recv();
|
||||
|
||||
for key in &inputs {
|
||||
if inputs.is_err() {
|
||||
return;
|
||||
}
|
||||
|
||||
for key in &inputs.unwrap() {
|
||||
use xkeysym::key as Key;
|
||||
|
||||
match *key {
|
||||
|
@ -349,38 +357,23 @@ impl FrontendInterface for RetroState {
|
|||
|
||||
fn hw_gl_init(&mut self) -> Option<HwGlInitData> {
|
||||
// Only create a new EGL/OpenGL context if we have to.
|
||||
if self.egl_context.is_none() {
|
||||
// Initalize EGL
|
||||
self.hw_gl_egl_init();
|
||||
let context = self.egl_context.lock().expect("fuck you!");
|
||||
let extensions = gpu::egl_helpers::get_extensions(context.get_display());
|
||||
|
||||
let context = self.egl_context.as_ref().unwrap();
|
||||
let extensions = gpu::egl_helpers::get_extensions(context.get_display());
|
||||
tracing::debug!("Supported EGL extensions: {:?}", extensions);
|
||||
|
||||
tracing::debug!("Supported EGL extensions: {:?}", extensions);
|
||||
|
||||
// Check for EGL_KHR_get_all_proc_addresses, so we can use eglGetProcAddress() to load OpenGL functions
|
||||
if !extensions.contains(&"EGL_KHR_get_all_proc_addresses".into()) {
|
||||
tracing::error!("Your graphics driver doesn't support the EGL_KHR_get_all_proc_addresses extension.");
|
||||
tracing::error!("Retrodemo currently needs this to load OpenGL functions. HW rendering will be disabled.");
|
||||
return None;
|
||||
}
|
||||
|
||||
unsafe {
|
||||
// Load OpenGL functions using the EGL loader.
|
||||
gl::load_with(|s| {
|
||||
let str = std::ffi::CString::new(s).expect("gl::load_with fail");
|
||||
std::mem::transmute(gpu::egl::GetProcAddress(str.as_ptr()))
|
||||
});
|
||||
|
||||
// set OpenGL debug message callback
|
||||
gl::Enable(gl::DEBUG_OUTPUT);
|
||||
gl::DebugMessageCallback(Some(opengl_message_callback), std::ptr::null());
|
||||
}
|
||||
// Check for EGL_KHR_get_all_proc_addresses, so we can use eglGetProcAddress() to load OpenGL functions
|
||||
if !extensions.contains(&"EGL_KHR_get_all_proc_addresses".into()) {
|
||||
tracing::error!("Your graphics driver doesn't support the EGL_KHR_get_all_proc_addresses extension.");
|
||||
tracing::error!("Retrodemo currently needs this to load OpenGL functions. HW rendering will be disabled.");
|
||||
return None;
|
||||
}
|
||||
|
||||
// Create the initial FBO for the core to render to
|
||||
let dimensions = self.get_frontend().get_size();
|
||||
self.gl_framebuffer.resize(dimensions.0, dimensions.1);
|
||||
// If we get here, we can be certain that we're no longer
|
||||
// going to use software rendering. Therefore, we can
|
||||
// clear (free) the software framebuffer, since it won't be of use to us anymore.
|
||||
self.software_framebuffer.clear();
|
||||
self.gl_rendering = true;
|
||||
|
||||
return Some(HwGlInitData {
|
||||
get_proc_address: gpu::egl::GetProcAddress as *mut std::ffi::c_void,
|
||||
|
@ -403,25 +396,34 @@ pub enum RetroEvent {
|
|||
|
||||
pub enum RetroInEvent {
|
||||
Start,
|
||||
LoadCore(std::path::PathBuf),
|
||||
LoadGame(std::path::PathBuf),
|
||||
}
|
||||
|
||||
fn retro_thread_main(
|
||||
surface: Arc<Mutex<Surface>>,
|
||||
context: &Arc<Mutex<DeviceContext>>,
|
||||
resource: &Arc<Mutex<GraphicsResource>>,
|
||||
event_tx: mpsc::Sender<RetroEvent>,
|
||||
mut event_rx: mpsc::Receiver<RetroInEvent>,
|
||||
) {
|
||||
let mut app = RetroState::new(surface, event_tx);
|
||||
let mut app = RetroState::new(context.clone(), resource.clone(), event_tx);
|
||||
|
||||
app.load_core("cores/swanstation_libretro.so")
|
||||
.expect("failed to load core");
|
||||
app.load_game("roms/merged/nmv3/us/nmv3_us.cue") //merged/nmv1/us/nmv1_us.cue
|
||||
.expect("failed to load game");
|
||||
// do EGL init first
|
||||
app.hw_gl_egl_init();
|
||||
|
||||
// sync
|
||||
// pre-setup
|
||||
loop {
|
||||
match event_rx.blocking_recv() {
|
||||
None => return (),
|
||||
Some(msg) => match msg {
|
||||
RetroInEvent::LoadCore(path) => {
|
||||
app.load_core(path).expect("Failed to load core.");
|
||||
}
|
||||
|
||||
RetroInEvent::LoadGame(path) => {
|
||||
app.load_game(path).expect("Failed to load game!");
|
||||
}
|
||||
|
||||
RetroInEvent::Start => break,
|
||||
},
|
||||
}
|
||||
|
@ -435,18 +437,23 @@ fn retro_thread_main(
|
|||
}
|
||||
|
||||
pub fn spawn_retro_thread(
|
||||
surface: Arc<Mutex<Surface>>,
|
||||
context: Arc<Mutex<DeviceContext>>,
|
||||
resource: Arc<Mutex<GraphicsResource>>,
|
||||
) -> (mpsc::Receiver<RetroEvent>, mpsc::Sender<RetroInEvent>) {
|
||||
let (event_tx, event_rx) = mpsc::channel(8);
|
||||
let (event_in_tx, event_in_rx) = mpsc::channel(8);
|
||||
let fb_clone = surface.clone();
|
||||
// essentially semaphores
|
||||
let (event_tx, event_rx) = mpsc::channel(1);
|
||||
let (event_in_tx, event_in_rx) = mpsc::channel(1);
|
||||
|
||||
let cloned = resource.clone();
|
||||
let ctxcloned = context.clone();
|
||||
|
||||
// discard the join handle
|
||||
let _ = std::thread::Builder::new()
|
||||
.name("retro_game".into())
|
||||
.spawn(move || {
|
||||
retro_thread_main(fb_clone, event_tx, event_in_rx);
|
||||
}).expect("failed to spawn the game thread");
|
||||
retro_thread_main(&ctxcloned, &cloned, event_tx, event_in_rx);
|
||||
})
|
||||
.expect("failed to spawn the game thread");
|
||||
|
||||
(event_rx, event_in_tx)
|
||||
}
|
||||
|
|
|
@ -21,7 +21,7 @@ pub fn alloc_boxed_slice<T: Sized>(len: usize) -> Box<[T]> {
|
|||
unsafe { Box::from_raw(slice) }
|
||||
}
|
||||
|
||||
/// A BGRA-format surface.
|
||||
/// A BGRA (or RGBA, I'm not your dad. There are funtions that assume the former though) format surface.
|
||||
pub struct Surface {
|
||||
buffer: Option<Box<[u32]>>,
|
||||
pub size: Size,
|
||||
|
@ -38,10 +38,18 @@ impl Surface {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn clear(&mut self) {
|
||||
self.buffer = None;
|
||||
self.size = Size {
|
||||
width: 0,
|
||||
height: 0,
|
||||
};
|
||||
}
|
||||
|
||||
pub fn resize(&mut self, size: Size) {
|
||||
self.size = size;
|
||||
|
||||
self.buffer = Some(alloc_boxed_slice(self.size.linear()));
|
||||
self.buffer = Some(alloc_boxed_slice(self.size.linear()));
|
||||
}
|
||||
|
||||
pub fn get_buffer(&mut self) -> &mut [u32] {
|
||||
|
@ -53,15 +61,14 @@ impl Surface {
|
|||
pub fn blit_buffer(&mut self, src_at: Rect, data: &[u32]) {
|
||||
let mut off = 0;
|
||||
|
||||
let buf = self.buffer.as_mut().unwrap();
|
||||
let buf_slice = &mut *buf;
|
||||
let buf = self.buffer.as_mut().unwrap();
|
||||
let buf_slice = &mut *buf;
|
||||
|
||||
for y in src_at.y..src_at.y + src_at.height {
|
||||
let src = &data[off..off + src_at.width as usize];
|
||||
let dest_start_offset = (y as usize * self.size.width as usize) + src_at.x as usize;
|
||||
|
||||
let dest =
|
||||
&mut buf_slice[dest_start_offset..dest_start_offset + src_at.width as usize];
|
||||
let dest = &mut buf_slice[dest_start_offset..dest_start_offset + src_at.width as usize];
|
||||
|
||||
// This forces alpha to always be 0xff. I *could* probably do this in a clearer way though :(
|
||||
for (dest, src_item) in dest.iter_mut().zip(src.iter()) {
|
||||
|
|
27
server/src/video/cuda_gl/bindgen.sh
Executable file
27
server/src/video/cuda_gl/bindgen.sh
Executable file
|
@ -0,0 +1,27 @@
|
|||
#!/bin/bash
|
||||
# Does bindgen for CUDA cudaGL. (needs postprocessing)
|
||||
set -exu
|
||||
|
||||
# --allowlist-type="^CU.*" \
|
||||
# --allowlist-type="^cuuint(32|64)_t" \
|
||||
# --allowlist-type="^cudaError_enum" \
|
||||
# --allowlist-type="^cu.*Complex$" \
|
||||
# --allowlist-type="^cuda.*" \
|
||||
# --allowlist-type="^libraryPropertyType.*" \
|
||||
# --allowlist-var="^CU.*" \
|
||||
|
||||
echo "use cudarc::sys::*; /* Hack :3 */" > ./sys.rs
|
||||
|
||||
bindgen \
|
||||
--allowlist-type="" \
|
||||
--allowlist-function="^cuGraphicsGL.*" \
|
||||
--default-enum-style=rust \
|
||||
--no-doc-comments \
|
||||
--with-derive-default \
|
||||
--with-derive-eq \
|
||||
--with-derive-hash \
|
||||
--with-derive-ord \
|
||||
--use-core \
|
||||
--dynamic-loading Lib \
|
||||
gl.h -- -I/opt/cuda/include \
|
||||
>> ./sys.rs
|
1
server/src/video/cuda_gl/gl.h
Normal file
1
server/src/video/cuda_gl/gl.h
Normal file
|
@ -0,0 +1 @@
|
|||
#include "cudaGL.h"
|
14
server/src/video/cuda_gl/mod.rs
Normal file
14
server/src/video/cuda_gl/mod.rs
Normal file
|
@ -0,0 +1,14 @@
|
|||
pub mod sys;
|
||||
use sys::*;
|
||||
|
||||
pub mod safe;
|
||||
|
||||
pub unsafe fn lib() -> &'static Lib {
|
||||
static LIB: std::sync::OnceLock<Lib> = std::sync::OnceLock::new();
|
||||
LIB.get_or_init(|| {
|
||||
if let Ok(lib) = Lib::new(libloading::library_filename("cuda")) {
|
||||
return lib;
|
||||
}
|
||||
panic!("cuda library doesn't exist.");
|
||||
})
|
||||
}
|
135
server/src/video/cuda_gl/safe.rs
Normal file
135
server/src/video/cuda_gl/safe.rs
Normal file
|
@ -0,0 +1,135 @@
|
|||
use cudarc::driver::{result as cuda_result, safe as cuda_safe, sys as cuda_sys, CudaDevice};
|
||||
|
||||
use super::sys;
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
pub struct MappedGraphicsResource {
|
||||
resource: cuda_sys::CUgraphicsResource,
|
||||
}
|
||||
|
||||
impl MappedGraphicsResource {
|
||||
fn new(resource: cuda_sys::CUgraphicsResource) -> Self {
|
||||
Self { resource }
|
||||
}
|
||||
|
||||
pub fn map(&mut self) -> Result<(), cuda_result::DriverError> {
|
||||
unsafe {
|
||||
cuda_sys::lib()
|
||||
.cuGraphicsMapResources(1, &mut self.resource, std::ptr::null_mut())
|
||||
.result()?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn unmap(&mut self) -> Result<(), cuda_result::DriverError> {
|
||||
unsafe {
|
||||
cuda_sys::lib()
|
||||
.cuGraphicsUnmapResources(1, &mut self.resource, std::ptr::null_mut())
|
||||
.result()?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn get_mapped_array(&mut self) -> Result<cuda_sys::CUarray, cuda_result::DriverError> {
|
||||
assert!(
|
||||
!self.resource.is_null(),
|
||||
"do not call GraphicsResource::get_mapped_array if no resource is actually registered"
|
||||
);
|
||||
|
||||
let mut array: cuda_sys::CUarray = std::ptr::null_mut();
|
||||
|
||||
unsafe {
|
||||
cuda_sys::lib()
|
||||
.cuGraphicsSubResourceGetMappedArray(&mut array, self.resource, 0, 0)
|
||||
.result()?;
|
||||
}
|
||||
|
||||
Ok(array)
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for MappedGraphicsResource {
|
||||
fn drop(&mut self) {
|
||||
let _ = self.unmap();
|
||||
}
|
||||
}
|
||||
|
||||
/// Wrapper over cuGraphicsGL* apis
|
||||
pub struct GraphicsResource {
|
||||
context: Arc<CudaDevice>,
|
||||
resource: cuda_sys::CUgraphicsResource,
|
||||
}
|
||||
|
||||
impl GraphicsResource {
|
||||
pub fn new(device: &Arc<CudaDevice>) -> Self {
|
||||
Self {
|
||||
context: device.clone(),
|
||||
resource: std::ptr::null_mut(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn device(&self) -> Arc<CudaDevice> {
|
||||
self.context.clone()
|
||||
}
|
||||
|
||||
/// Maps this resource.
|
||||
pub fn map(&mut self) -> Result<MappedGraphicsResource, cuda_result::DriverError> {
|
||||
let mut res = MappedGraphicsResource::new(self.resource);
|
||||
res.map()?;
|
||||
|
||||
Ok(res)
|
||||
}
|
||||
|
||||
pub fn register(
|
||||
&mut self,
|
||||
texture_id: gl::types::GLuint,
|
||||
texture_kind: gl::types::GLuint,
|
||||
) -> Result<(), cuda_result::DriverError> {
|
||||
// better to be safe than leak memory? idk.
|
||||
if !self.resource.is_null() {
|
||||
self.unregister()?;
|
||||
}
|
||||
|
||||
unsafe {
|
||||
super::lib()
|
||||
.cuGraphicsGLRegisterImage(&mut self.resource, texture_id, texture_kind, 1)
|
||||
.result()?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn is_registered(&self) -> bool {
|
||||
!self.resource.is_null()
|
||||
}
|
||||
|
||||
pub fn unregister(&mut self) -> Result<(), cuda_result::DriverError> {
|
||||
assert!(
|
||||
!self.resource.is_null(),
|
||||
"do not call if no resource is actually registered"
|
||||
);
|
||||
|
||||
unsafe {
|
||||
cuda_sys::lib()
|
||||
.cuGraphicsUnregisterResource(self.resource)
|
||||
.result()?;
|
||||
}
|
||||
|
||||
self.resource = std::ptr::null_mut();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
impl Drop for GraphicsResource {
|
||||
fn drop(&mut self) {
|
||||
if self.is_registered() {
|
||||
let _ = self.unregister();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unsafe impl Send for GraphicsResource {}
|
73
server/src/video/cuda_gl/sys.rs
Normal file
73
server/src/video/cuda_gl/sys.rs
Normal file
|
@ -0,0 +1,73 @@
|
|||
use cudarc::driver::sys::*; /* Hack :3 */
|
||||
use gl::types::{GLenum, GLuint};
|
||||
/* automatically generated by rust-bindgen 0.69.4 */
|
||||
|
||||
pub struct Lib {
|
||||
__library: ::libloading::Library,
|
||||
pub cuGraphicsGLRegisterBuffer: Result<
|
||||
unsafe extern "C" fn(
|
||||
pCudaResource: *mut CUgraphicsResource,
|
||||
buffer: GLuint,
|
||||
Flags: ::core::ffi::c_uint,
|
||||
) -> CUresult,
|
||||
::libloading::Error,
|
||||
>,
|
||||
pub cuGraphicsGLRegisterImage: Result<
|
||||
unsafe extern "C" fn(
|
||||
pCudaResource: *mut CUgraphicsResource,
|
||||
image: GLuint,
|
||||
target: GLenum,
|
||||
Flags: ::core::ffi::c_uint,
|
||||
) -> CUresult,
|
||||
::libloading::Error,
|
||||
>,
|
||||
}
|
||||
impl Lib {
|
||||
pub unsafe fn new<P>(path: P) -> Result<Self, ::libloading::Error>
|
||||
where
|
||||
P: AsRef<::std::ffi::OsStr>,
|
||||
{
|
||||
let library = ::libloading::Library::new(path)?;
|
||||
Self::from_library(library)
|
||||
}
|
||||
pub unsafe fn from_library<L>(library: L) -> Result<Self, ::libloading::Error>
|
||||
where
|
||||
L: Into<::libloading::Library>,
|
||||
{
|
||||
let __library = library.into();
|
||||
let cuGraphicsGLRegisterBuffer = __library
|
||||
.get(b"cuGraphicsGLRegisterBuffer\0")
|
||||
.map(|sym| *sym);
|
||||
let cuGraphicsGLRegisterImage = __library
|
||||
.get(b"cuGraphicsGLRegisterImage\0")
|
||||
.map(|sym| *sym);
|
||||
Ok(Lib {
|
||||
__library,
|
||||
cuGraphicsGLRegisterBuffer,
|
||||
cuGraphicsGLRegisterImage,
|
||||
})
|
||||
}
|
||||
pub unsafe fn cuGraphicsGLRegisterBuffer(
|
||||
&self,
|
||||
pCudaResource: *mut CUgraphicsResource,
|
||||
buffer: GLuint,
|
||||
Flags: ::core::ffi::c_uint,
|
||||
) -> CUresult {
|
||||
(self
|
||||
.cuGraphicsGLRegisterBuffer
|
||||
.as_ref()
|
||||
.expect("Expected function, got error."))(pCudaResource, buffer, Flags)
|
||||
}
|
||||
pub unsafe fn cuGraphicsGLRegisterImage(
|
||||
&self,
|
||||
pCudaResource: *mut CUgraphicsResource,
|
||||
image: GLuint,
|
||||
target: GLenum,
|
||||
Flags: ::core::ffi::c_uint,
|
||||
) -> CUresult {
|
||||
(self
|
||||
.cuGraphicsGLRegisterImage
|
||||
.as_ref()
|
||||
.expect("Expected function, got error."))(pCudaResource, image, target, Flags)
|
||||
}
|
||||
}
|
|
@ -1,11 +1,16 @@
|
|||
use cudarc::driver::{
|
||||
sys::{CUdeviceptr, CUmemorytype},
|
||||
CudaDevice, LaunchAsync,
|
||||
};
|
||||
use letsplay_gpu::egl_helpers::DeviceContext;
|
||||
use std::{
|
||||
sync::{Arc, Mutex},
|
||||
time::Duration,
|
||||
};
|
||||
use tokio::sync::mpsc::{self, error::TryRecvError};
|
||||
|
||||
use super::ffmpeg;
|
||||
use super::h264_encoder::H264Encoder;
|
||||
use super::{cuda_gl::safe::GraphicsResource, ffmpeg};
|
||||
|
||||
pub enum EncodeThreadInput {
|
||||
Init { size: crate::types::Size },
|
||||
|
@ -37,7 +42,7 @@ impl EncoderState {
|
|||
self.encoder = Some(H264Encoder::new_nvenc_swframe(
|
||||
size.clone(),
|
||||
60,
|
||||
3 * (1024 * 1024),
|
||||
2 * (1024 * 1024),
|
||||
)?);
|
||||
|
||||
// replace packet
|
||||
|
@ -85,7 +90,76 @@ impl EncoderState {
|
|||
}
|
||||
}
|
||||
|
||||
fn encoder_thread_main(
|
||||
struct EncoderStateHW {
|
||||
encoder: Option<H264Encoder>,
|
||||
frame: ffmpeg::frame::Video,
|
||||
packet: ffmpeg::Packet,
|
||||
}
|
||||
|
||||
impl EncoderStateHW {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
encoder: None,
|
||||
frame: ffmpeg::frame::Video::empty(),
|
||||
packet: ffmpeg::Packet::empty(),
|
||||
}
|
||||
}
|
||||
|
||||
fn init(&mut self, device: &Arc<CudaDevice>, size: crate::types::Size) -> anyhow::Result<()> {
|
||||
self.encoder = Some(H264Encoder::new_nvenc_hwframe(
|
||||
&device,
|
||||
size.clone(),
|
||||
60,
|
||||
2 * (1024 * 1024),
|
||||
)?);
|
||||
|
||||
// replace packet
|
||||
self.packet = ffmpeg::Packet::empty();
|
||||
self.frame = self.encoder.as_mut().unwrap().create_frame()?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn frame(&mut self) -> &mut ffmpeg::frame::Video {
|
||||
&mut self.frame
|
||||
}
|
||||
|
||||
fn send_frame(&mut self, pts: u64, force_keyframe: bool) -> Option<ffmpeg::Packet> {
|
||||
let frame = &mut self.frame;
|
||||
let encoder = self.encoder.as_mut().unwrap();
|
||||
|
||||
// set frame metadata
|
||||
unsafe {
|
||||
if force_keyframe {
|
||||
(*frame.as_mut_ptr()).pict_type = ffmpeg::sys::AVPictureType::AV_PICTURE_TYPE_I;
|
||||
(*frame.as_mut_ptr()).flags = ffmpeg::sys::AV_FRAME_FLAG_KEY;
|
||||
(*frame.as_mut_ptr()).key_frame = 1;
|
||||
} else {
|
||||
(*frame.as_mut_ptr()).pict_type = ffmpeg::sys::AVPictureType::AV_PICTURE_TYPE_NONE;
|
||||
(*frame.as_mut_ptr()).flags = 0i32;
|
||||
(*frame.as_mut_ptr()).key_frame = 0;
|
||||
}
|
||||
|
||||
(*frame.as_mut_ptr()).pts = pts as i64;
|
||||
}
|
||||
|
||||
encoder.send_frame(&*frame);
|
||||
encoder
|
||||
.receive_packet(&mut self.packet)
|
||||
.expect("Failed to recieve packet");
|
||||
|
||||
unsafe {
|
||||
if !self.packet.is_empty() {
|
||||
return Some(self.packet.clone());
|
||||
}
|
||||
}
|
||||
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
||||
fn encoder_thread_swframe_main(
|
||||
mut rx: mpsc::Receiver<EncodeThreadInput>,
|
||||
tx: mpsc::Sender<EncodeThreadOutput>,
|
||||
frame: &Arc<Mutex<Option<ffmpeg::frame::Video>>>,
|
||||
|
@ -143,7 +217,7 @@ fn encoder_thread_main(
|
|||
Ok(())
|
||||
}
|
||||
|
||||
pub fn encoder_thread_spawn(
|
||||
pub fn encoder_thread_spawn_swframe(
|
||||
frame: &Arc<Mutex<Option<ffmpeg::frame::Video>>>,
|
||||
) -> (
|
||||
mpsc::Receiver<EncodeThreadOutput>,
|
||||
|
@ -154,7 +228,192 @@ pub fn encoder_thread_spawn(
|
|||
|
||||
let clone = Arc::clone(frame);
|
||||
|
||||
std::thread::spawn(move || encoder_thread_main(in_rx, out_tx, &clone));
|
||||
std::thread::spawn(move || encoder_thread_swframe_main(in_rx, out_tx, &clone));
|
||||
|
||||
(out_rx, in_tx)
|
||||
}
|
||||
|
||||
fn encoder_thread_hwframe_main(
|
||||
mut rx: mpsc::Receiver<EncodeThreadInput>,
|
||||
tx: mpsc::Sender<EncodeThreadOutput>,
|
||||
|
||||
cuda_device: &Arc<CudaDevice>,
|
||||
cuda_resource: &Arc<Mutex<GraphicsResource>>,
|
||||
gl_context: &Arc<Mutex<DeviceContext>>,
|
||||
) -> anyhow::Result<()> {
|
||||
let mut frame_number = 0u64;
|
||||
let mut force_keyframe = false;
|
||||
|
||||
let mut encoder = EncoderStateHW::new();
|
||||
|
||||
// :)
|
||||
cuda_device.bind_to_thread()?;
|
||||
|
||||
/*
|
||||
|
||||
let mut memcpy = cudarc::driver::sys::CUDA_MEMCPY2D_st::default();
|
||||
// setup basic src stuff
|
||||
memcpy.srcXInBytes = 0;
|
||||
memcpy.srcY = 0;
|
||||
memcpy.srcMemoryType = CUmemorytype::CU_MEMORYTYPE_ARRAY;
|
||||
|
||||
// dest
|
||||
memcpy.dstXInBytes = 0;
|
||||
memcpy.dstY = 0;
|
||||
memcpy.dstMemoryType = CUmemorytype::CU_MEMORYTYPE_DEVICE;
|
||||
*/
|
||||
|
||||
loop {
|
||||
match rx.try_recv() {
|
||||
Ok(msg) => match msg {
|
||||
EncodeThreadInput::Init { size } => {
|
||||
frame_number = 0;
|
||||
|
||||
if force_keyframe {
|
||||
force_keyframe = false;
|
||||
}
|
||||
|
||||
encoder
|
||||
.init(cuda_device, size)
|
||||
.expect("encoder init failed");
|
||||
}
|
||||
|
||||
EncodeThreadInput::ForceKeyframe => {
|
||||
force_keyframe = true;
|
||||
}
|
||||
|
||||
EncodeThreadInput::SendFrame => {
|
||||
// copy gl frame *ON THE GPU*
|
||||
{
|
||||
let mut gl_ctx = gl_context.lock().expect("you dumb fuck");
|
||||
let mut gl_resource =
|
||||
cuda_resource.lock().expect("couldnt lock GL resource!");
|
||||
|
||||
gl_ctx.make_current();
|
||||
|
||||
let mut mapped = gl_resource
|
||||
.map()
|
||||
.expect("couldnt map graphics resource. Its joever");
|
||||
|
||||
let array = mapped
|
||||
.get_mapped_array()
|
||||
.expect("well its all over anyways");
|
||||
|
||||
{
|
||||
let frame = encoder.frame();
|
||||
let width = frame.width();
|
||||
let height = frame.height();
|
||||
|
||||
let launch_config = cudarc::driver::LaunchConfig {
|
||||
grid_dim: (width / 16 + 1, height / 2 + 1, 0),
|
||||
block_dim: (16, 2, 0),
|
||||
shared_mem_bytes: 0,
|
||||
};
|
||||
|
||||
let flip_opengl = cuda_device
|
||||
.get_func("module", "flip_opengl")
|
||||
.expect("dumb fucker");
|
||||
|
||||
unsafe {
|
||||
let frame_ptr = frame.as_mut_ptr();
|
||||
|
||||
let mut params = [
|
||||
array as *mut std::ffi::c_void,
|
||||
(*frame_ptr).data[0] as CUdeviceptr as *mut std::ffi::c_void,
|
||||
width as *const u32 as *mut std::ffi::c_void,
|
||||
height as *const u32 as *mut std::ffi::c_void,
|
||||
];
|
||||
|
||||
flip_opengl
|
||||
.launch(launch_config, &mut params[..])
|
||||
.expect("oh its done");
|
||||
|
||||
|
||||
cudarc::driver::sys::lib()
|
||||
.cuStreamSynchronize(std::ptr::null_mut())
|
||||
.result()
|
||||
.expect("you banned");
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
// setup the cuMemcpy2D
|
||||
{
|
||||
let frame = encoder.frame();
|
||||
|
||||
memcpy.srcArray = array;
|
||||
|
||||
unsafe {
|
||||
let frame_ptr = frame.as_mut_ptr();
|
||||
memcpy.dstDevice = (*frame_ptr).data[0] as CUdeviceptr;
|
||||
memcpy.dstPitch = (*frame_ptr).linesize[0] as usize;
|
||||
memcpy.WidthInBytes = ((*frame_ptr).width * 4) as usize;
|
||||
memcpy.Height = (*frame_ptr).height as usize;
|
||||
}
|
||||
}
|
||||
|
||||
unsafe {
|
||||
cudarc::driver::sys::lib()
|
||||
.cuMemcpy2DAsync_v2(&memcpy, std::ptr::null_mut())
|
||||
.result()
|
||||
.expect("cuMemcpy2D fail epic");
|
||||
|
||||
cudarc::driver::sys::lib()
|
||||
.cuStreamSynchronize(std::ptr::null_mut())
|
||||
.result()
|
||||
.expect("you banned");
|
||||
|
||||
} */
|
||||
|
||||
mapped.unmap().expect("fuck you asshole");
|
||||
gl_ctx.release();
|
||||
}
|
||||
|
||||
if let Some(pkt) = encoder.send_frame(frame_number as u64, force_keyframe) {
|
||||
// A bit less clear than ::empty(), but it's "Safe"
|
||||
if let Some(_) = pkt.data() {
|
||||
let _ = tx.blocking_send(EncodeThreadOutput::Frame {
|
||||
packet: pkt.clone(),
|
||||
});
|
||||
}
|
||||
|
||||
frame_number += 1;
|
||||
}
|
||||
|
||||
if force_keyframe {
|
||||
force_keyframe = false;
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
Err(TryRecvError::Disconnected) => break,
|
||||
Err(TryRecvError::Empty) => {
|
||||
std::thread::sleep(Duration::from_millis(1));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn encoder_thread_spawn_hwframe(
|
||||
cuda_device: &Arc<CudaDevice>,
|
||||
cuda_resource: &Arc<Mutex<GraphicsResource>>,
|
||||
gl_context: &Arc<Mutex<DeviceContext>>,
|
||||
) -> (
|
||||
mpsc::Receiver<EncodeThreadOutput>,
|
||||
mpsc::Sender<EncodeThreadInput>,
|
||||
) {
|
||||
let (in_tx, in_rx) = mpsc::channel(1);
|
||||
let (out_tx, out_rx) = mpsc::channel(1);
|
||||
|
||||
let dev_clone = Arc::clone(cuda_device);
|
||||
let rsrc_clone = Arc::clone(cuda_resource);
|
||||
let gl_clone = Arc::clone(gl_context);
|
||||
|
||||
std::thread::spawn(move || {
|
||||
encoder_thread_hwframe_main(in_rx, out_tx, &dev_clone, &rsrc_clone, &gl_clone)
|
||||
});
|
||||
|
||||
(out_rx, in_tx)
|
||||
}
|
||||
|
|
|
@ -135,8 +135,8 @@ impl H264Encoder {
|
|||
|
||||
video_encoder_context.set_format(ffmpeg::format::Pixel::ZRGB32);
|
||||
|
||||
video_encoder_context.set_qmin(38);
|
||||
video_encoder_context.set_qmax(32);
|
||||
video_encoder_context.set_qmin(37);
|
||||
video_encoder_context.set_qmax(33);
|
||||
|
||||
// set h264_nvenc options
|
||||
let mut dict = ffmpeg::Dictionary::new();
|
||||
|
@ -148,7 +148,7 @@ impl H264Encoder {
|
|||
|
||||
// TODO:
|
||||
dict.set("rc", "vbr");
|
||||
dict.set("qp", "45");
|
||||
dict.set("qp", "35");
|
||||
|
||||
dict.set("forced-idr", "1");
|
||||
|
||||
|
@ -169,9 +169,6 @@ impl H264Encoder {
|
|||
max_framerate: u32,
|
||||
bitrate: usize,
|
||||
) -> anyhow::Result<Self> {
|
||||
/*
|
||||
(See FIXMEs above)
|
||||
|
||||
let cuda_device_context = super::hwdevice::CudaDeviceContextBuilder::new()?
|
||||
.set_cuda_context((*cuda_device.cu_primary_ctx()) as *mut _)
|
||||
.build()
|
||||
|
@ -180,14 +177,53 @@ impl H264Encoder {
|
|||
let mut hw_frame_context = super::hwframe::HwFrameContextBuilder::new(cuda_device_context)?
|
||||
.set_width(size.width)
|
||||
.set_height(size.height)
|
||||
.set_sw_format(ffmpeg::format::Pixel::ZRGB32)
|
||||
.set_sw_format(ffmpeg::format::Pixel::ZBGR32)
|
||||
.set_format(ffmpeg::format::Pixel::CUDA)
|
||||
.build()
|
||||
.with_context(|| "while trying to create CUDA frame context")?;
|
||||
|
||||
*/
|
||||
let (mut encoder, mut video_encoder_context) =
|
||||
create_context_and_set_common_parameters("h264_nvenc", &size, max_framerate, bitrate)
|
||||
.with_context(|| "while trying to create encoder")?;
|
||||
|
||||
todo!("Implement me!");
|
||||
video_encoder_context.set_format(ffmpeg::format::Pixel::CUDA);
|
||||
|
||||
video_encoder_context.set_qmin(37);
|
||||
video_encoder_context.set_qmax(33);
|
||||
|
||||
unsafe {
|
||||
// FIXME: this currently breaks the avbufferref system a bit
|
||||
(*video_encoder_context.as_mut_ptr()).hw_frames_ctx = hw_frame_context.as_raw_mut();
|
||||
(*video_encoder_context.as_mut_ptr()).hw_device_ctx =
|
||||
hw_frame_context.as_device_context_mut();
|
||||
}
|
||||
|
||||
// set h264_nvenc options
|
||||
let mut dict = ffmpeg::Dictionary::new();
|
||||
|
||||
dict.set("tune", "ull");
|
||||
dict.set("preset", "p1");
|
||||
|
||||
dict.set("profile", "main");
|
||||
|
||||
// TODO:
|
||||
dict.set("rc", "vbr");
|
||||
dict.set("qp", "35");
|
||||
|
||||
dict.set("forced-idr", "1");
|
||||
|
||||
// damn you
|
||||
dict.set("delay", "0");
|
||||
dict.set("zerolatency", "1");
|
||||
|
||||
let encoder = video_encoder_context
|
||||
.open_as_with(encoder, dict)
|
||||
.with_context(|| "While opening h264_nvenc video codec")?;
|
||||
|
||||
Ok(Self::NvencHWFrame {
|
||||
encoder: encoder,
|
||||
hw_context: hw_frame_context,
|
||||
})
|
||||
}
|
||||
|
||||
// NOTE: It's a bit pointless to have this have a mut borrow,
|
||||
|
@ -207,46 +243,44 @@ impl H264Encoder {
|
|||
// }
|
||||
//}
|
||||
|
||||
pub fn create_frame(&mut self) -> ffmpeg::frame::Video {
|
||||
|
||||
pub fn create_frame(&mut self) -> anyhow::Result<ffmpeg::frame::Video> {
|
||||
match self {
|
||||
Self::Software { encoder } | Self::NvencSWFrame { encoder } => {
|
||||
return ffmpeg::frame::Video::new(encoder.format(), encoder.width(), encoder.height());
|
||||
return Ok(ffmpeg::frame::Video::new(
|
||||
encoder.format(),
|
||||
encoder.width(),
|
||||
encoder.height(),
|
||||
));
|
||||
}
|
||||
|
||||
Self::NvencHWFrame {
|
||||
encoder,
|
||||
hw_context,
|
||||
} => {
|
||||
todo!("Implement H264Encoder::create_frame() for NvencHWFrame!");
|
||||
let mut frame = ffmpeg::frame::Video::empty();
|
||||
|
||||
unsafe {
|
||||
(*frame.as_mut_ptr()).format = ffmpeg::format::Pixel::CUDA as i32;
|
||||
(*frame.as_mut_ptr()).width = encoder.width() as i32;
|
||||
(*frame.as_mut_ptr()).height = encoder.height() as i32;
|
||||
(*frame.as_mut_ptr()).hw_frames_ctx = hw_context.as_raw_mut();
|
||||
|
||||
//ffmpeg::sys::av_frame_get_buffer(frame.as_mut_ptr(), 32);
|
||||
|
||||
hw_context.get_buffer(&mut frame)?;
|
||||
hw_context.get_buffer(&mut frame)?;
|
||||
|
||||
(*frame.as_mut_ptr()).linesize[0] = (*frame.as_ptr()).width * 4;
|
||||
|
||||
return Ok(frame);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
unsafe {
|
||||
let mut frame = ffmpeg::Frame::empty();
|
||||
(*frame.as_mut_ptr()).format = pixel_format as i32;
|
||||
(*frame.as_mut_ptr()).width = width as i32;
|
||||
(*frame.as_mut_ptr()).height = height as i32;
|
||||
(*frame.as_mut_ptr()).hw_frames_ctx = context.as_raw_mut();
|
||||
|
||||
super::check_ret(ffmpeg::sys::av_hwframe_get_buffer(
|
||||
context.as_raw_mut(),
|
||||
frame.as_mut_ptr(),
|
||||
0,
|
||||
))?;
|
||||
super::check_ret(ffmpeg::sys::av_hwframe_get_buffer(
|
||||
context.as_raw_mut(),
|
||||
frame.as_mut_ptr(),
|
||||
0,
|
||||
))?;
|
||||
|
||||
(*frame.as_mut_ptr()).linesize[0] = (*frame.as_ptr()).width * 4;
|
||||
|
||||
Ok(frame)
|
||||
}
|
||||
|
||||
*/
|
||||
*/
|
||||
|
||||
todo!("FIXME");
|
||||
}
|
||||
|
@ -267,7 +301,8 @@ impl H264Encoder {
|
|||
encoder,
|
||||
hw_context,
|
||||
} => {
|
||||
todo!("Implement send_frame() for NvencHWFrame");
|
||||
//todo!("Implement send_frame() for NvencHWFrame");
|
||||
encoder.send_frame(frame).unwrap();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -7,82 +7,103 @@ use ffmpeg::format::Pixel;
|
|||
use super::{check_ret, hwdevice::CudaDeviceContext};
|
||||
|
||||
pub struct HwFrameContext {
|
||||
_cuda_device_context: CudaDeviceContext,
|
||||
buffer: *mut ffmpeg::sys::AVBufferRef,
|
||||
_cuda_device_context: CudaDeviceContext,
|
||||
buffer: *mut ffmpeg::sys::AVBufferRef,
|
||||
}
|
||||
|
||||
impl HwFrameContext {
|
||||
fn new(cuda_device_context: CudaDeviceContext, buffer: *mut ffmpeg::sys::AVBufferRef) -> Self {
|
||||
Self { _cuda_device_context: cuda_device_context, buffer }
|
||||
}
|
||||
fn new(cuda_device_context: CudaDeviceContext, buffer: *mut ffmpeg::sys::AVBufferRef) -> Self {
|
||||
Self {
|
||||
_cuda_device_context: cuda_device_context,
|
||||
buffer,
|
||||
}
|
||||
}
|
||||
|
||||
// pub fn as_context_mut(&mut self) -> &mut ffmpeg::sys::AVHWFramesContext {
|
||||
// unsafe { &mut *((*self.buffer).data as *mut ffmpeg::sys::AVHWFramesContext) }
|
||||
// }
|
||||
// pub fn as_context_mut(&mut self) -> &mut ffmpeg::sys::AVHWFramesContext {
|
||||
// unsafe { &mut *((*self.buffer).data as *mut ffmpeg::sys::AVHWFramesContext) }
|
||||
// }
|
||||
|
||||
// pub fn as_context(&self) -> &ffmpeg::sys::AVHWFramesContext {
|
||||
// unsafe { &*((*self.buffer).data as *const ffmpeg::sys::AVHWFramesContext) }
|
||||
// }
|
||||
// pub fn as_context(&self) -> &ffmpeg::sys::AVHWFramesContext {
|
||||
// unsafe { &*((*self.buffer).data as *const ffmpeg::sys::AVHWFramesContext) }
|
||||
// }
|
||||
|
||||
pub fn as_raw_mut(&mut self) -> &mut ffmpeg::sys::AVBufferRef {
|
||||
unsafe { &mut *self.buffer }
|
||||
}
|
||||
pub fn as_raw_mut(&mut self) -> &mut ffmpeg::sys::AVBufferRef {
|
||||
unsafe { &mut *self.buffer }
|
||||
}
|
||||
|
||||
// pub fn as_raw(&self) -> &ffmpeg::sys::AVBufferRef {
|
||||
// unsafe { &*self.buffer }
|
||||
// }
|
||||
pub fn as_device_context_mut(&mut self) -> &mut ffmpeg::sys::AVBufferRef {
|
||||
unsafe {
|
||||
self._cuda_device_context.as_raw_mut()
|
||||
}
|
||||
}
|
||||
|
||||
/// call once to allocate frame
|
||||
pub fn get_buffer(&mut self, frame: &mut ffmpeg::frame::Video) -> Result<(), ffmpeg::Error> {
|
||||
unsafe {
|
||||
super::check_ret(ffmpeg::sys::av_hwframe_get_buffer(self.buffer, frame.as_mut_ptr(), 0))?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// pub fn as_raw(&self) -> &ffmpeg::sys::AVBufferRef {
|
||||
// unsafe { &*self.buffer }
|
||||
// }
|
||||
}
|
||||
|
||||
unsafe impl Send for HwFrameContext { }
|
||||
unsafe impl Send for HwFrameContext {}
|
||||
|
||||
pub struct HwFrameContextBuilder {
|
||||
cuda_device_context: CudaDeviceContext,
|
||||
buffer: *mut ffmpeg::sys::AVBufferRef,
|
||||
cuda_device_context: CudaDeviceContext,
|
||||
buffer: *mut ffmpeg::sys::AVBufferRef,
|
||||
}
|
||||
|
||||
impl HwFrameContextBuilder {
|
||||
pub fn new(mut cuda_device_context: CudaDeviceContext) -> anyhow::Result<Self> {
|
||||
let buffer = unsafe { ffmpeg::sys::av_hwframe_ctx_alloc(cuda_device_context.as_raw_mut()) };
|
||||
if buffer.is_null() {
|
||||
return Err(anyhow::anyhow!("could not allocate a hwframe context"));
|
||||
}
|
||||
pub fn new(mut cuda_device_context: CudaDeviceContext) -> anyhow::Result<Self> {
|
||||
let buffer = unsafe { ffmpeg::sys::av_hwframe_ctx_alloc(cuda_device_context.as_raw_mut()) };
|
||||
if buffer.is_null() {
|
||||
return Err(anyhow::anyhow!("could not allocate a hwframe context"));
|
||||
}
|
||||
|
||||
Ok(Self { cuda_device_context, buffer })
|
||||
}
|
||||
Ok(Self {
|
||||
cuda_device_context,
|
||||
buffer,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn build(mut self) -> Result<HwFrameContext, ffmpeg::Error> {
|
||||
check_ret(unsafe { ffmpeg::sys::av_hwframe_ctx_init(self.buffer) })?;
|
||||
let result = Ok(HwFrameContext::new(self.cuda_device_context, self.buffer));
|
||||
self.buffer = null_mut();
|
||||
pub fn build(mut self) -> Result<HwFrameContext, ffmpeg::Error> {
|
||||
check_ret(unsafe { ffmpeg::sys::av_hwframe_ctx_init(self.buffer) })?;
|
||||
let result = Ok(HwFrameContext::new(self.cuda_device_context, self.buffer));
|
||||
self.buffer = null_mut();
|
||||
|
||||
result
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
pub fn set_width(mut self, width: u32) -> Self {
|
||||
self.as_frame_mut().width = width as i32;
|
||||
self
|
||||
}
|
||||
pub fn set_width(mut self, width: u32) -> Self {
|
||||
self.as_frame_mut().width = width as i32;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn set_height(mut self, height: u32) -> Self {
|
||||
self.as_frame_mut().height = height as i32;
|
||||
self
|
||||
}
|
||||
pub fn set_height(mut self, height: u32) -> Self {
|
||||
self.as_frame_mut().height = height as i32;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn set_sw_format(mut self, sw_format: Pixel) -> Self {
|
||||
self.as_frame_mut().sw_format = sw_format.into();
|
||||
self
|
||||
}
|
||||
pub fn set_sw_format(mut self, sw_format: Pixel) -> Self {
|
||||
self.as_frame_mut().sw_format = sw_format.into();
|
||||
self
|
||||
}
|
||||
|
||||
pub fn set_format(mut self, format: Pixel) -> Self {
|
||||
self.as_frame_mut().format = format.into();
|
||||
self
|
||||
}
|
||||
pub fn set_format(mut self, format: Pixel) -> Self {
|
||||
self.as_frame_mut().format = format.into();
|
||||
self
|
||||
}
|
||||
|
||||
pub fn as_frame_mut(&mut self) -> &mut ffmpeg::sys::AVHWFramesContext {
|
||||
unsafe { &mut *((*self.buffer).data as *mut ffmpeg::sys::AVHWFramesContext) }
|
||||
}
|
||||
pub fn as_frame_mut(&mut self) -> &mut ffmpeg::sys::AVHWFramesContext {
|
||||
unsafe { &mut *((*self.buffer).data as *mut ffmpeg::sys::AVHWFramesContext) }
|
||||
}
|
||||
|
||||
// pub fn as_frame(&self) -> &ffmpeg::sys::AVHWFramesContext {
|
||||
// unsafe { &*((*self.buffer).data as *const ffmpeg::sys::AVHWFramesContext) }
|
||||
// }
|
||||
// pub fn as_frame(&self) -> &ffmpeg::sys::AVHWFramesContext {
|
||||
// unsafe { &*((*self.buffer).data as *const ffmpeg::sys::AVHWFramesContext) }
|
||||
// }
|
||||
}
|
|
@ -9,6 +9,8 @@ pub mod hwframe;
|
|||
|
||||
pub mod encoder_thread;
|
||||
|
||||
pub mod cuda_gl;
|
||||
|
||||
// from hgaiser/moonshine
|
||||
pub fn check_ret(error_code: i32) -> Result<(), ffmpeg::Error> {
|
||||
if error_code != 0 {
|
||||
|
|
Loading…
Reference in a new issue