working gpu-only encode
This commit is contained in:
parent
9248fe91a9
commit
09142f9668
14 changed files with 920 additions and 277 deletions
1
server/Cargo.lock
generated
1
server/Cargo.lock
generated
|
@ -1414,6 +1414,7 @@ dependencies = [
|
||||||
"futures-util",
|
"futures-util",
|
||||||
"gl",
|
"gl",
|
||||||
"letsplay_gpu",
|
"letsplay_gpu",
|
||||||
|
"libloading",
|
||||||
"rand",
|
"rand",
|
||||||
"retro_frontend",
|
"retro_frontend",
|
||||||
"serde",
|
"serde",
|
||||||
|
|
|
@ -32,6 +32,7 @@ tracing = "0.1.40"
|
||||||
tracing-subscriber = "0.3.18"
|
tracing-subscriber = "0.3.18"
|
||||||
xkeysym = "0.2.1"
|
xkeysym = "0.2.1"
|
||||||
async-trait = "0.1.83"
|
async-trait = "0.1.83"
|
||||||
|
libloading = "0.8.5"
|
||||||
|
|
||||||
|
|
||||||
[patch.crates-io]
|
[patch.crates-io]
|
||||||
|
|
|
@ -5,10 +5,15 @@ mod video;
|
||||||
|
|
||||||
mod transport;
|
mod transport;
|
||||||
|
|
||||||
|
use anyhow::Context;
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
|
|
||||||
|
use cudarc::driver::CudaDevice;
|
||||||
|
use letsplay_gpu::egl_helpers::DeviceContext;
|
||||||
use retro_thread::{spawn_retro_thread, RetroEvent};
|
use retro_thread::{spawn_retro_thread, RetroEvent};
|
||||||
|
use transport::websocket::WebsocketTransport;
|
||||||
use transport::{Transport, TransportReciever};
|
use transport::{Transport, TransportReciever};
|
||||||
|
use video::cuda_gl::safe::GraphicsResource;
|
||||||
use video::encoder_thread::EncodeThreadInput;
|
use video::encoder_thread::EncodeThreadInput;
|
||||||
use video::{encoder_thread, ffmpeg};
|
use video::{encoder_thread, ffmpeg};
|
||||||
|
|
||||||
|
@ -44,19 +49,19 @@ enum WsMessage {
|
||||||
Json(String),
|
Json(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
struct AppState {
|
struct AppState<T: Transport> {
|
||||||
encoder_tx: Arc<TokioMutex<mpsc::Sender<EncodeThreadInput>>>,
|
encoder_tx: Arc<TokioMutex<mpsc::Sender<EncodeThreadInput>>>,
|
||||||
inputs: Arc<TokioMutex<Vec<u32>>>,
|
inputs: Arc<TokioMutex<Vec<u32>>>,
|
||||||
|
|
||||||
transport: Arc<crate::transport::websocket::WebsocketTransport>,
|
transport: Arc<T>,
|
||||||
connection_count: TokioMutex<usize>,
|
connection_count: TokioMutex<usize>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl AppState {
|
impl<T> AppState<T>
|
||||||
fn new(
|
where
|
||||||
encoder_tx: mpsc::Sender<EncodeThreadInput>,
|
T: Transport + Send + Sync + 'static,
|
||||||
transport: Arc<crate::transport::websocket::WebsocketTransport>,
|
{
|
||||||
) -> Self {
|
fn new(encoder_tx: mpsc::Sender<EncodeThreadInput>, transport: Arc<T>) -> Self {
|
||||||
Self {
|
Self {
|
||||||
encoder_tx: Arc::new(TokioMutex::new(encoder_tx)),
|
encoder_tx: Arc::new(TokioMutex::new(encoder_tx)),
|
||||||
inputs: Arc::new(TokioMutex::new(Vec::new())),
|
inputs: Arc::new(TokioMutex::new(Vec::new())),
|
||||||
|
@ -67,7 +72,10 @@ impl AppState {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
impl TransportReciever for AppState {
|
impl<T> TransportReciever for AppState<T>
|
||||||
|
where
|
||||||
|
T: Transport + Send + Sync + 'static,
|
||||||
|
{
|
||||||
async fn on_connect(&self, username: &String) -> anyhow::Result<()> {
|
async fn on_connect(&self, username: &String) -> anyhow::Result<()> {
|
||||||
println!("{username} joined!");
|
println!("{username} joined!");
|
||||||
|
|
||||||
|
@ -105,7 +113,6 @@ impl TransportReciever for AppState {
|
||||||
"msg": json["msg"].as_str().unwrap()
|
"msg": json["msg"].as_str().unwrap()
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
||||||
self.transport
|
self.transport
|
||||||
.broadcast_message(transport::TransportMessage::Text(
|
.broadcast_message(transport::TransportMessage::Text(
|
||||||
serde_json::to_string(&send).expect("oh well"),
|
serde_json::to_string(&send).expect("oh well"),
|
||||||
|
@ -177,6 +184,23 @@ impl TransportReciever for AppState {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const FLIP_SRC: &str = "
|
||||||
|
extern \"C\" __global__ void flip_opengl(
|
||||||
|
const unsigned* pSrc,
|
||||||
|
unsigned* pDest,
|
||||||
|
unsigned width,
|
||||||
|
unsigned height
|
||||||
|
) {
|
||||||
|
const unsigned x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
|
const unsigned y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
|
|
||||||
|
if (x < width && y < height) {
|
||||||
|
// let reversed_y = (size.height - 1) - y;
|
||||||
|
unsigned reversed_y = (height - 1) - y;
|
||||||
|
pDest[y * width + x] = pSrc[reversed_y * width + x];
|
||||||
|
}
|
||||||
|
}";
|
||||||
|
|
||||||
#[tokio::main(flavor = "multi_thread", worker_threads = 2)]
|
#[tokio::main(flavor = "multi_thread", worker_threads = 2)]
|
||||||
async fn main() -> anyhow::Result<()> {
|
async fn main() -> anyhow::Result<()> {
|
||||||
// Setup a tracing subscriber
|
// Setup a tracing subscriber
|
||||||
|
@ -186,36 +210,56 @@ async fn main() -> anyhow::Result<()> {
|
||||||
|
|
||||||
tracing::subscriber::set_global_default(subscriber).unwrap();
|
tracing::subscriber::set_global_default(subscriber).unwrap();
|
||||||
|
|
||||||
let surface = Arc::new(Mutex::new(surface::Surface::new()));
|
|
||||||
|
|
||||||
// H.264 encoder related
|
// H.264 encoder related
|
||||||
let frame: Arc<Mutex<Option<ffmpeg::frame::Video>>> = Arc::new(Mutex::new(None));
|
let device = CudaDevice::new(0)?;
|
||||||
let (mut encoder_rx, encoder_tx) = encoder_thread::encoder_thread_spawn(&frame);
|
|
||||||
|
|
||||||
let transport = Arc::new(crate::transport::websocket::WebsocketTransport::new());
|
let ptx = cudarc::nvrtc::compile_ptx(&FLIP_SRC).with_context(|| "compiling support kernel")?;
|
||||||
|
|
||||||
|
println!("compiled kernel");
|
||||||
|
|
||||||
|
// pop it in
|
||||||
|
device.load_ptx(ptx, "module", &["flip_opengl"])?;
|
||||||
|
|
||||||
|
let egl_ctx = Arc::new(Mutex::new(DeviceContext::new(0)));
|
||||||
|
|
||||||
|
let resource = Arc::new(Mutex::new(GraphicsResource::new(&device)));
|
||||||
|
|
||||||
|
let (mut encoder_rx, encoder_tx) = encoder_thread::encoder_thread_spawn_hwframe(
|
||||||
|
&device.clone(),
|
||||||
|
&resource.clone(),
|
||||||
|
&egl_ctx.clone(),
|
||||||
|
);
|
||||||
|
|
||||||
|
let transport = Arc::new(WebsocketTransport::new());
|
||||||
let state = Arc::new(AppState::new(encoder_tx, transport.clone()));
|
let state = Arc::new(AppState::new(encoder_tx, transport.clone()));
|
||||||
|
|
||||||
let (mut event_rx, event_in_tx) = spawn_retro_thread(surface.clone());
|
let (mut retro_event_rx, retro_input_event_tx) =
|
||||||
|
spawn_retro_thread(egl_ctx.clone(), resource.clone());
|
||||||
|
|
||||||
let state_clone = state.clone();
|
let state_clone = state.clone();
|
||||||
|
|
||||||
let transport_clone = transport.clone();
|
|
||||||
|
|
||||||
// retro event handler. drives the encoder thread too
|
// retro event handler. drives the encoder thread too
|
||||||
let _ = std::thread::Builder::new()
|
let _ = std::thread::Builder::new()
|
||||||
.name("retro_event_rx".into())
|
.name("retro_event_rx".into())
|
||||||
.spawn(move || {
|
.spawn(move || {
|
||||||
let surface_clone = surface.clone();
|
// load game
|
||||||
let frame_clone = frame.clone();
|
let _ = retro_input_event_tx.blocking_send(retro_thread::RetroInEvent::LoadCore(
|
||||||
|
"cores/swanstation_libretro.so".into(),
|
||||||
|
));
|
||||||
|
let _ = retro_input_event_tx.blocking_send(retro_thread::RetroInEvent::LoadGame(
|
||||||
|
"roms/nmv1_us.cue".into(),
|
||||||
|
));
|
||||||
|
|
||||||
// start the libretro thread looping now that we're alive
|
// start the libretro thread looping now that we're alive
|
||||||
let _ = event_in_tx.blocking_send(retro_thread::RetroInEvent::Start);
|
let _ = retro_input_event_tx.blocking_send(retro_thread::RetroInEvent::Start);
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
match event_rx.try_recv() {
|
match retro_event_rx.blocking_recv() {
|
||||||
Ok(msg) => match msg {
|
Some(msg) => match msg {
|
||||||
RetroEvent::Frame => {
|
RetroEvent::Frame => {
|
||||||
|
/*
|
||||||
|
let mut same = true;
|
||||||
|
|
||||||
{
|
{
|
||||||
let mut frame_locked = frame.lock().expect(
|
let mut frame_locked = frame.lock().expect(
|
||||||
"Couldn't lock frame on our end. Did the encoder thread panic?",
|
"Couldn't lock frame on our end. Did the encoder thread panic?",
|
||||||
|
@ -229,6 +273,8 @@ async fn main() -> anyhow::Result<()> {
|
||||||
let mut surf = surface_clone.lock().expect(
|
let mut surf = surface_clone.lock().expect(
|
||||||
"locking the VNC surface to paint it to the ffmpeg frame failed",
|
"locking the VNC surface to paint it to the ffmpeg frame failed",
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
||||||
let surf_buf = surf.get_buffer();
|
let surf_buf = surf.get_buffer();
|
||||||
|
|
||||||
let buf_ptr =
|
let buf_ptr =
|
||||||
|
@ -247,12 +293,27 @@ async fn main() -> anyhow::Result<()> {
|
||||||
)
|
)
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// If any line differs then the frame isn't the same and should be encoded.
|
||||||
|
if &surf_buf[line_stride..line_stride + width as usize]
|
||||||
|
!= dest_line_slice
|
||||||
|
{
|
||||||
|
same = false;
|
||||||
|
}
|
||||||
|
|
||||||
dest_line_slice.copy_from_slice(
|
dest_line_slice.copy_from_slice(
|
||||||
&surf_buf[line_stride..line_stride + width as usize],
|
&surf_buf[line_stride..line_stride + width as usize],
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if !same {
|
||||||
|
let _ = state_clone
|
||||||
|
.encoder_tx
|
||||||
|
.blocking_lock()
|
||||||
|
.blocking_send(encoder_thread::EncodeThreadInput::SendFrame);
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
let _ = state_clone
|
let _ = state_clone
|
||||||
.encoder_tx
|
.encoder_tx
|
||||||
.blocking_lock()
|
.blocking_lock()
|
||||||
|
@ -261,6 +322,12 @@ async fn main() -> anyhow::Result<()> {
|
||||||
|
|
||||||
RetroEvent::Resize { size } => {
|
RetroEvent::Resize { size } => {
|
||||||
// make a new frame for the encoder
|
// make a new frame for the encoder
|
||||||
|
|
||||||
|
let _ = state_clone.encoder_tx.blocking_lock().blocking_send(
|
||||||
|
encoder_thread::EncodeThreadInput::Init { size: size.clone() },
|
||||||
|
);
|
||||||
|
|
||||||
|
/*
|
||||||
{
|
{
|
||||||
let mut lk_frame = frame_clone.lock().expect("Couldn't lock frame");
|
let mut lk_frame = frame_clone.lock().expect("Couldn't lock frame");
|
||||||
|
|
||||||
|
@ -270,30 +337,21 @@ async fn main() -> anyhow::Result<()> {
|
||||||
size.clone().height,
|
size.clone().height,
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
let _ = state_clone.encoder_tx.blocking_lock().blocking_send(
|
|
||||||
encoder_thread::EncodeThreadInput::Init { size: size.clone() },
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
RetroEvent::WantInputs { tx } => {
|
RetroEvent::WantInputs { tx } => {
|
||||||
let inputs = state_clone.inputs.blocking_lock();
|
let inputs = state_clone.inputs.blocking_lock();
|
||||||
//tracing::info!("giving inputs {:?}", inputs);
|
|
||||||
tx.send(inputs.clone()).expect("FUCK");
|
tx.send(inputs.clone()).expect("FUCK");
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
Err(TryRecvError::Disconnected) => break,
|
None => break,
|
||||||
Err(TryRecvError::Empty) => {}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
match encoder_rx.try_recv() {
|
match encoder_rx.try_recv() {
|
||||||
Ok(msg) => match msg {
|
Ok(msg) => match msg {
|
||||||
encoder_thread::EncodeThreadOutput::Frame { packet } => {
|
encoder_thread::EncodeThreadOutput::Frame { packet } => {
|
||||||
// let _ = state_clone
|
|
||||||
// .websocket_broadcast_tx
|
|
||||||
// .send(WsMessage::VideoPacket { packet });
|
|
||||||
|
|
||||||
// :(
|
// :(
|
||||||
let packet_data = {
|
let packet_data = {
|
||||||
let slice = packet.data().expect(
|
let slice = packet.data().expect(
|
||||||
|
@ -301,7 +359,9 @@ async fn main() -> anyhow::Result<()> {
|
||||||
);
|
);
|
||||||
slice.to_vec()
|
slice.to_vec()
|
||||||
};
|
};
|
||||||
let _ = transport_clone.broadcast_message(transport::TransportMessage::Binary(packet_data));
|
let _ = state_clone.transport.broadcast_message(
|
||||||
|
transport::TransportMessage::Binary(packet_data),
|
||||||
|
);
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
Err(TryRecvError::Empty) => {}
|
Err(TryRecvError::Empty) => {}
|
||||||
|
|
|
@ -17,7 +17,7 @@ use retro_frontend::{
|
||||||
use gpu::egl_helpers::DeviceContext;
|
use gpu::egl_helpers::DeviceContext;
|
||||||
use letsplay_gpu as gpu;
|
use letsplay_gpu as gpu;
|
||||||
|
|
||||||
use crate::{surface::Surface, types::Size};
|
use crate::{surface::Surface, types::Size, video::cuda_gl::safe::GraphicsResource};
|
||||||
|
|
||||||
/// Called by OpenGL. We use this to dump errors.
|
/// Called by OpenGL. We use this to dump errors.
|
||||||
extern "system" fn opengl_message_callback(
|
extern "system" fn opengl_message_callback(
|
||||||
|
@ -49,30 +49,38 @@ pub struct RetroState {
|
||||||
pad: RetroPad,
|
pad: RetroPad,
|
||||||
|
|
||||||
// EGL state
|
// EGL state
|
||||||
egl_context: Option<DeviceContext>,
|
egl_context: Arc<Mutex<DeviceContext>>,
|
||||||
|
|
||||||
/// Locked framebuffer.
|
/// Locked framebuffer.
|
||||||
framebuffer: Arc<Mutex<Surface>>,
|
software_framebuffer: Surface,
|
||||||
|
|
||||||
/// OpenGL FBO
|
/// OpenGL FBO
|
||||||
gl_framebuffer: gpu::GlFramebuffer,
|
gl_framebuffer: gpu::GlFramebuffer,
|
||||||
|
|
||||||
/// Cached readback buffer.
|
gl_rendering: bool,
|
||||||
readback_buffer: Surface,
|
|
||||||
|
cuda_resource: Arc<Mutex<GraphicsResource>>,
|
||||||
|
|
||||||
event_tx: mpsc::Sender<RetroEvent>,
|
event_tx: mpsc::Sender<RetroEvent>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl RetroState {
|
impl RetroState {
|
||||||
pub fn new(framebuffer: Arc<Mutex<Surface>>, event_tx: mpsc::Sender<RetroEvent>) -> Box<Self> {
|
pub fn new(
|
||||||
|
device_context: Arc<Mutex<DeviceContext>>,
|
||||||
|
resource: Arc<Mutex<GraphicsResource>>,
|
||||||
|
event_tx: mpsc::Sender<RetroEvent>,
|
||||||
|
) -> Box<Self> {
|
||||||
let mut boxed = Box::new(Self {
|
let mut boxed = Box::new(Self {
|
||||||
frontend: None,
|
frontend: None,
|
||||||
pad: RetroPad::new(),
|
pad: RetroPad::new(),
|
||||||
|
|
||||||
egl_context: None,
|
egl_context: device_context.clone(),
|
||||||
framebuffer,
|
software_framebuffer: Surface::new(),
|
||||||
gl_framebuffer: gpu::GlFramebuffer::new(),
|
gl_framebuffer: gpu::GlFramebuffer::new(),
|
||||||
readback_buffer: Surface::new(),
|
gl_rendering: false,
|
||||||
|
|
||||||
|
cuda_resource: resource.clone(),
|
||||||
|
|
||||||
event_tx,
|
event_tx,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -127,14 +135,29 @@ impl RetroState {
|
||||||
|
|
||||||
/// Initalizes the headless EGL context used for OpenGL rendering.
|
/// Initalizes the headless EGL context used for OpenGL rendering.
|
||||||
fn hw_gl_egl_init(&mut self) {
|
fn hw_gl_egl_init(&mut self) {
|
||||||
self.egl_context = Some(DeviceContext::new(0));
|
self.egl_context.lock().expect("piss").make_current();
|
||||||
|
|
||||||
|
unsafe {
|
||||||
|
// Load OpenGL functions using the EGL loader.
|
||||||
|
gl::load_with(|s| {
|
||||||
|
let str = std::ffi::CString::new(s).expect("gl::load_with fail");
|
||||||
|
std::mem::transmute(gpu::egl::GetProcAddress(str.as_ptr()))
|
||||||
|
});
|
||||||
|
|
||||||
|
// set OpenGL debug message callback
|
||||||
|
gl::Enable(gl::DEBUG_OUTPUT);
|
||||||
|
gl::DebugMessageCallback(Some(opengl_message_callback), std::ptr::null());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Destroys OpenGL resources and the EGL context.
|
/// Destroys OpenGL resources and the EGL context.
|
||||||
fn hw_gl_destroy(&mut self) {
|
fn hw_gl_destroy(&mut self) {
|
||||||
if self.egl_context.is_some() {
|
self.gl_framebuffer.destroy();
|
||||||
self.gl_framebuffer.destroy();
|
|
||||||
self.egl_context.take().unwrap().destroy()
|
{
|
||||||
|
let mut locked = self.egl_context.lock().expect("piss");
|
||||||
|
locked.release();
|
||||||
|
locked.destroy();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -143,59 +166,39 @@ impl RetroState {
|
||||||
let step_ms = (1.0 / av_info.timing.fps) * 1000.;
|
let step_ms = (1.0 / av_info.timing.fps) * 1000.;
|
||||||
let step_duration = Duration::from_millis(step_ms as u64);
|
let step_duration = Duration::from_millis(step_ms as u64);
|
||||||
|
|
||||||
self.get_frontend().run_frame();
|
{
|
||||||
|
let egl = (&mut self.egl_context).clone();
|
||||||
|
let locked_egl = egl.lock().expect("fuck YOU");
|
||||||
|
|
||||||
|
locked_egl.make_current();
|
||||||
|
self.get_frontend().run_frame();
|
||||||
|
locked_egl.release();
|
||||||
|
}
|
||||||
|
|
||||||
std::thread::sleep(step_duration);
|
std::thread::sleep(step_duration);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Bleh, I don't like this is an associated fn, but whatever
|
/// Bleh, I don't like this is an associated fn, but whatever
|
||||||
fn update_impl(framebuffer: Arc<Mutex<Surface>>, slice: &[u32], pitch: u32, from_opengl: bool) {
|
fn update_software_framebuffer(framebuffer: &mut Surface, slice: &[u32], pitch: u32) {
|
||||||
let mut framebuffer_locked = framebuffer.lock().expect("could not lock framebuffer");
|
let size = framebuffer.size.clone();
|
||||||
|
let buffer = framebuffer.get_buffer();
|
||||||
let size = framebuffer_locked.size.clone();
|
|
||||||
let buffer = framebuffer_locked.get_buffer();
|
|
||||||
|
|
||||||
let has_disconnected_pitch = pitch != size.width as u32;
|
let has_disconnected_pitch = pitch != size.width as u32;
|
||||||
|
|
||||||
// If this frame came from OpenGL we need to flip the image around
|
for y in 0..size.height {
|
||||||
// so it is right side up (from our perspective).
|
let src_line_off = (y as u32 * pitch) as usize;
|
||||||
//
|
let mut dest_line_off = (y as u32 * size.width) as usize;
|
||||||
// We do this in a bit of a convoluted way (but it's also zero allocation!)
|
|
||||||
if from_opengl {
|
|
||||||
for y in (0..size.height).rev() {
|
|
||||||
let src_line_off = (y as u32 * pitch) as usize;
|
|
||||||
let src_slice = &slice[src_line_off..src_line_off + size.width as usize];
|
|
||||||
|
|
||||||
let reversed_y = (size.height - 1) - y;
|
// copy only
|
||||||
|
if has_disconnected_pitch {
|
||||||
let src_line_off = (reversed_y as u32 * pitch) as usize;
|
dest_line_off = (y * pitch) as usize;
|
||||||
let mut dest_line_off = src_line_off;
|
|
||||||
|
|
||||||
// copy only
|
|
||||||
if has_disconnected_pitch {
|
|
||||||
dest_line_off = (reversed_y * pitch.min(size.width)) as usize;
|
|
||||||
}
|
|
||||||
|
|
||||||
let dest_slice = &mut buffer[dest_line_off..dest_line_off + size.width as usize];
|
|
||||||
|
|
||||||
dest_slice.copy_from_slice(src_slice);
|
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
for y in 0..size.height {
|
|
||||||
let src_line_off = (y as u32 * pitch) as usize;
|
|
||||||
let mut dest_line_off = src_line_off;
|
|
||||||
|
|
||||||
// copy only
|
// Create slices repressenting each part
|
||||||
if has_disconnected_pitch {
|
let src_slice = &slice[src_line_off..src_line_off + size.width as usize];
|
||||||
dest_line_off = (y * size.width) as usize;
|
let dest_slice = &mut buffer[dest_line_off..dest_line_off + size.width as usize];
|
||||||
}
|
|
||||||
|
|
||||||
// Create slices repressenting each part
|
dest_slice.copy_from_slice(src_slice);
|
||||||
let src_slice = &slice[src_line_off..src_line_off + size.width as usize];
|
|
||||||
let dest_slice = &mut framebuffer_locked.get_buffer()
|
|
||||||
[dest_line_off..dest_line_off + size.width as usize];
|
|
||||||
|
|
||||||
dest_slice.copy_from_slice(src_slice);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -204,21 +207,32 @@ impl FrontendInterface for RetroState {
|
||||||
fn video_resize(&mut self, width: u32, height: u32) {
|
fn video_resize(&mut self, width: u32, height: u32) {
|
||||||
tracing::info!("Resized to {width}x{height}");
|
tracing::info!("Resized to {width}x{height}");
|
||||||
|
|
||||||
if self.egl_context.is_some() {
|
self.gl_framebuffer.resize(width, height);
|
||||||
self.gl_framebuffer.resize(width, height);
|
let raw = self.gl_framebuffer.as_raw();
|
||||||
let raw = self.gl_framebuffer.as_raw();
|
|
||||||
|
|
||||||
// Notify the frontend layer about the new FBO ID
|
// Notify the frontend layer about the new FBO ID
|
||||||
self.get_frontend().set_gl_fbo(raw);
|
self.get_frontend().set_gl_fbo(raw);
|
||||||
|
|
||||||
// Resize the readback buffer
|
if !self.gl_rendering {
|
||||||
self.readback_buffer.resize(Size { width, height });
|
self.software_framebuffer.resize(Size { width, height });
|
||||||
}
|
}
|
||||||
|
|
||||||
self.framebuffer
|
unsafe {
|
||||||
.lock()
|
}
|
||||||
.expect("its over?")
|
|
||||||
.resize(Size { width, height });
|
// map to cuda
|
||||||
|
{
|
||||||
|
let mut locked = self
|
||||||
|
.cuda_resource
|
||||||
|
.lock()
|
||||||
|
.expect("YOU MOTHERFUCKER PISS GO COUNT YOUR DICK");
|
||||||
|
|
||||||
|
locked.device().bind_to_thread().expect("fuck");
|
||||||
|
|
||||||
|
locked
|
||||||
|
.register(self.gl_framebuffer.texture_id(), gl::TEXTURE_2D)
|
||||||
|
.expect("you fucking asswater");
|
||||||
|
}
|
||||||
|
|
||||||
let _ = self.event_tx.blocking_send(RetroEvent::Resize {
|
let _ = self.event_tx.blocking_send(RetroEvent::Resize {
|
||||||
size: Size { width, height },
|
size: Size { width, height },
|
||||||
|
@ -226,39 +240,29 @@ impl FrontendInterface for RetroState {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn video_update(&mut self, slice: &[u32], pitch: u32) {
|
fn video_update(&mut self, slice: &[u32], pitch: u32) {
|
||||||
Self::update_impl(self.framebuffer.clone(), slice, pitch, false);
|
Self::update_software_framebuffer(&mut self.software_framebuffer, slice, pitch);
|
||||||
|
|
||||||
|
let size = self.software_framebuffer.size.clone();
|
||||||
|
|
||||||
|
// upload texture to GPU
|
||||||
|
unsafe {
|
||||||
|
gl::TexImage2D(
|
||||||
|
gl::TEXTURE_2D,
|
||||||
|
0,
|
||||||
|
gl::RGBA as i32,
|
||||||
|
size.width as i32,
|
||||||
|
size.height as i32,
|
||||||
|
0,
|
||||||
|
gl::RGBA_INTEGER,
|
||||||
|
gl::UNSIGNED_INT_8_8_8_8,
|
||||||
|
self.software_framebuffer.get_buffer().as_mut_ptr() as *const _,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
let _ = self.event_tx.blocking_send(RetroEvent::Frame);
|
let _ = self.event_tx.blocking_send(RetroEvent::Frame);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn video_update_gl(&mut self) {
|
fn video_update_gl(&mut self) {
|
||||||
let dimensions = self.get_frontend().get_size();
|
|
||||||
|
|
||||||
// Read back the framebuffer
|
|
||||||
let slice = {
|
|
||||||
// lame, doesn't do bgra conversion for us
|
|
||||||
//self.gl_framebuffer.read_pixels(
|
|
||||||
// &mut self.readback_buffer.get_buffer()[..],
|
|
||||||
// dimensions.0,
|
|
||||||
// dimensions.1,
|
|
||||||
//);
|
|
||||||
|
|
||||||
unsafe {
|
|
||||||
let _bind = self.gl_framebuffer.bind();
|
|
||||||
gl::ReadPixels(
|
|
||||||
0,
|
|
||||||
0,
|
|
||||||
dimensions.0 as i32,
|
|
||||||
dimensions.1 as i32,
|
|
||||||
gl::BGRA,
|
|
||||||
gl::UNSIGNED_BYTE,
|
|
||||||
(&mut self.readback_buffer.get_buffer()).as_mut_ptr() as *mut std::ffi::c_void,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
self.readback_buffer.get_buffer()
|
|
||||||
};
|
|
||||||
|
|
||||||
Self::update_impl(self.framebuffer.clone(), slice, dimensions.0, true);
|
|
||||||
let _ = self.event_tx.blocking_send(RetroEvent::Frame);
|
let _ = self.event_tx.blocking_send(RetroEvent::Frame);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -270,9 +274,13 @@ impl FrontendInterface for RetroState {
|
||||||
let (tx, rx) = oneshot::channel();
|
let (tx, rx) = oneshot::channel();
|
||||||
let _ = self.event_tx.blocking_send(RetroEvent::WantInputs { tx });
|
let _ = self.event_tx.blocking_send(RetroEvent::WantInputs { tx });
|
||||||
|
|
||||||
let inputs = rx.blocking_recv().expect("what the FUCK are you doing");
|
let inputs = rx.blocking_recv();
|
||||||
|
|
||||||
for key in &inputs {
|
if inputs.is_err() {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
for key in &inputs.unwrap() {
|
||||||
use xkeysym::key as Key;
|
use xkeysym::key as Key;
|
||||||
|
|
||||||
match *key {
|
match *key {
|
||||||
|
@ -349,38 +357,23 @@ impl FrontendInterface for RetroState {
|
||||||
|
|
||||||
fn hw_gl_init(&mut self) -> Option<HwGlInitData> {
|
fn hw_gl_init(&mut self) -> Option<HwGlInitData> {
|
||||||
// Only create a new EGL/OpenGL context if we have to.
|
// Only create a new EGL/OpenGL context if we have to.
|
||||||
if self.egl_context.is_none() {
|
let context = self.egl_context.lock().expect("fuck you!");
|
||||||
// Initalize EGL
|
let extensions = gpu::egl_helpers::get_extensions(context.get_display());
|
||||||
self.hw_gl_egl_init();
|
|
||||||
|
|
||||||
let context = self.egl_context.as_ref().unwrap();
|
tracing::debug!("Supported EGL extensions: {:?}", extensions);
|
||||||
let extensions = gpu::egl_helpers::get_extensions(context.get_display());
|
|
||||||
|
|
||||||
tracing::debug!("Supported EGL extensions: {:?}", extensions);
|
// Check for EGL_KHR_get_all_proc_addresses, so we can use eglGetProcAddress() to load OpenGL functions
|
||||||
|
if !extensions.contains(&"EGL_KHR_get_all_proc_addresses".into()) {
|
||||||
// Check for EGL_KHR_get_all_proc_addresses, so we can use eglGetProcAddress() to load OpenGL functions
|
tracing::error!("Your graphics driver doesn't support the EGL_KHR_get_all_proc_addresses extension.");
|
||||||
if !extensions.contains(&"EGL_KHR_get_all_proc_addresses".into()) {
|
tracing::error!("Retrodemo currently needs this to load OpenGL functions. HW rendering will be disabled.");
|
||||||
tracing::error!("Your graphics driver doesn't support the EGL_KHR_get_all_proc_addresses extension.");
|
return None;
|
||||||
tracing::error!("Retrodemo currently needs this to load OpenGL functions. HW rendering will be disabled.");
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
|
|
||||||
unsafe {
|
|
||||||
// Load OpenGL functions using the EGL loader.
|
|
||||||
gl::load_with(|s| {
|
|
||||||
let str = std::ffi::CString::new(s).expect("gl::load_with fail");
|
|
||||||
std::mem::transmute(gpu::egl::GetProcAddress(str.as_ptr()))
|
|
||||||
});
|
|
||||||
|
|
||||||
// set OpenGL debug message callback
|
|
||||||
gl::Enable(gl::DEBUG_OUTPUT);
|
|
||||||
gl::DebugMessageCallback(Some(opengl_message_callback), std::ptr::null());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create the initial FBO for the core to render to
|
// If we get here, we can be certain that we're no longer
|
||||||
let dimensions = self.get_frontend().get_size();
|
// going to use software rendering. Therefore, we can
|
||||||
self.gl_framebuffer.resize(dimensions.0, dimensions.1);
|
// clear (free) the software framebuffer, since it won't be of use to us anymore.
|
||||||
|
self.software_framebuffer.clear();
|
||||||
|
self.gl_rendering = true;
|
||||||
|
|
||||||
return Some(HwGlInitData {
|
return Some(HwGlInitData {
|
||||||
get_proc_address: gpu::egl::GetProcAddress as *mut std::ffi::c_void,
|
get_proc_address: gpu::egl::GetProcAddress as *mut std::ffi::c_void,
|
||||||
|
@ -403,25 +396,34 @@ pub enum RetroEvent {
|
||||||
|
|
||||||
pub enum RetroInEvent {
|
pub enum RetroInEvent {
|
||||||
Start,
|
Start,
|
||||||
|
LoadCore(std::path::PathBuf),
|
||||||
|
LoadGame(std::path::PathBuf),
|
||||||
}
|
}
|
||||||
|
|
||||||
fn retro_thread_main(
|
fn retro_thread_main(
|
||||||
surface: Arc<Mutex<Surface>>,
|
context: &Arc<Mutex<DeviceContext>>,
|
||||||
|
resource: &Arc<Mutex<GraphicsResource>>,
|
||||||
event_tx: mpsc::Sender<RetroEvent>,
|
event_tx: mpsc::Sender<RetroEvent>,
|
||||||
mut event_rx: mpsc::Receiver<RetroInEvent>,
|
mut event_rx: mpsc::Receiver<RetroInEvent>,
|
||||||
) {
|
) {
|
||||||
let mut app = RetroState::new(surface, event_tx);
|
let mut app = RetroState::new(context.clone(), resource.clone(), event_tx);
|
||||||
|
|
||||||
app.load_core("cores/swanstation_libretro.so")
|
// do EGL init first
|
||||||
.expect("failed to load core");
|
app.hw_gl_egl_init();
|
||||||
app.load_game("roms/merged/nmv3/us/nmv3_us.cue") //merged/nmv1/us/nmv1_us.cue
|
|
||||||
.expect("failed to load game");
|
|
||||||
|
|
||||||
// sync
|
// pre-setup
|
||||||
loop {
|
loop {
|
||||||
match event_rx.blocking_recv() {
|
match event_rx.blocking_recv() {
|
||||||
None => return (),
|
None => return (),
|
||||||
Some(msg) => match msg {
|
Some(msg) => match msg {
|
||||||
|
RetroInEvent::LoadCore(path) => {
|
||||||
|
app.load_core(path).expect("Failed to load core.");
|
||||||
|
}
|
||||||
|
|
||||||
|
RetroInEvent::LoadGame(path) => {
|
||||||
|
app.load_game(path).expect("Failed to load game!");
|
||||||
|
}
|
||||||
|
|
||||||
RetroInEvent::Start => break,
|
RetroInEvent::Start => break,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
@ -435,18 +437,23 @@ fn retro_thread_main(
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn spawn_retro_thread(
|
pub fn spawn_retro_thread(
|
||||||
surface: Arc<Mutex<Surface>>,
|
context: Arc<Mutex<DeviceContext>>,
|
||||||
|
resource: Arc<Mutex<GraphicsResource>>,
|
||||||
) -> (mpsc::Receiver<RetroEvent>, mpsc::Sender<RetroInEvent>) {
|
) -> (mpsc::Receiver<RetroEvent>, mpsc::Sender<RetroInEvent>) {
|
||||||
let (event_tx, event_rx) = mpsc::channel(8);
|
// essentially semaphores
|
||||||
let (event_in_tx, event_in_rx) = mpsc::channel(8);
|
let (event_tx, event_rx) = mpsc::channel(1);
|
||||||
let fb_clone = surface.clone();
|
let (event_in_tx, event_in_rx) = mpsc::channel(1);
|
||||||
|
|
||||||
|
let cloned = resource.clone();
|
||||||
|
let ctxcloned = context.clone();
|
||||||
|
|
||||||
// discard the join handle
|
// discard the join handle
|
||||||
let _ = std::thread::Builder::new()
|
let _ = std::thread::Builder::new()
|
||||||
.name("retro_game".into())
|
.name("retro_game".into())
|
||||||
.spawn(move || {
|
.spawn(move || {
|
||||||
retro_thread_main(fb_clone, event_tx, event_in_rx);
|
retro_thread_main(&ctxcloned, &cloned, event_tx, event_in_rx);
|
||||||
}).expect("failed to spawn the game thread");
|
})
|
||||||
|
.expect("failed to spawn the game thread");
|
||||||
|
|
||||||
(event_rx, event_in_tx)
|
(event_rx, event_in_tx)
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,7 +21,7 @@ pub fn alloc_boxed_slice<T: Sized>(len: usize) -> Box<[T]> {
|
||||||
unsafe { Box::from_raw(slice) }
|
unsafe { Box::from_raw(slice) }
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A BGRA-format surface.
|
/// A BGRA (or RGBA, I'm not your dad. There are funtions that assume the former though) format surface.
|
||||||
pub struct Surface {
|
pub struct Surface {
|
||||||
buffer: Option<Box<[u32]>>,
|
buffer: Option<Box<[u32]>>,
|
||||||
pub size: Size,
|
pub size: Size,
|
||||||
|
@ -38,10 +38,18 @@ impl Surface {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn clear(&mut self) {
|
||||||
|
self.buffer = None;
|
||||||
|
self.size = Size {
|
||||||
|
width: 0,
|
||||||
|
height: 0,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
pub fn resize(&mut self, size: Size) {
|
pub fn resize(&mut self, size: Size) {
|
||||||
self.size = size;
|
self.size = size;
|
||||||
|
|
||||||
self.buffer = Some(alloc_boxed_slice(self.size.linear()));
|
self.buffer = Some(alloc_boxed_slice(self.size.linear()));
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_buffer(&mut self) -> &mut [u32] {
|
pub fn get_buffer(&mut self) -> &mut [u32] {
|
||||||
|
@ -53,15 +61,14 @@ impl Surface {
|
||||||
pub fn blit_buffer(&mut self, src_at: Rect, data: &[u32]) {
|
pub fn blit_buffer(&mut self, src_at: Rect, data: &[u32]) {
|
||||||
let mut off = 0;
|
let mut off = 0;
|
||||||
|
|
||||||
let buf = self.buffer.as_mut().unwrap();
|
let buf = self.buffer.as_mut().unwrap();
|
||||||
let buf_slice = &mut *buf;
|
let buf_slice = &mut *buf;
|
||||||
|
|
||||||
for y in src_at.y..src_at.y + src_at.height {
|
for y in src_at.y..src_at.y + src_at.height {
|
||||||
let src = &data[off..off + src_at.width as usize];
|
let src = &data[off..off + src_at.width as usize];
|
||||||
let dest_start_offset = (y as usize * self.size.width as usize) + src_at.x as usize;
|
let dest_start_offset = (y as usize * self.size.width as usize) + src_at.x as usize;
|
||||||
|
|
||||||
let dest =
|
let dest = &mut buf_slice[dest_start_offset..dest_start_offset + src_at.width as usize];
|
||||||
&mut buf_slice[dest_start_offset..dest_start_offset + src_at.width as usize];
|
|
||||||
|
|
||||||
// This forces alpha to always be 0xff. I *could* probably do this in a clearer way though :(
|
// This forces alpha to always be 0xff. I *could* probably do this in a clearer way though :(
|
||||||
for (dest, src_item) in dest.iter_mut().zip(src.iter()) {
|
for (dest, src_item) in dest.iter_mut().zip(src.iter()) {
|
||||||
|
|
27
server/src/video/cuda_gl/bindgen.sh
Executable file
27
server/src/video/cuda_gl/bindgen.sh
Executable file
|
@ -0,0 +1,27 @@
|
||||||
|
#!/bin/bash
|
||||||
|
# Does bindgen for CUDA cudaGL. (needs postprocessing)
|
||||||
|
set -exu
|
||||||
|
|
||||||
|
# --allowlist-type="^CU.*" \
|
||||||
|
# --allowlist-type="^cuuint(32|64)_t" \
|
||||||
|
# --allowlist-type="^cudaError_enum" \
|
||||||
|
# --allowlist-type="^cu.*Complex$" \
|
||||||
|
# --allowlist-type="^cuda.*" \
|
||||||
|
# --allowlist-type="^libraryPropertyType.*" \
|
||||||
|
# --allowlist-var="^CU.*" \
|
||||||
|
|
||||||
|
echo "use cudarc::sys::*; /* Hack :3 */" > ./sys.rs
|
||||||
|
|
||||||
|
bindgen \
|
||||||
|
--allowlist-type="" \
|
||||||
|
--allowlist-function="^cuGraphicsGL.*" \
|
||||||
|
--default-enum-style=rust \
|
||||||
|
--no-doc-comments \
|
||||||
|
--with-derive-default \
|
||||||
|
--with-derive-eq \
|
||||||
|
--with-derive-hash \
|
||||||
|
--with-derive-ord \
|
||||||
|
--use-core \
|
||||||
|
--dynamic-loading Lib \
|
||||||
|
gl.h -- -I/opt/cuda/include \
|
||||||
|
>> ./sys.rs
|
1
server/src/video/cuda_gl/gl.h
Normal file
1
server/src/video/cuda_gl/gl.h
Normal file
|
@ -0,0 +1 @@
|
||||||
|
#include "cudaGL.h"
|
14
server/src/video/cuda_gl/mod.rs
Normal file
14
server/src/video/cuda_gl/mod.rs
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
pub mod sys;
|
||||||
|
use sys::*;
|
||||||
|
|
||||||
|
pub mod safe;
|
||||||
|
|
||||||
|
pub unsafe fn lib() -> &'static Lib {
|
||||||
|
static LIB: std::sync::OnceLock<Lib> = std::sync::OnceLock::new();
|
||||||
|
LIB.get_or_init(|| {
|
||||||
|
if let Ok(lib) = Lib::new(libloading::library_filename("cuda")) {
|
||||||
|
return lib;
|
||||||
|
}
|
||||||
|
panic!("cuda library doesn't exist.");
|
||||||
|
})
|
||||||
|
}
|
135
server/src/video/cuda_gl/safe.rs
Normal file
135
server/src/video/cuda_gl/safe.rs
Normal file
|
@ -0,0 +1,135 @@
|
||||||
|
use cudarc::driver::{result as cuda_result, safe as cuda_safe, sys as cuda_sys, CudaDevice};
|
||||||
|
|
||||||
|
use super::sys;
|
||||||
|
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
pub struct MappedGraphicsResource {
|
||||||
|
resource: cuda_sys::CUgraphicsResource,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl MappedGraphicsResource {
|
||||||
|
fn new(resource: cuda_sys::CUgraphicsResource) -> Self {
|
||||||
|
Self { resource }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn map(&mut self) -> Result<(), cuda_result::DriverError> {
|
||||||
|
unsafe {
|
||||||
|
cuda_sys::lib()
|
||||||
|
.cuGraphicsMapResources(1, &mut self.resource, std::ptr::null_mut())
|
||||||
|
.result()?;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn unmap(&mut self) -> Result<(), cuda_result::DriverError> {
|
||||||
|
unsafe {
|
||||||
|
cuda_sys::lib()
|
||||||
|
.cuGraphicsUnmapResources(1, &mut self.resource, std::ptr::null_mut())
|
||||||
|
.result()?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get_mapped_array(&mut self) -> Result<cuda_sys::CUarray, cuda_result::DriverError> {
|
||||||
|
assert!(
|
||||||
|
!self.resource.is_null(),
|
||||||
|
"do not call GraphicsResource::get_mapped_array if no resource is actually registered"
|
||||||
|
);
|
||||||
|
|
||||||
|
let mut array: cuda_sys::CUarray = std::ptr::null_mut();
|
||||||
|
|
||||||
|
unsafe {
|
||||||
|
cuda_sys::lib()
|
||||||
|
.cuGraphicsSubResourceGetMappedArray(&mut array, self.resource, 0, 0)
|
||||||
|
.result()?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(array)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Drop for MappedGraphicsResource {
|
||||||
|
fn drop(&mut self) {
|
||||||
|
let _ = self.unmap();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Wrapper over cuGraphicsGL* apis
|
||||||
|
pub struct GraphicsResource {
|
||||||
|
context: Arc<CudaDevice>,
|
||||||
|
resource: cuda_sys::CUgraphicsResource,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl GraphicsResource {
|
||||||
|
pub fn new(device: &Arc<CudaDevice>) -> Self {
|
||||||
|
Self {
|
||||||
|
context: device.clone(),
|
||||||
|
resource: std::ptr::null_mut(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn device(&self) -> Arc<CudaDevice> {
|
||||||
|
self.context.clone()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Maps this resource.
|
||||||
|
pub fn map(&mut self) -> Result<MappedGraphicsResource, cuda_result::DriverError> {
|
||||||
|
let mut res = MappedGraphicsResource::new(self.resource);
|
||||||
|
res.map()?;
|
||||||
|
|
||||||
|
Ok(res)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn register(
|
||||||
|
&mut self,
|
||||||
|
texture_id: gl::types::GLuint,
|
||||||
|
texture_kind: gl::types::GLuint,
|
||||||
|
) -> Result<(), cuda_result::DriverError> {
|
||||||
|
// better to be safe than leak memory? idk.
|
||||||
|
if !self.resource.is_null() {
|
||||||
|
self.unregister()?;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsafe {
|
||||||
|
super::lib()
|
||||||
|
.cuGraphicsGLRegisterImage(&mut self.resource, texture_id, texture_kind, 1)
|
||||||
|
.result()?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn is_registered(&self) -> bool {
|
||||||
|
!self.resource.is_null()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn unregister(&mut self) -> Result<(), cuda_result::DriverError> {
|
||||||
|
assert!(
|
||||||
|
!self.resource.is_null(),
|
||||||
|
"do not call if no resource is actually registered"
|
||||||
|
);
|
||||||
|
|
||||||
|
unsafe {
|
||||||
|
cuda_sys::lib()
|
||||||
|
.cuGraphicsUnregisterResource(self.resource)
|
||||||
|
.result()?;
|
||||||
|
}
|
||||||
|
|
||||||
|
self.resource = std::ptr::null_mut();
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Drop for GraphicsResource {
|
||||||
|
fn drop(&mut self) {
|
||||||
|
if self.is_registered() {
|
||||||
|
let _ = self.unregister();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
unsafe impl Send for GraphicsResource {}
|
73
server/src/video/cuda_gl/sys.rs
Normal file
73
server/src/video/cuda_gl/sys.rs
Normal file
|
@ -0,0 +1,73 @@
|
||||||
|
use cudarc::driver::sys::*; /* Hack :3 */
|
||||||
|
use gl::types::{GLenum, GLuint};
|
||||||
|
/* automatically generated by rust-bindgen 0.69.4 */
|
||||||
|
|
||||||
|
pub struct Lib {
|
||||||
|
__library: ::libloading::Library,
|
||||||
|
pub cuGraphicsGLRegisterBuffer: Result<
|
||||||
|
unsafe extern "C" fn(
|
||||||
|
pCudaResource: *mut CUgraphicsResource,
|
||||||
|
buffer: GLuint,
|
||||||
|
Flags: ::core::ffi::c_uint,
|
||||||
|
) -> CUresult,
|
||||||
|
::libloading::Error,
|
||||||
|
>,
|
||||||
|
pub cuGraphicsGLRegisterImage: Result<
|
||||||
|
unsafe extern "C" fn(
|
||||||
|
pCudaResource: *mut CUgraphicsResource,
|
||||||
|
image: GLuint,
|
||||||
|
target: GLenum,
|
||||||
|
Flags: ::core::ffi::c_uint,
|
||||||
|
) -> CUresult,
|
||||||
|
::libloading::Error,
|
||||||
|
>,
|
||||||
|
}
|
||||||
|
impl Lib {
|
||||||
|
pub unsafe fn new<P>(path: P) -> Result<Self, ::libloading::Error>
|
||||||
|
where
|
||||||
|
P: AsRef<::std::ffi::OsStr>,
|
||||||
|
{
|
||||||
|
let library = ::libloading::Library::new(path)?;
|
||||||
|
Self::from_library(library)
|
||||||
|
}
|
||||||
|
pub unsafe fn from_library<L>(library: L) -> Result<Self, ::libloading::Error>
|
||||||
|
where
|
||||||
|
L: Into<::libloading::Library>,
|
||||||
|
{
|
||||||
|
let __library = library.into();
|
||||||
|
let cuGraphicsGLRegisterBuffer = __library
|
||||||
|
.get(b"cuGraphicsGLRegisterBuffer\0")
|
||||||
|
.map(|sym| *sym);
|
||||||
|
let cuGraphicsGLRegisterImage = __library
|
||||||
|
.get(b"cuGraphicsGLRegisterImage\0")
|
||||||
|
.map(|sym| *sym);
|
||||||
|
Ok(Lib {
|
||||||
|
__library,
|
||||||
|
cuGraphicsGLRegisterBuffer,
|
||||||
|
cuGraphicsGLRegisterImage,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
pub unsafe fn cuGraphicsGLRegisterBuffer(
|
||||||
|
&self,
|
||||||
|
pCudaResource: *mut CUgraphicsResource,
|
||||||
|
buffer: GLuint,
|
||||||
|
Flags: ::core::ffi::c_uint,
|
||||||
|
) -> CUresult {
|
||||||
|
(self
|
||||||
|
.cuGraphicsGLRegisterBuffer
|
||||||
|
.as_ref()
|
||||||
|
.expect("Expected function, got error."))(pCudaResource, buffer, Flags)
|
||||||
|
}
|
||||||
|
pub unsafe fn cuGraphicsGLRegisterImage(
|
||||||
|
&self,
|
||||||
|
pCudaResource: *mut CUgraphicsResource,
|
||||||
|
image: GLuint,
|
||||||
|
target: GLenum,
|
||||||
|
Flags: ::core::ffi::c_uint,
|
||||||
|
) -> CUresult {
|
||||||
|
(self
|
||||||
|
.cuGraphicsGLRegisterImage
|
||||||
|
.as_ref()
|
||||||
|
.expect("Expected function, got error."))(pCudaResource, image, target, Flags)
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,11 +1,16 @@
|
||||||
|
use cudarc::driver::{
|
||||||
|
sys::{CUdeviceptr, CUmemorytype},
|
||||||
|
CudaDevice, LaunchAsync,
|
||||||
|
};
|
||||||
|
use letsplay_gpu::egl_helpers::DeviceContext;
|
||||||
use std::{
|
use std::{
|
||||||
sync::{Arc, Mutex},
|
sync::{Arc, Mutex},
|
||||||
time::Duration,
|
time::Duration,
|
||||||
};
|
};
|
||||||
use tokio::sync::mpsc::{self, error::TryRecvError};
|
use tokio::sync::mpsc::{self, error::TryRecvError};
|
||||||
|
|
||||||
use super::ffmpeg;
|
|
||||||
use super::h264_encoder::H264Encoder;
|
use super::h264_encoder::H264Encoder;
|
||||||
|
use super::{cuda_gl::safe::GraphicsResource, ffmpeg};
|
||||||
|
|
||||||
pub enum EncodeThreadInput {
|
pub enum EncodeThreadInput {
|
||||||
Init { size: crate::types::Size },
|
Init { size: crate::types::Size },
|
||||||
|
@ -37,7 +42,7 @@ impl EncoderState {
|
||||||
self.encoder = Some(H264Encoder::new_nvenc_swframe(
|
self.encoder = Some(H264Encoder::new_nvenc_swframe(
|
||||||
size.clone(),
|
size.clone(),
|
||||||
60,
|
60,
|
||||||
3 * (1024 * 1024),
|
2 * (1024 * 1024),
|
||||||
)?);
|
)?);
|
||||||
|
|
||||||
// replace packet
|
// replace packet
|
||||||
|
@ -85,7 +90,76 @@ impl EncoderState {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn encoder_thread_main(
|
struct EncoderStateHW {
|
||||||
|
encoder: Option<H264Encoder>,
|
||||||
|
frame: ffmpeg::frame::Video,
|
||||||
|
packet: ffmpeg::Packet,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl EncoderStateHW {
|
||||||
|
fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
encoder: None,
|
||||||
|
frame: ffmpeg::frame::Video::empty(),
|
||||||
|
packet: ffmpeg::Packet::empty(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn init(&mut self, device: &Arc<CudaDevice>, size: crate::types::Size) -> anyhow::Result<()> {
|
||||||
|
self.encoder = Some(H264Encoder::new_nvenc_hwframe(
|
||||||
|
&device,
|
||||||
|
size.clone(),
|
||||||
|
60,
|
||||||
|
2 * (1024 * 1024),
|
||||||
|
)?);
|
||||||
|
|
||||||
|
// replace packet
|
||||||
|
self.packet = ffmpeg::Packet::empty();
|
||||||
|
self.frame = self.encoder.as_mut().unwrap().create_frame()?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn frame(&mut self) -> &mut ffmpeg::frame::Video {
|
||||||
|
&mut self.frame
|
||||||
|
}
|
||||||
|
|
||||||
|
fn send_frame(&mut self, pts: u64, force_keyframe: bool) -> Option<ffmpeg::Packet> {
|
||||||
|
let frame = &mut self.frame;
|
||||||
|
let encoder = self.encoder.as_mut().unwrap();
|
||||||
|
|
||||||
|
// set frame metadata
|
||||||
|
unsafe {
|
||||||
|
if force_keyframe {
|
||||||
|
(*frame.as_mut_ptr()).pict_type = ffmpeg::sys::AVPictureType::AV_PICTURE_TYPE_I;
|
||||||
|
(*frame.as_mut_ptr()).flags = ffmpeg::sys::AV_FRAME_FLAG_KEY;
|
||||||
|
(*frame.as_mut_ptr()).key_frame = 1;
|
||||||
|
} else {
|
||||||
|
(*frame.as_mut_ptr()).pict_type = ffmpeg::sys::AVPictureType::AV_PICTURE_TYPE_NONE;
|
||||||
|
(*frame.as_mut_ptr()).flags = 0i32;
|
||||||
|
(*frame.as_mut_ptr()).key_frame = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
(*frame.as_mut_ptr()).pts = pts as i64;
|
||||||
|
}
|
||||||
|
|
||||||
|
encoder.send_frame(&*frame);
|
||||||
|
encoder
|
||||||
|
.receive_packet(&mut self.packet)
|
||||||
|
.expect("Failed to recieve packet");
|
||||||
|
|
||||||
|
unsafe {
|
||||||
|
if !self.packet.is_empty() {
|
||||||
|
return Some(self.packet.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn encoder_thread_swframe_main(
|
||||||
mut rx: mpsc::Receiver<EncodeThreadInput>,
|
mut rx: mpsc::Receiver<EncodeThreadInput>,
|
||||||
tx: mpsc::Sender<EncodeThreadOutput>,
|
tx: mpsc::Sender<EncodeThreadOutput>,
|
||||||
frame: &Arc<Mutex<Option<ffmpeg::frame::Video>>>,
|
frame: &Arc<Mutex<Option<ffmpeg::frame::Video>>>,
|
||||||
|
@ -143,7 +217,7 @@ fn encoder_thread_main(
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn encoder_thread_spawn(
|
pub fn encoder_thread_spawn_swframe(
|
||||||
frame: &Arc<Mutex<Option<ffmpeg::frame::Video>>>,
|
frame: &Arc<Mutex<Option<ffmpeg::frame::Video>>>,
|
||||||
) -> (
|
) -> (
|
||||||
mpsc::Receiver<EncodeThreadOutput>,
|
mpsc::Receiver<EncodeThreadOutput>,
|
||||||
|
@ -154,7 +228,192 @@ pub fn encoder_thread_spawn(
|
||||||
|
|
||||||
let clone = Arc::clone(frame);
|
let clone = Arc::clone(frame);
|
||||||
|
|
||||||
std::thread::spawn(move || encoder_thread_main(in_rx, out_tx, &clone));
|
std::thread::spawn(move || encoder_thread_swframe_main(in_rx, out_tx, &clone));
|
||||||
|
|
||||||
|
(out_rx, in_tx)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn encoder_thread_hwframe_main(
|
||||||
|
mut rx: mpsc::Receiver<EncodeThreadInput>,
|
||||||
|
tx: mpsc::Sender<EncodeThreadOutput>,
|
||||||
|
|
||||||
|
cuda_device: &Arc<CudaDevice>,
|
||||||
|
cuda_resource: &Arc<Mutex<GraphicsResource>>,
|
||||||
|
gl_context: &Arc<Mutex<DeviceContext>>,
|
||||||
|
) -> anyhow::Result<()> {
|
||||||
|
let mut frame_number = 0u64;
|
||||||
|
let mut force_keyframe = false;
|
||||||
|
|
||||||
|
let mut encoder = EncoderStateHW::new();
|
||||||
|
|
||||||
|
// :)
|
||||||
|
cuda_device.bind_to_thread()?;
|
||||||
|
|
||||||
|
/*
|
||||||
|
|
||||||
|
let mut memcpy = cudarc::driver::sys::CUDA_MEMCPY2D_st::default();
|
||||||
|
// setup basic src stuff
|
||||||
|
memcpy.srcXInBytes = 0;
|
||||||
|
memcpy.srcY = 0;
|
||||||
|
memcpy.srcMemoryType = CUmemorytype::CU_MEMORYTYPE_ARRAY;
|
||||||
|
|
||||||
|
// dest
|
||||||
|
memcpy.dstXInBytes = 0;
|
||||||
|
memcpy.dstY = 0;
|
||||||
|
memcpy.dstMemoryType = CUmemorytype::CU_MEMORYTYPE_DEVICE;
|
||||||
|
*/
|
||||||
|
|
||||||
|
loop {
|
||||||
|
match rx.try_recv() {
|
||||||
|
Ok(msg) => match msg {
|
||||||
|
EncodeThreadInput::Init { size } => {
|
||||||
|
frame_number = 0;
|
||||||
|
|
||||||
|
if force_keyframe {
|
||||||
|
force_keyframe = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
encoder
|
||||||
|
.init(cuda_device, size)
|
||||||
|
.expect("encoder init failed");
|
||||||
|
}
|
||||||
|
|
||||||
|
EncodeThreadInput::ForceKeyframe => {
|
||||||
|
force_keyframe = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
EncodeThreadInput::SendFrame => {
|
||||||
|
// copy gl frame *ON THE GPU*
|
||||||
|
{
|
||||||
|
let mut gl_ctx = gl_context.lock().expect("you dumb fuck");
|
||||||
|
let mut gl_resource =
|
||||||
|
cuda_resource.lock().expect("couldnt lock GL resource!");
|
||||||
|
|
||||||
|
gl_ctx.make_current();
|
||||||
|
|
||||||
|
let mut mapped = gl_resource
|
||||||
|
.map()
|
||||||
|
.expect("couldnt map graphics resource. Its joever");
|
||||||
|
|
||||||
|
let array = mapped
|
||||||
|
.get_mapped_array()
|
||||||
|
.expect("well its all over anyways");
|
||||||
|
|
||||||
|
{
|
||||||
|
let frame = encoder.frame();
|
||||||
|
let width = frame.width();
|
||||||
|
let height = frame.height();
|
||||||
|
|
||||||
|
let launch_config = cudarc::driver::LaunchConfig {
|
||||||
|
grid_dim: (width / 16 + 1, height / 2 + 1, 0),
|
||||||
|
block_dim: (16, 2, 0),
|
||||||
|
shared_mem_bytes: 0,
|
||||||
|
};
|
||||||
|
|
||||||
|
let flip_opengl = cuda_device
|
||||||
|
.get_func("module", "flip_opengl")
|
||||||
|
.expect("dumb fucker");
|
||||||
|
|
||||||
|
unsafe {
|
||||||
|
let frame_ptr = frame.as_mut_ptr();
|
||||||
|
|
||||||
|
let mut params = [
|
||||||
|
array as *mut std::ffi::c_void,
|
||||||
|
(*frame_ptr).data[0] as CUdeviceptr as *mut std::ffi::c_void,
|
||||||
|
width as *const u32 as *mut std::ffi::c_void,
|
||||||
|
height as *const u32 as *mut std::ffi::c_void,
|
||||||
|
];
|
||||||
|
|
||||||
|
flip_opengl
|
||||||
|
.launch(launch_config, &mut params[..])
|
||||||
|
.expect("oh its done");
|
||||||
|
|
||||||
|
|
||||||
|
cudarc::driver::sys::lib()
|
||||||
|
.cuStreamSynchronize(std::ptr::null_mut())
|
||||||
|
.result()
|
||||||
|
.expect("you banned");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
// setup the cuMemcpy2D
|
||||||
|
{
|
||||||
|
let frame = encoder.frame();
|
||||||
|
|
||||||
|
memcpy.srcArray = array;
|
||||||
|
|
||||||
|
unsafe {
|
||||||
|
let frame_ptr = frame.as_mut_ptr();
|
||||||
|
memcpy.dstDevice = (*frame_ptr).data[0] as CUdeviceptr;
|
||||||
|
memcpy.dstPitch = (*frame_ptr).linesize[0] as usize;
|
||||||
|
memcpy.WidthInBytes = ((*frame_ptr).width * 4) as usize;
|
||||||
|
memcpy.Height = (*frame_ptr).height as usize;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
unsafe {
|
||||||
|
cudarc::driver::sys::lib()
|
||||||
|
.cuMemcpy2DAsync_v2(&memcpy, std::ptr::null_mut())
|
||||||
|
.result()
|
||||||
|
.expect("cuMemcpy2D fail epic");
|
||||||
|
|
||||||
|
cudarc::driver::sys::lib()
|
||||||
|
.cuStreamSynchronize(std::ptr::null_mut())
|
||||||
|
.result()
|
||||||
|
.expect("you banned");
|
||||||
|
|
||||||
|
} */
|
||||||
|
|
||||||
|
mapped.unmap().expect("fuck you asshole");
|
||||||
|
gl_ctx.release();
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(pkt) = encoder.send_frame(frame_number as u64, force_keyframe) {
|
||||||
|
// A bit less clear than ::empty(), but it's "Safe"
|
||||||
|
if let Some(_) = pkt.data() {
|
||||||
|
let _ = tx.blocking_send(EncodeThreadOutput::Frame {
|
||||||
|
packet: pkt.clone(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
frame_number += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if force_keyframe {
|
||||||
|
force_keyframe = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
Err(TryRecvError::Disconnected) => break,
|
||||||
|
Err(TryRecvError::Empty) => {
|
||||||
|
std::thread::sleep(Duration::from_millis(1));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn encoder_thread_spawn_hwframe(
|
||||||
|
cuda_device: &Arc<CudaDevice>,
|
||||||
|
cuda_resource: &Arc<Mutex<GraphicsResource>>,
|
||||||
|
gl_context: &Arc<Mutex<DeviceContext>>,
|
||||||
|
) -> (
|
||||||
|
mpsc::Receiver<EncodeThreadOutput>,
|
||||||
|
mpsc::Sender<EncodeThreadInput>,
|
||||||
|
) {
|
||||||
|
let (in_tx, in_rx) = mpsc::channel(1);
|
||||||
|
let (out_tx, out_rx) = mpsc::channel(1);
|
||||||
|
|
||||||
|
let dev_clone = Arc::clone(cuda_device);
|
||||||
|
let rsrc_clone = Arc::clone(cuda_resource);
|
||||||
|
let gl_clone = Arc::clone(gl_context);
|
||||||
|
|
||||||
|
std::thread::spawn(move || {
|
||||||
|
encoder_thread_hwframe_main(in_rx, out_tx, &dev_clone, &rsrc_clone, &gl_clone)
|
||||||
|
});
|
||||||
|
|
||||||
(out_rx, in_tx)
|
(out_rx, in_tx)
|
||||||
}
|
}
|
||||||
|
|
|
@ -135,8 +135,8 @@ impl H264Encoder {
|
||||||
|
|
||||||
video_encoder_context.set_format(ffmpeg::format::Pixel::ZRGB32);
|
video_encoder_context.set_format(ffmpeg::format::Pixel::ZRGB32);
|
||||||
|
|
||||||
video_encoder_context.set_qmin(38);
|
video_encoder_context.set_qmin(37);
|
||||||
video_encoder_context.set_qmax(32);
|
video_encoder_context.set_qmax(33);
|
||||||
|
|
||||||
// set h264_nvenc options
|
// set h264_nvenc options
|
||||||
let mut dict = ffmpeg::Dictionary::new();
|
let mut dict = ffmpeg::Dictionary::new();
|
||||||
|
@ -148,7 +148,7 @@ impl H264Encoder {
|
||||||
|
|
||||||
// TODO:
|
// TODO:
|
||||||
dict.set("rc", "vbr");
|
dict.set("rc", "vbr");
|
||||||
dict.set("qp", "45");
|
dict.set("qp", "35");
|
||||||
|
|
||||||
dict.set("forced-idr", "1");
|
dict.set("forced-idr", "1");
|
||||||
|
|
||||||
|
@ -169,9 +169,6 @@ impl H264Encoder {
|
||||||
max_framerate: u32,
|
max_framerate: u32,
|
||||||
bitrate: usize,
|
bitrate: usize,
|
||||||
) -> anyhow::Result<Self> {
|
) -> anyhow::Result<Self> {
|
||||||
/*
|
|
||||||
(See FIXMEs above)
|
|
||||||
|
|
||||||
let cuda_device_context = super::hwdevice::CudaDeviceContextBuilder::new()?
|
let cuda_device_context = super::hwdevice::CudaDeviceContextBuilder::new()?
|
||||||
.set_cuda_context((*cuda_device.cu_primary_ctx()) as *mut _)
|
.set_cuda_context((*cuda_device.cu_primary_ctx()) as *mut _)
|
||||||
.build()
|
.build()
|
||||||
|
@ -180,14 +177,53 @@ impl H264Encoder {
|
||||||
let mut hw_frame_context = super::hwframe::HwFrameContextBuilder::new(cuda_device_context)?
|
let mut hw_frame_context = super::hwframe::HwFrameContextBuilder::new(cuda_device_context)?
|
||||||
.set_width(size.width)
|
.set_width(size.width)
|
||||||
.set_height(size.height)
|
.set_height(size.height)
|
||||||
.set_sw_format(ffmpeg::format::Pixel::ZRGB32)
|
.set_sw_format(ffmpeg::format::Pixel::ZBGR32)
|
||||||
.set_format(ffmpeg::format::Pixel::CUDA)
|
.set_format(ffmpeg::format::Pixel::CUDA)
|
||||||
.build()
|
.build()
|
||||||
.with_context(|| "while trying to create CUDA frame context")?;
|
.with_context(|| "while trying to create CUDA frame context")?;
|
||||||
|
|
||||||
*/
|
let (mut encoder, mut video_encoder_context) =
|
||||||
|
create_context_and_set_common_parameters("h264_nvenc", &size, max_framerate, bitrate)
|
||||||
|
.with_context(|| "while trying to create encoder")?;
|
||||||
|
|
||||||
todo!("Implement me!");
|
video_encoder_context.set_format(ffmpeg::format::Pixel::CUDA);
|
||||||
|
|
||||||
|
video_encoder_context.set_qmin(37);
|
||||||
|
video_encoder_context.set_qmax(33);
|
||||||
|
|
||||||
|
unsafe {
|
||||||
|
// FIXME: this currently breaks the avbufferref system a bit
|
||||||
|
(*video_encoder_context.as_mut_ptr()).hw_frames_ctx = hw_frame_context.as_raw_mut();
|
||||||
|
(*video_encoder_context.as_mut_ptr()).hw_device_ctx =
|
||||||
|
hw_frame_context.as_device_context_mut();
|
||||||
|
}
|
||||||
|
|
||||||
|
// set h264_nvenc options
|
||||||
|
let mut dict = ffmpeg::Dictionary::new();
|
||||||
|
|
||||||
|
dict.set("tune", "ull");
|
||||||
|
dict.set("preset", "p1");
|
||||||
|
|
||||||
|
dict.set("profile", "main");
|
||||||
|
|
||||||
|
// TODO:
|
||||||
|
dict.set("rc", "vbr");
|
||||||
|
dict.set("qp", "35");
|
||||||
|
|
||||||
|
dict.set("forced-idr", "1");
|
||||||
|
|
||||||
|
// damn you
|
||||||
|
dict.set("delay", "0");
|
||||||
|
dict.set("zerolatency", "1");
|
||||||
|
|
||||||
|
let encoder = video_encoder_context
|
||||||
|
.open_as_with(encoder, dict)
|
||||||
|
.with_context(|| "While opening h264_nvenc video codec")?;
|
||||||
|
|
||||||
|
Ok(Self::NvencHWFrame {
|
||||||
|
encoder: encoder,
|
||||||
|
hw_context: hw_frame_context,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// NOTE: It's a bit pointless to have this have a mut borrow,
|
// NOTE: It's a bit pointless to have this have a mut borrow,
|
||||||
|
@ -207,46 +243,44 @@ impl H264Encoder {
|
||||||
// }
|
// }
|
||||||
//}
|
//}
|
||||||
|
|
||||||
pub fn create_frame(&mut self) -> ffmpeg::frame::Video {
|
pub fn create_frame(&mut self) -> anyhow::Result<ffmpeg::frame::Video> {
|
||||||
|
|
||||||
match self {
|
match self {
|
||||||
Self::Software { encoder } | Self::NvencSWFrame { encoder } => {
|
Self::Software { encoder } | Self::NvencSWFrame { encoder } => {
|
||||||
return ffmpeg::frame::Video::new(encoder.format(), encoder.width(), encoder.height());
|
return Ok(ffmpeg::frame::Video::new(
|
||||||
|
encoder.format(),
|
||||||
|
encoder.width(),
|
||||||
|
encoder.height(),
|
||||||
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
Self::NvencHWFrame {
|
Self::NvencHWFrame {
|
||||||
encoder,
|
encoder,
|
||||||
hw_context,
|
hw_context,
|
||||||
} => {
|
} => {
|
||||||
todo!("Implement H264Encoder::create_frame() for NvencHWFrame!");
|
let mut frame = ffmpeg::frame::Video::empty();
|
||||||
|
|
||||||
|
unsafe {
|
||||||
|
(*frame.as_mut_ptr()).format = ffmpeg::format::Pixel::CUDA as i32;
|
||||||
|
(*frame.as_mut_ptr()).width = encoder.width() as i32;
|
||||||
|
(*frame.as_mut_ptr()).height = encoder.height() as i32;
|
||||||
|
(*frame.as_mut_ptr()).hw_frames_ctx = hw_context.as_raw_mut();
|
||||||
|
|
||||||
|
//ffmpeg::sys::av_frame_get_buffer(frame.as_mut_ptr(), 32);
|
||||||
|
|
||||||
|
hw_context.get_buffer(&mut frame)?;
|
||||||
|
hw_context.get_buffer(&mut frame)?;
|
||||||
|
|
||||||
|
(*frame.as_mut_ptr()).linesize[0] = (*frame.as_ptr()).width * 4;
|
||||||
|
|
||||||
|
return Ok(frame);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
unsafe {
|
|
||||||
let mut frame = ffmpeg::Frame::empty();
|
|
||||||
(*frame.as_mut_ptr()).format = pixel_format as i32;
|
|
||||||
(*frame.as_mut_ptr()).width = width as i32;
|
|
||||||
(*frame.as_mut_ptr()).height = height as i32;
|
|
||||||
(*frame.as_mut_ptr()).hw_frames_ctx = context.as_raw_mut();
|
|
||||||
|
|
||||||
super::check_ret(ffmpeg::sys::av_hwframe_get_buffer(
|
|
||||||
context.as_raw_mut(),
|
|
||||||
frame.as_mut_ptr(),
|
|
||||||
0,
|
|
||||||
))?;
|
|
||||||
super::check_ret(ffmpeg::sys::av_hwframe_get_buffer(
|
|
||||||
context.as_raw_mut(),
|
|
||||||
frame.as_mut_ptr(),
|
|
||||||
0,
|
|
||||||
))?;
|
|
||||||
|
|
||||||
(*frame.as_mut_ptr()).linesize[0] = (*frame.as_ptr()).width * 4;
|
*/
|
||||||
|
|
||||||
Ok(frame)
|
|
||||||
}
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
todo!("FIXME");
|
todo!("FIXME");
|
||||||
}
|
}
|
||||||
|
@ -267,7 +301,8 @@ impl H264Encoder {
|
||||||
encoder,
|
encoder,
|
||||||
hw_context,
|
hw_context,
|
||||||
} => {
|
} => {
|
||||||
todo!("Implement send_frame() for NvencHWFrame");
|
//todo!("Implement send_frame() for NvencHWFrame");
|
||||||
|
encoder.send_frame(frame).unwrap();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,82 +7,103 @@ use ffmpeg::format::Pixel;
|
||||||
use super::{check_ret, hwdevice::CudaDeviceContext};
|
use super::{check_ret, hwdevice::CudaDeviceContext};
|
||||||
|
|
||||||
pub struct HwFrameContext {
|
pub struct HwFrameContext {
|
||||||
_cuda_device_context: CudaDeviceContext,
|
_cuda_device_context: CudaDeviceContext,
|
||||||
buffer: *mut ffmpeg::sys::AVBufferRef,
|
buffer: *mut ffmpeg::sys::AVBufferRef,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl HwFrameContext {
|
impl HwFrameContext {
|
||||||
fn new(cuda_device_context: CudaDeviceContext, buffer: *mut ffmpeg::sys::AVBufferRef) -> Self {
|
fn new(cuda_device_context: CudaDeviceContext, buffer: *mut ffmpeg::sys::AVBufferRef) -> Self {
|
||||||
Self { _cuda_device_context: cuda_device_context, buffer }
|
Self {
|
||||||
}
|
_cuda_device_context: cuda_device_context,
|
||||||
|
buffer,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// pub fn as_context_mut(&mut self) -> &mut ffmpeg::sys::AVHWFramesContext {
|
// pub fn as_context_mut(&mut self) -> &mut ffmpeg::sys::AVHWFramesContext {
|
||||||
// unsafe { &mut *((*self.buffer).data as *mut ffmpeg::sys::AVHWFramesContext) }
|
// unsafe { &mut *((*self.buffer).data as *mut ffmpeg::sys::AVHWFramesContext) }
|
||||||
// }
|
// }
|
||||||
|
|
||||||
// pub fn as_context(&self) -> &ffmpeg::sys::AVHWFramesContext {
|
// pub fn as_context(&self) -> &ffmpeg::sys::AVHWFramesContext {
|
||||||
// unsafe { &*((*self.buffer).data as *const ffmpeg::sys::AVHWFramesContext) }
|
// unsafe { &*((*self.buffer).data as *const ffmpeg::sys::AVHWFramesContext) }
|
||||||
// }
|
// }
|
||||||
|
|
||||||
pub fn as_raw_mut(&mut self) -> &mut ffmpeg::sys::AVBufferRef {
|
pub fn as_raw_mut(&mut self) -> &mut ffmpeg::sys::AVBufferRef {
|
||||||
unsafe { &mut *self.buffer }
|
unsafe { &mut *self.buffer }
|
||||||
}
|
}
|
||||||
|
|
||||||
// pub fn as_raw(&self) -> &ffmpeg::sys::AVBufferRef {
|
pub fn as_device_context_mut(&mut self) -> &mut ffmpeg::sys::AVBufferRef {
|
||||||
// unsafe { &*self.buffer }
|
unsafe {
|
||||||
// }
|
self._cuda_device_context.as_raw_mut()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// call once to allocate frame
|
||||||
|
pub fn get_buffer(&mut self, frame: &mut ffmpeg::frame::Video) -> Result<(), ffmpeg::Error> {
|
||||||
|
unsafe {
|
||||||
|
super::check_ret(ffmpeg::sys::av_hwframe_get_buffer(self.buffer, frame.as_mut_ptr(), 0))?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
// pub fn as_raw(&self) -> &ffmpeg::sys::AVBufferRef {
|
||||||
|
// unsafe { &*self.buffer }
|
||||||
|
// }
|
||||||
}
|
}
|
||||||
|
|
||||||
unsafe impl Send for HwFrameContext { }
|
unsafe impl Send for HwFrameContext {}
|
||||||
|
|
||||||
pub struct HwFrameContextBuilder {
|
pub struct HwFrameContextBuilder {
|
||||||
cuda_device_context: CudaDeviceContext,
|
cuda_device_context: CudaDeviceContext,
|
||||||
buffer: *mut ffmpeg::sys::AVBufferRef,
|
buffer: *mut ffmpeg::sys::AVBufferRef,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl HwFrameContextBuilder {
|
impl HwFrameContextBuilder {
|
||||||
pub fn new(mut cuda_device_context: CudaDeviceContext) -> anyhow::Result<Self> {
|
pub fn new(mut cuda_device_context: CudaDeviceContext) -> anyhow::Result<Self> {
|
||||||
let buffer = unsafe { ffmpeg::sys::av_hwframe_ctx_alloc(cuda_device_context.as_raw_mut()) };
|
let buffer = unsafe { ffmpeg::sys::av_hwframe_ctx_alloc(cuda_device_context.as_raw_mut()) };
|
||||||
if buffer.is_null() {
|
if buffer.is_null() {
|
||||||
return Err(anyhow::anyhow!("could not allocate a hwframe context"));
|
return Err(anyhow::anyhow!("could not allocate a hwframe context"));
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(Self { cuda_device_context, buffer })
|
Ok(Self {
|
||||||
}
|
cuda_device_context,
|
||||||
|
buffer,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
pub fn build(mut self) -> Result<HwFrameContext, ffmpeg::Error> {
|
pub fn build(mut self) -> Result<HwFrameContext, ffmpeg::Error> {
|
||||||
check_ret(unsafe { ffmpeg::sys::av_hwframe_ctx_init(self.buffer) })?;
|
check_ret(unsafe { ffmpeg::sys::av_hwframe_ctx_init(self.buffer) })?;
|
||||||
let result = Ok(HwFrameContext::new(self.cuda_device_context, self.buffer));
|
let result = Ok(HwFrameContext::new(self.cuda_device_context, self.buffer));
|
||||||
self.buffer = null_mut();
|
self.buffer = null_mut();
|
||||||
|
|
||||||
result
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn set_width(mut self, width: u32) -> Self {
|
pub fn set_width(mut self, width: u32) -> Self {
|
||||||
self.as_frame_mut().width = width as i32;
|
self.as_frame_mut().width = width as i32;
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn set_height(mut self, height: u32) -> Self {
|
pub fn set_height(mut self, height: u32) -> Self {
|
||||||
self.as_frame_mut().height = height as i32;
|
self.as_frame_mut().height = height as i32;
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn set_sw_format(mut self, sw_format: Pixel) -> Self {
|
pub fn set_sw_format(mut self, sw_format: Pixel) -> Self {
|
||||||
self.as_frame_mut().sw_format = sw_format.into();
|
self.as_frame_mut().sw_format = sw_format.into();
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn set_format(mut self, format: Pixel) -> Self {
|
pub fn set_format(mut self, format: Pixel) -> Self {
|
||||||
self.as_frame_mut().format = format.into();
|
self.as_frame_mut().format = format.into();
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn as_frame_mut(&mut self) -> &mut ffmpeg::sys::AVHWFramesContext {
|
pub fn as_frame_mut(&mut self) -> &mut ffmpeg::sys::AVHWFramesContext {
|
||||||
unsafe { &mut *((*self.buffer).data as *mut ffmpeg::sys::AVHWFramesContext) }
|
unsafe { &mut *((*self.buffer).data as *mut ffmpeg::sys::AVHWFramesContext) }
|
||||||
}
|
}
|
||||||
|
|
||||||
// pub fn as_frame(&self) -> &ffmpeg::sys::AVHWFramesContext {
|
// pub fn as_frame(&self) -> &ffmpeg::sys::AVHWFramesContext {
|
||||||
// unsafe { &*((*self.buffer).data as *const ffmpeg::sys::AVHWFramesContext) }
|
// unsafe { &*((*self.buffer).data as *const ffmpeg::sys::AVHWFramesContext) }
|
||||||
// }
|
// }
|
||||||
}
|
}
|
|
@ -9,6 +9,8 @@ pub mod hwframe;
|
||||||
|
|
||||||
pub mod encoder_thread;
|
pub mod encoder_thread;
|
||||||
|
|
||||||
|
pub mod cuda_gl;
|
||||||
|
|
||||||
// from hgaiser/moonshine
|
// from hgaiser/moonshine
|
||||||
pub fn check_ret(error_code: i32) -> Result<(), ffmpeg::Error> {
|
pub fn check_ret(error_code: i32) -> Result<(), ffmpeg::Error> {
|
||||||
if error_code != 0 {
|
if error_code != 0 {
|
||||||
|
|
Loading…
Reference in a new issue