Remove CUDA API usage in software framebuffer update

We now use standard OpenGL APIs here, which is nice.
This commit is contained in:
Lily Tsuru 2024-10-16 20:16:30 -04:00
parent 889d7e42ce
commit 4f585df501

View file

@ -4,14 +4,13 @@ use std::{
time::Duration, time::Duration,
}; };
use cudarc::driver::sys::CUmemorytype;
use tokio::sync::{mpsc, oneshot}; use tokio::sync::{mpsc, oneshot};
use anyhow::Result; use anyhow::Result;
use retro_frontend::{ use retro_frontend::{
frontend::{Frontend, FrontendInterface, HwGlInitData}, frontend::{Frontend, FrontendInterface, HwGlInitData},
input_devices::{AnalogRetroPad, InputDevice}, input_devices::{AnalogRetroPad, InputDevice, RetroPad},
libretro_sys_new, libretro_sys_new,
}; };
@ -47,7 +46,7 @@ extern "system" fn opengl_message_callback(
pub struct RetroState { pub struct RetroState {
frontend: Option<Box<Frontend>>, frontend: Option<Box<Frontend>>,
pad: AnalogRetroPad, pad: RetroPad,
// EGL state // EGL state
egl_context: Arc<Mutex<DeviceContext>>, egl_context: Arc<Mutex<DeviceContext>>,
@ -73,7 +72,7 @@ impl RetroState {
) -> Box<Self> { ) -> Box<Self> {
let mut boxed = Box::new(Self { let mut boxed = Box::new(Self {
frontend: None, frontend: None,
pad: AnalogRetroPad::new(), pad: RetroPad::new(),
egl_context: device_context.clone(), egl_context: device_context.clone(),
software_framebuffer: Surface::new(), software_framebuffer: Surface::new(),
@ -195,7 +194,7 @@ impl RetroState {
// copy only // copy only
if has_disconnected_pitch { if has_disconnected_pitch {
dest_line_off = (y * pitch) as usize; //dest_line_off = (y * pitch) as usize;
} }
// Create slices representing each part // Create slices representing each part
@ -248,78 +247,24 @@ impl FrontendInterface for RetroState {
let size = self.software_framebuffer.size.clone(); let size = self.software_framebuffer.size.clone();
// upload texture to GPU // Upload the software framebuffer to the GPU
/*unsafe { // (we already flip it to make opengl happy)
tracing::info!("bind tex"); unsafe {
gl::BindTexture(gl::TEXTURE_2D, self.gl_framebuffer.texture_id()); gl::BindTexture(gl::TEXTURE_2D, self.gl_framebuffer.texture_id());
tracing::info!("upload"); gl::TexSubImage2D(
gl::TexImage2D(
gl::TEXTURE_2D, gl::TEXTURE_2D,
0, 0,
gl::RGBA8 as i32, 0,
0,
size.width as i32, size.width as i32,
size.height as i32, size.height as i32,
0,
gl::RGBA, gl::RGBA,
gl::UNSIGNED_BYTE, gl::UNSIGNED_BYTE,
self.software_framebuffer.get_buffer().as_mut_ptr() as *const _, self.software_framebuffer.get_buffer().as_mut_ptr() as *const _,
); );
tracing::info!("unbind tex");
gl::BindTexture(gl::TEXTURE_2D, 0); gl::BindTexture(gl::TEXTURE_2D, 0);
}*/
// TODO: Figure out a way to do this with standard OpenGL primitives.
// (the above does not work *at all*. Don't ask how I know)
unsafe {
let mut cuda_resource = self
.cuda_resource
.lock()
.expect("Failed to lock CUDA resource");
cuda_resource
.device()
.bind_to_thread()
.expect("Failed to bind CUDA device to thread");
let mut mapped_cuda_resource =
cuda_resource.map().expect("Failed to map CUDA resource");
let array = mapped_cuda_resource
.get_mapped_array()
.expect("Failed to get CUarray from CUDA resource");
let mut memcpy = cudarc::driver::sys::CUDA_MEMCPY2D_st::default();
// src
memcpy.srcXInBytes = 0;
memcpy.srcY = 0;
memcpy.srcMemoryType = CUmemorytype::CU_MEMORYTYPE_HOST;
memcpy.srcHost = self.software_framebuffer.get_buffer().as_mut_ptr() as *const _;
// dest
memcpy.dstXInBytes = 0;
memcpy.dstY = 0;
memcpy.dstMemoryType = CUmemorytype::CU_MEMORYTYPE_ARRAY;
memcpy.dstArray = array;
memcpy.WidthInBytes = (size.width * 4) as usize;
memcpy.Height = size.height as usize;
memcpy.dstPitch = (size.width * 4) as usize;
// kick it off
cudarc::driver::sys::lib()
.cuMemcpy2DAsync_v2(&memcpy, std::ptr::null_mut())
.result()
.expect("cuMemcpy2D fail epic");
cudarc::driver::sys::lib()
.cuStreamSynchronize(std::ptr::null_mut())
.result()
.expect("fucking");
mapped_cuda_resource.unmap().expect("fuck you asshole");
} }
let _ = self.event_tx.blocking_send(RetroEvent::Frame); let _ = self.event_tx.blocking_send(RetroEvent::Frame);