render/vulkan: import semaphore to DMA-BUF instead of blocking

Right now the Vulkan renderer blocks until the frame is complete
after rendering. This is necessary because Vulkan doesn't
interoperate well with the implicit sync we use everywhere else.

Use the new kernel API to import a sync_file into a DMA-BUF to
avoid blocking.
This commit is contained in:
Simon Ser 2022-05-26 14:45:15 +02:00
parent aaf828d3d2
commit d2238bf722
3 changed files with 115 additions and 8 deletions

View File

@ -38,6 +38,8 @@ struct wlr_vk_device {
int drm_fd;
bool implicit_sync_interop;
// we only ever need one queue for rendering and transfer commands
uint32_t queue_family;
VkQueue queue;
@ -46,6 +48,7 @@ struct wlr_vk_device {
PFN_vkGetMemoryFdPropertiesKHR getMemoryFdPropertiesKHR;
PFN_vkWaitSemaphoresKHR waitSemaphoresKHR;
PFN_vkGetSemaphoreCounterValueKHR getSemaphoreCounterValueKHR;
PFN_vkGetSemaphoreFdKHR getSemaphoreFdKHR;
} api;
uint32_t format_prop_count;
@ -152,6 +155,9 @@ struct wlr_vk_command_buffer {
struct wl_list destroy_textures; // wlr_vk_texture.destroy_link
// Staging shared buffers to release after the command buffer completes
struct wl_list stage_buffers; // wlr_vk_shared_buffer.link
// For DMA-BUF implicit sync interop, may be NULL
VkSemaphore binary_semaphore;
};
#define VULKAN_COMMAND_BUFFERS_CAP 64

View File

@ -17,6 +17,7 @@
#include <wlr/backend/interface.h>
#include <wlr/types/wlr_linux_dmabuf_v1.h>
#include "render/dmabuf.h"
#include "render/pixel_format.h"
#include "render/vulkan.h"
#include "render/vulkan/shaders/common.vert.h"
@ -772,6 +773,49 @@ static bool vulkan_begin(struct wlr_renderer *wlr_renderer,
return true;
}
/*
 * Synchronizes the current render buffer with the work submitted in `cb`.
 *
 * Without implicit sync interop on the device, this waits on the command
 * buffer through wait_command_buffer() and returns its result. Otherwise the
 * command buffer's binary semaphore is exported as a sync_file FD, which is
 * then imported (as a write fence) into every plane FD of the render
 * buffer's DMA-BUF. The exported FD is closed before returning.
 *
 * Returns true on success, false if any step failed.
 */
static bool vulkan_sync_render_buffer(struct wlr_vk_renderer *renderer,
		struct wlr_vk_command_buffer *cb) {
	if (!renderer->dev->implicit_sync_interop) {
		// No interop available: blocking is the only option left
		return wait_command_buffer(cb, renderer);
	}

	struct wlr_dmabuf_attributes dmabuf = {0};
	bool have_dmabuf = wlr_buffer_get_dmabuf(
		renderer->current_render_buffer->wlr_buffer, &dmabuf);
	if (!have_dmabuf) {
		wlr_log(WLR_ERROR, "wlr_buffer_get_dmabuf failed");
		return false;
	}

	// Note: vkGetSemaphoreFdKHR implicitly resets the semaphore
	const VkSemaphoreGetFdInfoKHR semaphore_fd_info = {
		.sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
		.semaphore = cb->binary_semaphore,
		.handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT,
	};
	int fence_fd = -1;
	VkResult res = renderer->dev->api.getSemaphoreFdKHR(renderer->dev->dev,
		&semaphore_fd_info, &fence_fd);
	if (res != VK_SUCCESS) {
		wlr_vk_error("vkGetSemaphoreFdKHR", res);
		return false;
	}

	// Attach the fence to each plane; stop at the first failure
	bool ok = true;
	for (int plane = 0; plane < dmabuf.n_planes; plane++) {
		if (!dmabuf_import_sync_file(dmabuf.fd[plane], DMA_BUF_SYNC_WRITE,
				fence_fd)) {
			ok = false;
			break;
		}
	}

	// The exported FD is closed on both the success and the failure path
	close(fence_fd);
	return ok;
}
static void vulkan_end(struct wlr_renderer *wlr_renderer) {
struct wlr_vk_renderer *renderer = vulkan_get_renderer(wlr_renderer);
assert(renderer->current_render_buffer);
@ -932,10 +976,35 @@ static void vulkan_end(struct wlr_renderer *wlr_renderer) {
return;
}
size_t render_signal_len = 1;
VkSemaphore render_signal[2] = { renderer->timeline_semaphore };
uint64_t render_signal_timeline_points[2] = { render_timeline_point };
if (renderer->dev->implicit_sync_interop) {
if (render_cb->binary_semaphore == VK_NULL_HANDLE) {
VkExportSemaphoreCreateInfo export_info = {
.sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO,
.handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT,
};
VkSemaphoreCreateInfo semaphore_info = {
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
.pNext = &export_info,
};
VkResult res = vkCreateSemaphore(renderer->dev->dev, &semaphore_info,
NULL, &render_cb->binary_semaphore);
if (res != VK_SUCCESS) {
wlr_vk_error("vkCreateSemaphore", res);
return;
}
}
render_signal[render_signal_len++] = render_cb->binary_semaphore;
}
VkTimelineSemaphoreSubmitInfoKHR render_timeline_submit_info = {
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR,
.signalSemaphoreValueCount = 1,
.pSignalSemaphoreValues = &render_timeline_point,
.signalSemaphoreValueCount = render_signal_len,
.pSignalSemaphoreValues = render_signal_timeline_points,
};
VkSubmitInfo *render_sub = &submit_infos[submit_count];
@ -943,8 +1012,8 @@ static void vulkan_end(struct wlr_renderer *wlr_renderer) {
render_sub->pNext = &render_timeline_submit_info;
render_sub->pCommandBuffers = &render_cb->vk;
render_sub->commandBufferCount = 1u;
render_sub->signalSemaphoreCount = 1;
render_sub->pSignalSemaphores = &renderer->timeline_semaphore,
render_sub->signalSemaphoreCount = render_signal_len;
render_sub->pSignalSemaphores = render_signal,
++submit_count;
VkResult res = vkQueueSubmit(renderer->dev->queue, submit_count,
@ -967,10 +1036,7 @@ static void vulkan_end(struct wlr_renderer *wlr_renderer) {
wl_list_insert(&stage_cb->stage_buffers, &stage_buf->link);
}
// sadly this is required due to the current api/rendering model of wlr
// ideally we could use gpu and cpu in parallel (_without_ the
// implicit synchronization overhead and mess of opengl drivers)
if (!wait_command_buffer(render_cb, renderer)) {
if (!vulkan_sync_render_buffer(renderer, render_cb)) {
return;
}
}
@ -1173,6 +1239,9 @@ static void vulkan_destroy(struct wlr_renderer *wlr_renderer) {
continue;
}
release_command_buffer_resources(cb, renderer);
if (cb->binary_semaphore != VK_NULL_HANDLE) {
vkDestroySemaphore(renderer->dev->dev, cb->binary_semaphore, NULL);
}
}
// stage.cb automatically freed with command pool

View File

@ -10,6 +10,7 @@
#include <wlr/util/log.h>
#include <wlr/version.h>
#include <wlr/config.h>
#include "render/dmabuf.h"
#include "render/vulkan.h"
#if defined(__linux__)
@ -416,6 +417,7 @@ struct wlr_vk_device *vulkan_device_create(struct wlr_vk_instance *ini,
// image_drm_format_modifier extensions.
const char *extensions[] = {
VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME,
VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME,
VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME, // or vulkan 1.2
VK_EXT_EXTERNAL_MEMORY_DMA_BUF_EXTENSION_NAME,
VK_EXT_QUEUE_FAMILY_FOREIGN_EXTENSION_NAME,
@ -453,6 +455,35 @@ struct wlr_vk_device *vulkan_device_create(struct wlr_vk_instance *ini,
assert(graphics_found);
}
const VkPhysicalDeviceExternalSemaphoreInfo ext_semaphore_info = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_SEMAPHORE_INFO,
.handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT,
};
VkExternalSemaphoreProperties ext_semaphore_props = {
.sType = VK_STRUCTURE_TYPE_EXTERNAL_SEMAPHORE_PROPERTIES,
};
vkGetPhysicalDeviceExternalSemaphoreProperties(phdev,
&ext_semaphore_info, &ext_semaphore_props);
bool exportable_semaphore = ext_semaphore_props.externalSemaphoreFeatures &
VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT;
if (!exportable_semaphore) {
wlr_log(WLR_DEBUG, "VkSemaphore is not exportable to a sync_file");
}
bool dmabuf_sync_file_import_export = dmabuf_check_sync_file_import_export();
if (!dmabuf_sync_file_import_export) {
wlr_log(WLR_DEBUG, "DMA-BUF sync_file import/export not supported");
}
dev->implicit_sync_interop =
exportable_semaphore && dmabuf_sync_file_import_export;
if (dev->implicit_sync_interop) {
wlr_log(WLR_DEBUG, "Implicit sync interop supported");
} else {
wlr_log(WLR_INFO, "Implicit sync interop not supported, "
"falling back to blocking");
}
const float prio = 1.f;
VkDeviceQueueCreateInfo qinfo = {
.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
@ -487,6 +518,7 @@ struct wlr_vk_device *vulkan_device_create(struct wlr_vk_instance *ini,
load_device_proc(dev, "vkWaitSemaphoresKHR", &dev->api.waitSemaphoresKHR);
load_device_proc(dev, "vkGetSemaphoreCounterValueKHR",
&dev->api.getSemaphoreCounterValueKHR);
load_device_proc(dev, "vkGetSemaphoreFdKHR", &dev->api.getSemaphoreFdKHR);
// - check device format support -
size_t max_fmts;