From d2238bf722d9a83156077473a5f5632b751d803a Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Thu, 26 May 2022 14:45:15 +0200 Subject: [PATCH] render/vulkan: import semaphore to DMA-BUF instead of blocking Right now the Vulkan renderer blocks until the frame is complete after rendering. This is necessary because Vulkan doesn't interoperate well with implicit sync we use everywhere else. Use the new kernel API to import a sync_file into a DMA-BUF to avoid blocking. --- include/render/vulkan.h | 6 +++ render/vulkan/renderer.c | 85 ++++++++++++++++++++++++++++++++++++---- render/vulkan/vulkan.c | 32 +++++++++++++++ 3 files changed, 115 insertions(+), 8 deletions(-) diff --git a/include/render/vulkan.h b/include/render/vulkan.h index d21a7ca0..93e492cf 100644 --- a/include/render/vulkan.h +++ b/include/render/vulkan.h @@ -38,6 +38,8 @@ struct wlr_vk_device { int drm_fd; + bool implicit_sync_interop; + // we only ever need one queue for rendering and transfer commands uint32_t queue_family; VkQueue queue; @@ -46,6 +48,7 @@ struct wlr_vk_device { PFN_vkGetMemoryFdPropertiesKHR getMemoryFdPropertiesKHR; PFN_vkWaitSemaphoresKHR waitSemaphoresKHR; PFN_vkGetSemaphoreCounterValueKHR getSemaphoreCounterValueKHR; + PFN_vkGetSemaphoreFdKHR getSemaphoreFdKHR; } api; uint32_t format_prop_count; @@ -152,6 +155,9 @@ struct wlr_vk_command_buffer { struct wl_list destroy_textures; // wlr_vk_texture.destroy_link // Staging shared buffers to release after the command buffer completes struct wl_list stage_buffers; // wlr_vk_shared_buffer.link + + // For DMA-BUF implicit sync interop, may be NULL + VkSemaphore binary_semaphore; }; #define VULKAN_COMMAND_BUFFERS_CAP 64 diff --git a/render/vulkan/renderer.c b/render/vulkan/renderer.c index 91667405..35361813 100644 --- a/render/vulkan/renderer.c +++ b/render/vulkan/renderer.c @@ -17,6 +17,7 @@ #include #include +#include "render/dmabuf.h" #include "render/pixel_format.h" #include "render/vulkan.h" #include "render/vulkan/shaders/common.vert.h" @@ -772,6 +773,49 @@ static bool vulkan_begin(struct wlr_renderer *wlr_renderer, return true; } +static bool vulkan_sync_render_buffer(struct wlr_vk_renderer *renderer, + struct wlr_vk_command_buffer *cb) { + VkResult res; + + if (!renderer->dev->implicit_sync_interop) { + // We have no choice but to block here sadly + return wait_command_buffer(cb, renderer); + } + + struct wlr_dmabuf_attributes dmabuf = {0}; + if (!wlr_buffer_get_dmabuf(renderer->current_render_buffer->wlr_buffer, + &dmabuf)) { + wlr_log(WLR_ERROR, "wlr_buffer_get_dmabuf failed"); + return false; + } + + // Note: vkGetSemaphoreFdKHR implicitly resets the semaphore + const VkSemaphoreGetFdInfoKHR get_fence_fd_info = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR, + .semaphore = cb->binary_semaphore, + .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT, + }; + int sync_file_fd = -1; + res = renderer->dev->api.getSemaphoreFdKHR(renderer->dev->dev, + &get_fence_fd_info, &sync_file_fd); + if (res != VK_SUCCESS) { + wlr_vk_error("vkGetSemaphoreFdKHR", res); + return false; + } + + for (int i = 0; i < dmabuf.n_planes; i++) { + if (!dmabuf_import_sync_file(dmabuf.fd[i], DMA_BUF_SYNC_WRITE, + sync_file_fd)) { + close(sync_file_fd); + return false; + } + } + + close(sync_file_fd); + + return true; +} + static void vulkan_end(struct wlr_renderer *wlr_renderer) { struct wlr_vk_renderer *renderer = vulkan_get_renderer(wlr_renderer); assert(renderer->current_render_buffer); @@ -932,10 +976,35 @@ static void vulkan_end(struct wlr_renderer *wlr_renderer) { return; } + size_t render_signal_len = 1; + VkSemaphore render_signal[2] = { renderer->timeline_semaphore }; + uint64_t render_signal_timeline_points[2] = { render_timeline_point }; + + if (renderer->dev->implicit_sync_interop) { + if (render_cb->binary_semaphore == VK_NULL_HANDLE) { + VkExportSemaphoreCreateInfo export_info = { + .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO, + .handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT, + }; + VkSemaphoreCreateInfo semaphore_info = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, + .pNext = &export_info, + }; + VkResult res = vkCreateSemaphore(renderer->dev->dev, &semaphore_info, + NULL, &render_cb->binary_semaphore); + if (res != VK_SUCCESS) { + wlr_vk_error("vkCreateSemaphore", res); + return; + } + } + + render_signal[render_signal_len++] = render_cb->binary_semaphore; + } + VkTimelineSemaphoreSubmitInfoKHR render_timeline_submit_info = { .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR, - .signalSemaphoreValueCount = 1, - .pSignalSemaphoreValues = &render_timeline_point, + .signalSemaphoreValueCount = render_signal_len, + .pSignalSemaphoreValues = render_signal_timeline_points, }; VkSubmitInfo *render_sub = &submit_infos[submit_count]; @@ -943,8 +1012,8 @@ static void vulkan_end(struct wlr_renderer *wlr_renderer) { render_sub->pNext = &render_timeline_submit_info; render_sub->pCommandBuffers = &render_cb->vk; render_sub->commandBufferCount = 1u; - render_sub->signalSemaphoreCount = 1; - render_sub->pSignalSemaphores = &renderer->timeline_semaphore, + render_sub->signalSemaphoreCount = render_signal_len; + render_sub->pSignalSemaphores = render_signal, ++submit_count; VkResult res = vkQueueSubmit(renderer->dev->queue, submit_count, @@ -967,10 +1036,7 @@ static void vulkan_end(struct wlr_renderer *wlr_renderer) { wl_list_insert(&stage_cb->stage_buffers, &stage_buf->link); } - // sadly this is required due to the current api/rendering model of wlr - // ideally we could use gpu and cpu in parallel (_without_ the - // implicit synchronization overhead and mess of opengl drivers) - if (!wait_command_buffer(render_cb, renderer)) { + if (!vulkan_sync_render_buffer(renderer, render_cb)) { return; } } @@ -1173,6 +1239,9 @@ static void vulkan_destroy(struct wlr_renderer *wlr_renderer) { continue; } release_command_buffer_resources(cb, renderer); + if (cb->binary_semaphore != VK_NULL_HANDLE) { + vkDestroySemaphore(renderer->dev->dev, cb->binary_semaphore, NULL); + } } // stage.cb automatically freed with command pool diff --git a/render/vulkan/vulkan.c b/render/vulkan/vulkan.c index 64d021c2..1791a21e 100644 --- a/render/vulkan/vulkan.c +++ b/render/vulkan/vulkan.c @@ -10,6 +10,7 @@ #include #include #include +#include "render/dmabuf.h" #include "render/vulkan.h" #if defined(__linux__) @@ -416,6 +417,7 @@ struct wlr_vk_device *vulkan_device_create(struct wlr_vk_instance *ini, // image_drm_format_modifier extensions. const char *extensions[] = { VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, + VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME, VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME, // or vulkan 1.2 VK_EXT_EXTERNAL_MEMORY_DMA_BUF_EXTENSION_NAME, VK_EXT_QUEUE_FAMILY_FOREIGN_EXTENSION_NAME, @@ -453,6 +455,35 @@ struct wlr_vk_device *vulkan_device_create(struct wlr_vk_instance *ini, assert(graphics_found); } + const VkPhysicalDeviceExternalSemaphoreInfo ext_semaphore_info = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_SEMAPHORE_INFO, + .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT, + }; + VkExternalSemaphoreProperties ext_semaphore_props = { + .sType = VK_STRUCTURE_TYPE_EXTERNAL_SEMAPHORE_PROPERTIES, + }; + vkGetPhysicalDeviceExternalSemaphoreProperties(phdev, + &ext_semaphore_info, &ext_semaphore_props); + bool exportable_semaphore = ext_semaphore_props.externalSemaphoreFeatures & + VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT; + if (!exportable_semaphore) { + wlr_log(WLR_DEBUG, "VkSemaphore is not exportable to a sync_file"); + } + + bool dmabuf_sync_file_import_export = dmabuf_check_sync_file_import_export(); + if (!dmabuf_sync_file_import_export) { + wlr_log(WLR_DEBUG, "DMA-BUF sync_file import/export not supported"); + } + + dev->implicit_sync_interop = + exportable_semaphore && dmabuf_sync_file_import_export; + if (dev->implicit_sync_interop) { + wlr_log(WLR_DEBUG, "Implicit sync interop supported"); + } else { + wlr_log(WLR_INFO, "Implicit sync interop not supported, " + "falling back to blocking"); + } + const float prio = 1.f; VkDeviceQueueCreateInfo qinfo = { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, @@ -487,6 +518,7 @@ struct wlr_vk_device *vulkan_device_create(struct wlr_vk_instance *ini, load_device_proc(dev, "vkWaitSemaphoresKHR", &dev->api.waitSemaphoresKHR); load_device_proc(dev, "vkGetSemaphoreCounterValueKHR", &dev->api.getSemaphoreCounterValueKHR); + load_device_proc(dev, "vkGetSemaphoreFdKHR", &dev->api.getSemaphoreFdKHR); // - check device format support - size_t max_fmts;