From e20eaa2382e7888a4e06ccb015c476a6fb1fda0c Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Thu, 23 Nov 2017 16:26:35 +0800 Subject: vfio: ABI for mdev display dma-buf operation Add VFIO_DEVICE_QUERY_GFX_PLANE ioctl command to let user query and get a plane and its information. So far, two types of buffers are supported: buffers based on dma-buf and buffers based on region. This ioctl can be invoked with: 1) Either DMABUF or REGION flag. Vendor driver returns a plane_info successfully only when the specific kind of buffer is supported. 2) Flag PROBE. And at the same time either DMABUF or REGION must be set, so that vendor driver returns success only when the specific kind of buffer is supported. Add VFIO_DEVICE_GET_GFX_DMABUF ioctl command to let user get a specific dma-buf fd of an exposed MDEV buffer provided by dmabuf_id which was returned in VFIO_DEVICE_QUERY_GFX_PLANE ioctl command. The life cycle of an exposed MDEV buffer is handled by userspace and tracked by kernel space. The returned dmabuf_id in struct vfio_device_ query_gfx_plane can be a new id of a new exposed buffer or an old id of a re-exported buffer. Host user can check the value of dmabuf_id to see if it needs to create new resources according to the new exposed buffer or just re-use the existing resource related to the old buffer. v18: - update comments for VFIO_DEVICE_GET_GFX_DMABUF. (Alex) v17: - modify VFIO_DEVICE_GET_GFX_DMABUF interface. (Alex) v16: - add x_hot and y_hot fields. (Gerd) - add comments for VFIO_DEVICE_GET_GFX_DMABUF. (Alex) - rebase to 4.14.0-rc6. v15: - add a ioctl to get a dmabuf for a given dmabuf id. (Gerd) v14: - add PROBE, DMABUF and REGION flags. (Alex) v12: - add drm_format_mod back. (Gerd and Zhenyu) - add region_index. (Gerd) v11: - rename plane_type to drm_plane_type. (Gerd) - move fields of vfio_device_query_gfx_plane to vfio_device_gfx_plane_info. (Gerd) - remove drm_format_mod, start fields. (Daniel) - remove plane_id. v10: - refine the ABI API VFIO_DEVICE_QUERY_GFX_PLANE. (Alex) (Gerd) v3: - add a field gvt_plane_info in the drm_i915_gem_obj structure to save the decoded plane information to avoid look up while need the plane info. (Gerd) Signed-off-by: Tina Zhang Reviewed-by: Gerd Hoffmann Reviewed-by: Kirti Wankhede Acked-by: Alex Williamson Cc: Daniel Vetter Signed-off-by: Zhenyu Wang --- include/uapi/linux/vfio.h | 62 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index ae461050661a..5c1cca2ba04d 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -502,6 +502,68 @@ struct vfio_pci_hot_reset { #define VFIO_DEVICE_PCI_HOT_RESET _IO(VFIO_TYPE, VFIO_BASE + 13) +/** + * VFIO_DEVICE_QUERY_GFX_PLANE - _IOW(VFIO_TYPE, VFIO_BASE + 14, + * struct vfio_device_query_gfx_plane) + * + * Set the drm_plane_type and flags, then retrieve the gfx plane info. + * + * flags supported: + * - VFIO_GFX_PLANE_TYPE_PROBE and VFIO_GFX_PLANE_TYPE_DMABUF are set + * to ask if the mdev supports dma-buf. 0 on support, -EINVAL on no + * support for dma-buf. + * - VFIO_GFX_PLANE_TYPE_PROBE and VFIO_GFX_PLANE_TYPE_REGION are set + * to ask if the mdev supports region. 0 on support, -EINVAL on no + * support for region. + * - VFIO_GFX_PLANE_TYPE_DMABUF or VFIO_GFX_PLANE_TYPE_REGION is set + * with each call to query the plane info. + * - Others are invalid and return -EINVAL. + * + * Note: + * 1. Plane could be disabled by guest. In that case, success will be + * returned with zero-initialized drm_format, size, width and height + * fields. + * 2. x_hot/y_hot is set to 0xFFFFFFFF if no hotspot information available + * + * Return: 0 on success, -errno on other failure. + */ +struct vfio_device_gfx_plane_info { + __u32 argsz; + __u32 flags; +#define VFIO_GFX_PLANE_TYPE_PROBE (1 << 0) +#define VFIO_GFX_PLANE_TYPE_DMABUF (1 << 1) +#define VFIO_GFX_PLANE_TYPE_REGION (1 << 2) + /* in */ + __u32 drm_plane_type; /* type of plane: DRM_PLANE_TYPE_* */ + /* out */ + __u32 drm_format; /* drm format of plane */ + __u64 drm_format_mod; /* tiled mode */ + __u32 width; /* width of plane */ + __u32 height; /* height of plane */ + __u32 stride; /* stride of plane */ + __u32 size; /* size of plane in bytes, align on page*/ + __u32 x_pos; /* horizontal position of cursor plane */ + __u32 y_pos; /* vertical position of cursor plane*/ + __u32 x_hot; /* horizontal position of cursor hotspot */ + __u32 y_hot; /* vertical position of cursor hotspot */ + union { + __u32 region_index; /* region index */ + __u32 dmabuf_id; /* dma-buf id */ + }; +}; + +#define VFIO_DEVICE_QUERY_GFX_PLANE _IO(VFIO_TYPE, VFIO_BASE + 14) + +/** + * VFIO_DEVICE_GET_GFX_DMABUF - _IOW(VFIO_TYPE, VFIO_BASE + 15, __u32) + * + * Return a new dma-buf file descriptor for an exposed guest framebuffer + * described by the provided dmabuf_id. The dmabuf_id is returned from VFIO_ + * DEVICE_QUERY_GFX_PLANE as a token of the exposed guest framebuffer. + */ + +#define VFIO_DEVICE_GET_GFX_DMABUF _IO(VFIO_TYPE, VFIO_BASE + 15) + /* -------- API for Type1 VFIO IOMMU -------- */ /** -- cgit v1.2.3 From 373d7080896a3cb3b28ae3a2abdafb7bb87552b1 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 14 Nov 2017 16:41:19 -0500 Subject: drm/amdkfd: Add CWSR support This hardware feature allows the GPU to preempt shader execution in the middle of a compute wave, save the state and restore it later to resume execution. Memory for saving the state is allocated per queue in user mode and the address and size passed to the create_queue ioctl. The size depends on the number of waves that can be in flight simultaneously on a given ASIC. Signed-off-by: Shaoyun.liu Signed-off-by: Yong Zhao Signed-off-by: Felix Kuehling Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 7 +- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 20 ++++- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 6 ++ drivers/gpu/drm/amd/amdkfd/kfd_module.c | 4 + drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 27 +++++++ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 31 +++++++- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 87 +++++++++++++++++++++- include/uapi/linux/kfd_ioctl.h | 3 +- 8 files changed, 179 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 505d39156acd..2a4612d8437a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -117,7 +117,7 @@ static int kfd_open(struct inode *inode, struct file *filep) return -EPERM; } - process = kfd_create_process(current); + process = kfd_create_process(filep); if (IS_ERR(process)) return PTR_ERR(process); @@ -206,6 +206,7 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, q_properties->ctx_save_restore_area_address = args->ctx_save_restore_address; q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size; + q_properties->ctl_stack_size = args->ctl_stack_size; if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE || args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL) q_properties->type = KFD_QUEUE_TYPE_COMPUTE; @@ -1088,6 +1089,10 @@ static int kfd_mmap(struct file *filp, struct vm_area_struct *vma) KFD_MMAP_EVENTS_MASK) { vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_EVENTS_MASK; return kfd_event_mmap(process, vma); + } else if ((vma->vm_pgoff & KFD_MMAP_RESERVED_MEM_MASK) == + KFD_MMAP_RESERVED_MEM_MASK) { + vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_RESERVED_MEM_MASK; + return kfd_reserved_mem_mmap(process, vma); } return -EFAULT; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 621a3b53a038..4f05eacca786 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -27,6 +27,7 @@ #include "kfd_priv.h" #include "kfd_device_queue_manager.h" #include "kfd_pm4_headers_vi.h" +#include "cwsr_trap_handler_gfx8.asm" #define MQD_SIZE_ALIGNED 768 @@ -38,7 +39,8 @@ static const struct kfd_device_info kaveri_device_info = { .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = false, }; static const struct kfd_device_info carrizo_device_info = { @@ -49,7 +51,8 @@ static const struct kfd_device_info carrizo_device_info = { .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, }; struct kfd_deviceid { @@ -212,6 +215,17 @@ static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid, return AMD_IOMMU_INV_PRI_RSP_INVALID; } +static void kfd_cwsr_init(struct kfd_dev *kfd) +{ + if (cwsr_enable && kfd->device_info->supports_cwsr) { + BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE); + + kfd->cwsr_isa = cwsr_trap_gfx8_hex; + kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex); + kfd->cwsr_enabled = true; + } +} + bool kgd2kfd_device_init(struct kfd_dev *kfd, const struct kgd2kfd_shared_resources *gpu_resources) { @@ -286,6 +300,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, goto device_iommu_pasid_error; } + kfd_cwsr_init(kfd); + if (kfd_resume(kfd)) goto kfd_resume_error; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index e202921c150e..5c065024e285 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -173,6 +173,9 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, *allocated_vmid = qpd->vmid; q->properties.vmid = qpd->vmid; + q->properties.tba_addr = qpd->tba_addr; + q->properties.tma_addr = qpd->tma_addr; + if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) retval = create_compute_queue_nocpsch(dqm, q, qpd); else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) @@ -846,6 +849,9 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, } dqm->asic_ops.init_sdma_vm(dqm, q, qpd); + + q->properties.tba_addr = qpd->tba_addr; + q->properties.tma_addr = qpd->tma_addr; retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, &q->gart_mqd_addr, &q->properties); if (retval) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c index f744caeaee04..ee8adf654cd0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c @@ -50,6 +50,10 @@ module_param(sched_policy, int, 0444); MODULE_PARM_DESC(sched_policy, "Scheduling policy (0 = HWS (Default), 1 = HWS without over-subscription, 2 = Non-HWS (Used for debugging only)"); +int cwsr_enable = 1; +module_param(cwsr_enable, int, 0444); +MODULE_PARM_DESC(cwsr_enable, "CWSR enable (0 = Off, 1 = On (Default))"); + int max_num_of_queues_per_device = KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT; module_param(max_num_of_queues_per_device, int, 0444); MODULE_PARM_DESC(max_num_of_queues_per_device, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index 2ba7cea7b99b..00e1f1a9728b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -89,6 +89,28 @@ static int init_mqd(struct mqd_manager *mm, void **mqd, if (q->format == KFD_QUEUE_FORMAT_AQL) m->cp_hqd_iq_rptr = 1; + if (q->tba_addr) { + m->compute_tba_lo = lower_32_bits(q->tba_addr >> 8); + m->compute_tba_hi = upper_32_bits(q->tba_addr >> 8); + m->compute_tma_lo = lower_32_bits(q->tma_addr >> 8); + m->compute_tma_hi = upper_32_bits(q->tma_addr >> 8); + m->compute_pgm_rsrc2 |= + (1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT); + } + + if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address) { + m->cp_hqd_persistent_state |= + (1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT); + m->cp_hqd_ctx_save_base_addr_lo = + lower_32_bits(q->ctx_save_restore_area_address); + m->cp_hqd_ctx_save_base_addr_hi = + upper_32_bits(q->ctx_save_restore_area_address); + m->cp_hqd_ctx_save_size = q->ctx_save_restore_area_size; + m->cp_hqd_cntl_stack_size = q->ctl_stack_size; + m->cp_hqd_cntl_stack_offset = q->ctl_stack_size; + m->cp_hqd_wg_state_offset = q->ctl_stack_size; + } + *mqd = m; if (gart_addr) *gart_addr = addr; @@ -167,6 +189,11 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT; } + if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address) + m->cp_hqd_ctx_save_control = + atc_bit << CP_HQD_CTX_SAVE_CONTROL__ATC__SHIFT | + mtype << CP_HQD_CTX_SAVE_CONTROL__MTYPE__SHIFT; + q->is_active = (q->queue_size > 0 && q->queue_address != 0 && q->queue_percent > 0); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 47504737ab4a..a66876467995 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -41,6 +41,7 @@ #define KFD_MMAP_DOORBELL_MASK 0x8000000000000 #define KFD_MMAP_EVENTS_MASK 0x4000000000000 +#define KFD_MMAP_RESERVED_MEM_MASK 0x2000000000000 /* * When working with cp scheduler we should assign the HIQ manually or via @@ -62,6 +63,15 @@ #define KFD_MAX_NUM_OF_PROCESSES 512 #define KFD_MAX_NUM_OF_QUEUES_PER_PROCESS 1024 +/* + * Size of the per-process TBA+TMA buffer: 2 pages + * + * The first page is the TBA used for the CWSR ISA code. The second + * page is used as TMA for daisy changing a user-mode trap handler. + */ +#define KFD_CWSR_TBA_TMA_SIZE (PAGE_SIZE * 2) +#define KFD_CWSR_TMA_OFFSET PAGE_SIZE + /* * Kernel module parameter to specify maximum number of supported queues per * device @@ -78,6 +88,8 @@ extern int max_num_of_queues_per_device; /* Kernel module parameter to specify the scheduling policy */ extern int sched_policy; +extern int cwsr_enable; + /* * Kernel module parameter to specify whether to send sigterm to HSA process on * unhandled exception @@ -131,6 +143,7 @@ struct kfd_device_info { size_t ih_ring_entry_size; uint8_t num_of_watch_points; uint16_t mqd_size_aligned; + bool supports_cwsr; }; struct kfd_mem_obj { @@ -200,6 +213,11 @@ struct kfd_dev { /* Debug manager */ struct kfd_dbgmgr *dbgmgr; + + /* CWSR */ + bool cwsr_enabled; + const void *cwsr_isa; + unsigned int cwsr_isa_size; }; /* KGD2KFD callbacks */ @@ -332,6 +350,9 @@ struct queue_properties { uint32_t eop_ring_buffer_size; uint64_t ctx_save_restore_area_address; uint32_t ctx_save_restore_area_size; + uint32_t ctl_stack_size; + uint64_t tba_addr; + uint64_t tma_addr; }; /** @@ -439,6 +460,11 @@ struct qcm_process_device { uint32_t num_gws; uint32_t num_oac; uint32_t sh_hidden_private_base; + + /* CWSR memory */ + void *cwsr_kaddr; + uint64_t tba_addr; + uint64_t tma_addr; }; @@ -563,7 +589,7 @@ struct amdkfd_ioctl_desc { void kfd_process_create_wq(void); void kfd_process_destroy_wq(void); -struct kfd_process *kfd_create_process(const struct task_struct *); +struct kfd_process *kfd_create_process(struct file *filep); struct kfd_process *kfd_get_process(const struct task_struct *); struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid); @@ -577,6 +603,9 @@ struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, struct kfd_process *p); +int kfd_reserved_mem_mmap(struct kfd_process *process, + struct vm_area_struct *vma); + /* Process device data iterator */ struct kfd_process_device *kfd_get_first_process_device_data( struct kfd_process *p); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 1bb9b2643d5a..39f4c19aaf61 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -28,6 +28,7 @@ #include #include #include +#include struct mm_struct; @@ -53,6 +54,8 @@ struct kfd_process_release_work { static struct kfd_process *find_process(const struct task_struct *thread); static struct kfd_process *create_process(const struct task_struct *thread); +static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep); + void kfd_process_create_wq(void) { @@ -68,9 +71,10 @@ void kfd_process_destroy_wq(void) } } -struct kfd_process *kfd_create_process(const struct task_struct *thread) +struct kfd_process *kfd_create_process(struct file *filep) { struct kfd_process *process; + struct task_struct *thread = current; if (!thread->mm) return ERR_PTR(-EINVAL); @@ -101,6 +105,8 @@ struct kfd_process *kfd_create_process(const struct task_struct *thread) up_write(&thread->mm->mmap_sem); + kfd_process_init_cwsr(process, filep); + return process; } @@ -168,6 +174,11 @@ static void kfd_process_wq_release(struct work_struct *work) amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid); list_del(&pdd->per_device_list); + + if (pdd->qpd.cwsr_kaddr) + free_pages((unsigned long)pdd->qpd.cwsr_kaddr, + get_order(KFD_CWSR_TBA_TMA_SIZE)); + kfree(pdd); } @@ -260,6 +271,46 @@ static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = { .release = kfd_process_notifier_release, }; +static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep) +{ + int err = 0; + unsigned long offset; + struct kfd_process_device *temp, *pdd = NULL; + struct kfd_dev *dev = NULL; + struct qcm_process_device *qpd = NULL; + + mutex_lock(&p->mutex); + list_for_each_entry_safe(pdd, temp, &p->per_device_data, + per_device_list) { + dev = pdd->dev; + qpd = &pdd->qpd; + if (!dev->cwsr_enabled || qpd->cwsr_kaddr) + continue; + offset = (dev->id | KFD_MMAP_RESERVED_MEM_MASK) << PAGE_SHIFT; + qpd->tba_addr = (int64_t)vm_mmap(filep, 0, + KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC, + MAP_SHARED, offset); + + if (IS_ERR_VALUE(qpd->tba_addr)) { + pr_err("Failure to set tba address. error -%d.\n", + (int)qpd->tba_addr); + err = qpd->tba_addr; + qpd->tba_addr = 0; + qpd->cwsr_kaddr = NULL; + goto out; + } + + memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size); + + qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET; + pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n", + qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr); + } +out: + mutex_unlock(&p->mutex); + return err; +} + static struct kfd_process *create_process(const struct task_struct *thread) { struct kfd_process *process; @@ -535,3 +586,37 @@ struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid) return p; } + +int kfd_reserved_mem_mmap(struct kfd_process *process, + struct vm_area_struct *vma) +{ + struct kfd_dev *dev = kfd_device_by_id(vma->vm_pgoff); + struct kfd_process_device *pdd; + struct qcm_process_device *qpd; + + if (!dev) + return -EINVAL; + if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) { + pr_err("Incorrect CWSR mapping size.\n"); + return -EINVAL; + } + + pdd = kfd_get_process_device_data(dev, process); + if (!pdd) + return -EINVAL; + qpd = &pdd->qpd; + + qpd->cwsr_kaddr = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(KFD_CWSR_TBA_TMA_SIZE)); + if (!qpd->cwsr_kaddr) { + pr_err("Error allocating per process CWSR buffer.\n"); + return -ENOMEM; + } + + vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND + | VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP; + /* Mapping pages to user process */ + return remap_pfn_range(vma, vma->vm_start, + PFN_DOWN(__pa(qpd->cwsr_kaddr)), + KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot); +} diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 6e80501368ae..f7563ef2e883 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -58,7 +58,8 @@ struct kfd_ioctl_create_queue_args { __u64 eop_buffer_address; /* to KFD */ __u64 eop_buffer_size; /* to KFD */ __u64 ctx_save_restore_address; /* to KFD */ - __u64 ctx_save_restore_size; /* to KFD */ + __u32 ctx_save_restore_size; /* to KFD */ + __u32 ctl_stack_size; /* to KFD */ }; struct kfd_ioctl_destroy_queue_args { -- cgit v1.2.3 From d7b9bd2248d794275b53d34e665f7c5a08c4b396 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 14 Nov 2017 16:41:20 -0500 Subject: drm/amdkfd: Add support for user-mode trap handlers A second-level user mode trap handler can be installed. The CWSR trap handler jumps to the secondary trap handler conditionally for any conditions not handled by it. This can be used e.g. for debugging or catching math exceptions. When CWSR is disabled, the user mode trap handler is installed as first level trap handler. Signed-off-by: Shaoyun.liu Signed-off-by: Jay Cornwall Signed-off-by: Felix Kuehling Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 37 +++++++++++++++++++++- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 22 +++++++++++++ .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 5 +++ include/uapi/linux/kfd_ioctl.h | 12 ++++++- 4 files changed, 74 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 2a4612d8437a..cc61ec289880 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -432,6 +432,38 @@ out: return err; } +static int kfd_ioctl_set_trap_handler(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_set_trap_handler_args *args = data; + struct kfd_dev *dev; + int err = 0; + struct kfd_process_device *pdd; + + dev = kfd_device_by_id(args->gpu_id); + if (dev == NULL) + return -EINVAL; + + mutex_lock(&p->mutex); + + pdd = kfd_bind_process_to_device(dev, p); + if (IS_ERR(pdd)) { + err = -ESRCH; + goto out; + } + + if (dev->dqm->ops.set_trap_handler(dev->dqm, + &pdd->qpd, + args->tba_addr, + args->tma_addr)) + err = -EINVAL; + +out: + mutex_unlock(&p->mutex); + + return err; +} + static int kfd_ioctl_dbg_register(struct file *filep, struct kfd_process *p, void *data) { @@ -980,7 +1012,10 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { kfd_ioctl_set_scratch_backing_va, 0), AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG, - kfd_ioctl_get_tile_config, 0) + kfd_ioctl_get_tile_config, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER, + kfd_ioctl_set_trap_handler, 0), }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 5c065024e285..8447810c9a1e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1116,6 +1116,26 @@ out: return retval; } +static int set_trap_handler(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + uint64_t tba_addr, + uint64_t tma_addr) +{ + uint64_t *tma; + + if (dqm->dev->cwsr_enabled) { + /* Jump from CWSR trap handler to user trap */ + tma = (uint64_t *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET); + tma[0] = tba_addr; + tma[1] = tma_addr; + } else { + qpd->tba_addr = tba_addr; + qpd->tma_addr = tma_addr; + } + + return 0; +} + static int process_termination_nocpsch(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { @@ -1247,6 +1267,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) dqm->ops.create_kernel_queue = create_kernel_queue_cpsch; dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch; dqm->ops.set_cache_memory_policy = set_cache_memory_policy; + dqm->ops.set_trap_handler = set_trap_handler; dqm->ops.process_termination = process_termination_cpsch; break; case KFD_SCHED_POLICY_NO_HWS: @@ -1262,6 +1283,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) dqm->ops.initialize = initialize_nocpsch; dqm->ops.uninitialize = uninitialize; dqm->ops.set_cache_memory_policy = set_cache_memory_policy; + dqm->ops.set_trap_handler = set_trap_handler; dqm->ops.process_termination = process_termination_nocpsch; break; default: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 5b77cb69f732..8752edf9cd9b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -123,6 +123,11 @@ struct device_queue_manager_ops { void __user *alternate_aperture_base, uint64_t alternate_aperture_size); + int (*set_trap_handler)(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + uint64_t tba_addr, + uint64_t tma_addr); + int (*process_termination)(struct device_queue_manager *dqm, struct qcm_process_device *qpd); }; diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index f7563ef2e883..f4cab5b3ba9a 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -262,6 +262,13 @@ struct kfd_ioctl_get_tile_config_args { */ }; +struct kfd_ioctl_set_trap_handler_args { + uint64_t tba_addr; /* to KFD */ + uint64_t tma_addr; /* to KFD */ + uint32_t gpu_id; /* to KFD */ + uint32_t pad; +}; + #define AMDKFD_IOCTL_BASE 'K' #define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr) #define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type) @@ -322,7 +329,10 @@ struct kfd_ioctl_get_tile_config_args { #define AMDKFD_IOC_GET_TILE_CONFIG \ AMDKFD_IOWR(0x12, struct kfd_ioctl_get_tile_config_args) +#define AMDKFD_IOC_SET_TRAP_HANDLER \ + AMDKFD_IOW(0x13, struct kfd_ioctl_set_trap_handler_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x13 +#define AMDKFD_COMMAND_END 0x14 #endif -- cgit v1.2.3