Skip to content

Commit c40bd7d

Browse files
davejiang authored and vinodkoul committed
dmaengine: idxd: process user page faults for completion record
DSA supports page fault handling through PRS. However, the DMA engine that's processing the descriptor is blocked until the PRS response is received. Other workqueues sharing the engine are also blocked. Page fault handling by the driver with PRS disabled can be used to mitigate the stalling. With PRS disabled while ATS remains enabled, DSA handles page faults on a completion record by reporting an event in the event log. In this instance, the descriptor is completed and the event log contains the completion record address and the contents of the completion record. Add support to the event log handling code to fault in the completion record and copy the content of the completion record to user memory. A bitmap is introduced to keep track of discarded event log entries. When the user process initiates ->release() of the char device, it is no longer interested in any remaining event log entries tied to the relevant wq and PASID. The driver will mark the event log entry index in the bitmap. Upon encountering the entries during processing, the event log handler will just clear the bitmap bit and skip the entry rather than attempt to process the event log entry. Tested-by: Tony Zhu <[email protected]> Signed-off-by: Dave Jiang <[email protected]> Co-developed-by: Fenghua Yu <[email protected]> Signed-off-by: Fenghua Yu <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Vinod Koul <[email protected]>
1 parent b022f59 commit c40bd7d

File tree

6 files changed

+137
-7
lines changed

6 files changed

+137
-7
lines changed

drivers/dma/idxd/cdev.c

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,35 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp)
164164
return rc;
165165
}
166166

167+
/*
 * Flag the pending event log entries owned by @wq and @pasid as discarded.
 *
 * The log is walked from the driver's cached head (evl->head) up to the tail
 * value read from the EVLSTATUS register.  For every entry whose PASID and
 * wq index match, the bit at that entry's index is set in evl->bmap; the walk
 * is done with evl->lock held.  Once the lock is dropped, drain_workqueue()
 * is called on the wq's workqueue.
 *
 * Nothing is done when the device has no event log (idxd->evl is NULL).
 */
static void idxd_cdev_evl_drain_pasid(struct idxd_wq *wq, u32 pasid)
{
	struct idxd_device *idxd = wq->idxd;
	struct idxd_evl *evl = idxd->evl;
	int entry_bytes = evl_ent_size(idxd);
	union evl_status_reg evl_status;
	struct __evl_entry *ent;
	u16 idx, tail, nr_ents;

	if (!evl)
		return;

	spin_lock(&evl->lock);
	evl_status.bits = ioread64(idxd->reg_base + IDXD_EVLSTATUS_OFFSET);
	tail = evl_status.tail;
	nr_ents = evl->size;

	/* The log is circular: the index wraps back to 0 after nr_ents - 1. */
	for (idx = evl->head; idx != tail; idx = (idx + 1) % nr_ents) {
		ent = (struct __evl_entry *)(evl->log + (idx * entry_bytes));

		/* Entries for another PASID or another wq are left alone. */
		if (ent->pasid != pasid || ent->wq_idx != wq->id)
			continue;

		set_bit(idx, evl->bmap);
	}
	spin_unlock(&evl->lock);

	drain_workqueue(wq->wq);
}
195+
167196
static int idxd_cdev_release(struct inode *node, struct file *filep)
168197
{
169198
struct idxd_user_context *ctx = filep->private_data;
@@ -190,6 +219,7 @@ static int idxd_cdev_release(struct inode *node, struct file *filep)
190219
}
191220

192221
if (ctx->sva) {
222+
idxd_cdev_evl_drain_pasid(wq, ctx->pasid);
193223
iommu_sva_unbind_device(ctx->sva);
194224
idxd_xa_pasid_remove(ctx);
195225
}

drivers/dma/idxd/device.c

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -762,25 +762,37 @@ static int idxd_device_evl_setup(struct idxd_device *idxd)
762762
dma_addr_t dma_addr;
763763
int size;
764764
struct idxd_evl *evl = idxd->evl;
765+
unsigned long *bmap;
766+
int rc;
765767

766768
if (!evl)
767769
return 0;
768770

769771
size = evl_size(idxd);
772+
773+
bmap = bitmap_zalloc(size, GFP_KERNEL);
774+
if (!bmap) {
775+
rc = -ENOMEM;
776+
goto err_bmap;
777+
}
778+
770779
/*
771780
* Address needs to be page aligned. However, dma_alloc_coherent() provides
772781
* at minimal page size aligned address. No manual alignment required.
773782
*/
774783
addr = dma_alloc_coherent(dev, size, &dma_addr, GFP_KERNEL);
775-
if (!addr)
776-
return -ENOMEM;
784+
if (!addr) {
785+
rc = -ENOMEM;
786+
goto err_alloc;
787+
}
777788

778789
memset(addr, 0, size);
779790

780791
spin_lock(&evl->lock);
781792
evl->log = addr;
782793
evl->dma = dma_addr;
783794
evl->log_size = size;
795+
evl->bmap = bmap;
784796

785797
memset(&evlcfg, 0, sizeof(evlcfg));
786798
evlcfg.bits[0] = dma_addr & GENMASK(63, 12);
@@ -799,6 +811,11 @@ static int idxd_device_evl_setup(struct idxd_device *idxd)
799811

800812
spin_unlock(&evl->lock);
801813
return 0;
814+
815+
err_alloc:
816+
bitmap_free(bmap);
817+
err_bmap:
818+
return rc;
802819
}
803820

804821
static void idxd_device_evl_free(struct idxd_device *idxd)
@@ -824,6 +841,7 @@ static void idxd_device_evl_free(struct idxd_device *idxd)
824841
iowrite64(0, idxd->reg_base + IDXD_EVLCFG_OFFSET + 8);
825842

826843
dma_free_coherent(dev, evl->log_size, evl->log, evl->dma);
844+
bitmap_free(evl->bmap);
827845
evl->log = NULL;
828846
evl->size = IDXD_EVL_SIZE_MIN;
829847
spin_unlock(&evl->lock);

drivers/dma/idxd/idxd.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,7 @@ struct idxd_driver_data {
264264
struct device_type *dev_type;
265265
int compl_size;
266266
int align;
267+
int evl_cr_off;
267268
};
268269

269270
struct idxd_evl {
@@ -276,6 +277,7 @@ struct idxd_evl {
276277
/* The number of entries in the event log. */
277278
u16 size;
278279
u16 head;
280+
unsigned long *bmap;
279281
};
280282

281283
struct idxd_evl_fault {

drivers/dma/idxd/init.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,13 +46,15 @@ static struct idxd_driver_data idxd_driver_data[] = {
4646
.compl_size = sizeof(struct dsa_completion_record),
4747
.align = 32,
4848
.dev_type = &dsa_device_type,
49+
.evl_cr_off = offsetof(struct dsa_evl_entry, cr),
4950
},
5051
[IDXD_TYPE_IAX] = {
5152
.name_prefix = "iax",
5253
.type = IDXD_TYPE_IAX,
5354
.compl_size = sizeof(struct iax_completion_record),
5455
.align = 64,
5556
.dev_type = &iax_device_type,
57+
.evl_cr_off = offsetof(struct iax_evl_entry, cr),
5658
},
5759
};
5860

drivers/dma/idxd/irq.c

Lines changed: 82 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
#include <linux/io-64-nonatomic-lo-hi.h>
88
#include <linux/dmaengine.h>
99
#include <linux/delay.h>
10+
#include <linux/iommu.h>
11+
#include <linux/sched/mm.h>
1012
#include <uapi/linux/idxd.h>
1113
#include "../dmaengine.h"
1214
#include "idxd.h"
@@ -217,14 +219,89 @@ static void idxd_int_handle_revoke(struct work_struct *work)
217219
kfree(revoke);
218220
}
219221

220-
static void process_evl_entry(struct idxd_device *idxd, struct __evl_entry *entry_head)
222+
static void idxd_evl_fault_work(struct work_struct *work)
223+
{
224+
struct idxd_evl_fault *fault = container_of(work, struct idxd_evl_fault, work);
225+
struct idxd_wq *wq = fault->wq;
226+
struct idxd_device *idxd = wq->idxd;
227+
struct device *dev = &idxd->pdev->dev;
228+
struct __evl_entry *entry_head = fault->entry;
229+
void *cr = (void *)entry_head + idxd->data->evl_cr_off;
230+
int cr_size = idxd->data->compl_size, copied;
231+
232+
switch (fault->status) {
233+
case DSA_COMP_CRA_XLAT:
234+
case DSA_COMP_DRAIN_EVL:
235+
/*
236+
* Copy completion record to fault_addr in user address space
237+
* that is found by wq and PASID.
238+
*/
239+
copied = idxd_copy_cr(wq, entry_head->pasid,
240+
entry_head->fault_addr,
241+
cr, cr_size);
242+
/*
243+
* The task that triggered the page fault is unknown currently
244+
* because multiple threads may share the user address
245+
* space or the task exits already before this fault.
246+
* So if the copy fails, SIGSEGV can not be sent to the task.
247+
* Just print an error for the failure. The user application
248+
* waiting for the completion record will time out on this
249+
* failure.
250+
*/
251+
if (copied != cr_size) {
252+
dev_dbg_ratelimited(dev, "Failed to write to completion record. (%d:%d)\n",
253+
cr_size, copied);
254+
}
255+
break;
256+
default:
257+
dev_dbg_ratelimited(dev, "Unrecognized error code: %#x\n",
258+
DSA_COMP_STATUS(entry_head->error));
259+
break;
260+
}
261+
262+
kmem_cache_free(idxd->evl_cache, fault);
263+
}
264+
265+
static void process_evl_entry(struct idxd_device *idxd,
266+
struct __evl_entry *entry_head, unsigned int index)
221267
{
222268
struct device *dev = &idxd->pdev->dev;
269+
struct idxd_evl *evl = idxd->evl;
223270
u8 status;
224271

225-
status = DSA_COMP_STATUS(entry_head->error);
226-
dev_warn_ratelimited(dev, "Device error %#x operation: %#x fault addr: %#llx\n",
227-
status, entry_head->operation, entry_head->fault_addr);
272+
if (test_bit(index, evl->bmap)) {
273+
clear_bit(index, evl->bmap);
274+
} else {
275+
status = DSA_COMP_STATUS(entry_head->error);
276+
277+
if (status == DSA_COMP_CRA_XLAT || status == DSA_COMP_DRAIN_EVL) {
278+
struct idxd_evl_fault *fault;
279+
int ent_size = evl_ent_size(idxd);
280+
281+
if (entry_head->rci)
282+
dev_dbg(dev, "Completion Int Req set, ignoring!\n");
283+
284+
if (!entry_head->rcr && status == DSA_COMP_DRAIN_EVL)
285+
return;
286+
287+
fault = kmem_cache_alloc(idxd->evl_cache, GFP_ATOMIC);
288+
if (fault) {
289+
struct idxd_wq *wq = idxd->wqs[entry_head->wq_idx];
290+
291+
fault->wq = wq;
292+
fault->status = status;
293+
memcpy(&fault->entry, entry_head, ent_size);
294+
INIT_WORK(&fault->work, idxd_evl_fault_work);
295+
queue_work(wq->wq, &fault->work);
296+
} else {
297+
dev_warn(dev, "Failed to service fault work.\n");
298+
}
299+
} else {
300+
dev_warn_ratelimited(dev, "Device error %#x operation: %#x fault addr: %#llx\n",
301+
status, entry_head->operation,
302+
entry_head->fault_addr);
303+
}
304+
}
228305
}
229306

230307
static void process_evl_entries(struct idxd_device *idxd)
@@ -250,7 +327,7 @@ static void process_evl_entries(struct idxd_device *idxd)
250327

251328
while (h != t) {
252329
entry_head = (struct __evl_entry *)(evl->log + (h * ent_size));
253-
process_evl_entry(idxd, entry_head);
330+
process_evl_entry(idxd, entry_head, h);
254331
h = (h + 1) % size;
255332
}
256333

include/uapi/linux/idxd.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ enum dsa_completion_status {
135135
DSA_COMP_HW_ERR1,
136136
DSA_COMP_HW_ERR_DRB,
137137
DSA_COMP_TRANSLATION_FAIL,
138+
DSA_COMP_DRAIN_EVL = 0x26,
138139
};
139140

140141
enum iax_completion_status {

0 commit comments

Comments
 (0)