Skip to content

Commit ea01f94

Browse files
committed
fuse: {uring} Pin the user buffer
This is to allow copying into the buffer from the application without the need to copy in ring context (and with that, the need that the ring task is active in kernel space). Signed-off-by: Bernd Schubert <[email protected]> (cherry picked from commit 43d1a63)
1 parent 3f71501 commit ea01f94

File tree

4 files changed

+214
-16
lines changed

4 files changed

+214
-16
lines changed

fs/fuse/dev.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -752,6 +752,15 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
752752
cs->pipebufs++;
753753
cs->nr_segs++;
754754
}
755+
} else if (cs->ring.pages) {
756+
cs->pg = cs->ring.pages[cs->ring.page_idx++];
757+
/*
758+
* not strictly needed, just to avoid a uring exception in
759+
* fuse_copy_finish
760+
*/
761+
get_page(cs->pg);
762+
cs->len = PAGE_SIZE;
763+
cs->offset = 0;
755764
} else {
756765
size_t off;
757766
err = iov_iter_get_pages2(cs->iter, &page, PAGE_SIZE, 1, &off);

fs/fuse/dev_uring.c

Lines changed: 199 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,16 @@
1111

1212
#include <linux/fs.h>
1313
#include <linux/io_uring/cmd.h>
14+
#include <linux/page-flags.h>
1415

1516
static bool __read_mostly enable_uring;
1617
module_param(enable_uring, bool, 0644);
1718
MODULE_PARM_DESC(enable_uring,
1819
"Enable userspace communication through io-uring");
1920

2021
#define FUSE_URING_IOV_SEGS 2 /* header and payload */
22+
#define FUSE_RING_HEADER_PG 0
23+
#define FUSE_RING_PAYLOAD_PG 1
2124

2225
/* redfs only to allow patch backports */
2326
#define IO_URING_F_TASK_DEAD (1 << 13)
@@ -155,6 +158,21 @@ void fuse_uring_abort_end_requests(struct fuse_ring *ring)
155158
}
156159
}
157160

161+
/*
162+
* Copy from memmap.c, should be exported
163+
*/
164+
static void io_pages_free(struct page ***pages, int npages)
165+
{
166+
struct page **page_array = *pages;
167+
168+
if (!page_array)
169+
return;
170+
171+
unpin_user_pages(page_array, npages);
172+
kvfree(page_array);
173+
*pages = NULL;
174+
}
175+
158176
void fuse_uring_destruct(struct fuse_conn *fc)
159177
{
160178
struct fuse_ring *ring = fc->ring;
@@ -178,6 +196,9 @@ void fuse_uring_destruct(struct fuse_conn *fc)
178196
list_for_each_entry_safe(ent, next, &queue->ent_released,
179197
list) {
180198
list_del_init(&ent->list);
199+
io_pages_free(&ent->header_pages, ent->nr_header_pages);
200+
io_pages_free(&ent->payload_pages,
201+
ent->nr_payload_pages);
181202
kfree(ent);
182203
}
183204

@@ -569,13 +590,67 @@ static int fuse_uring_copy_from_ring(struct fuse_ring *ring,
569590
fuse_copy_init(&cs, 0, &iter);
570591
cs.is_uring = 1;
571592
cs.req = req;
593+
if (ent->payload_pages)
594+
cs.ring.pages = ent->payload_pages;
572595

573596
return fuse_copy_out_args(&cs, args, ring_in_out.payload_sz);
574597
}
575598

576-
/*
577-
* Copy data from the req to the ring buffer
578-
*/
599+
/*
600+
* Copy data from the req to the ring buffer
601+
* In order to be able to write into the ring buffer from the application,
602+
* i.e. to avoid io_uring_cmd_complete_in_task(), the header needs to be
603+
* pinned as well.
604+
*/
605+
static int fuse_uring_args_to_ring_pages(struct fuse_ring *ring,
606+
struct fuse_req *req,
607+
struct fuse_ring_ent *ent,
608+
struct fuse_uring_req_header *headers)
609+
{
610+
struct fuse_copy_state cs;
611+
struct fuse_args *args = req->args;
612+
struct fuse_in_arg *in_args = args->in_args;
613+
int num_args = args->in_numargs;
614+
int err;
615+
616+
struct fuse_uring_ent_in_out ent_in_out = {
617+
.flags = 0,
618+
.commit_id = req->in.h.unique,
619+
};
620+
621+
fuse_copy_init(&cs, 1, NULL);
622+
cs.is_uring = 1;
623+
cs.req = req;
624+
cs.ring.pages = ent->payload_pages;
625+
626+
if (num_args > 0) {
627+
/*
628+
* Expectation is that the first argument is the per op header.
629+
* Some op code have that as zero size.
630+
*/
631+
if (args->in_args[0].size > 0) {
632+
memcpy(&headers->op_in, in_args->value, in_args->size);
633+
}
634+
in_args++;
635+
num_args--;
636+
}
637+
638+
/* copy the payload */
639+
err = fuse_copy_args(&cs, num_args, args->in_pages,
640+
(struct fuse_arg *)in_args, 0);
641+
if (err) {
642+
pr_info_ratelimited("%s fuse_copy_args failed\n", __func__);
643+
return err;
644+
}
645+
646+
ent_in_out.payload_sz = cs.ring.copied_sz;
647+
memcpy(&headers->ring_ent_in_out, &ent_in_out, sizeof(ent_in_out));
648+
return err;
649+
}
650+
651+
/*
652+
* Copy data from the req to the ring buffer
653+
*/
579654
static int fuse_uring_args_to_ring(struct fuse_ring *ring, struct fuse_req *req,
580655
struct fuse_ring_ent *ent)
581656
{
@@ -599,6 +674,8 @@ static int fuse_uring_args_to_ring(struct fuse_ring *ring, struct fuse_req *req,
599674
fuse_copy_init(&cs, 1, &iter);
600675
cs.is_uring = 1;
601676
cs.req = req;
677+
if (ent->payload_pages)
678+
cs.ring.pages = ent->payload_pages;
602679

603680
if (num_args > 0) {
604681
/*
@@ -638,6 +715,7 @@ static int fuse_uring_copy_to_ring(struct fuse_ring_ent *ent,
638715
struct fuse_ring_queue *queue = ent->queue;
639716
struct fuse_ring *ring = queue->ring;
640717
int err;
718+
struct fuse_uring_req_header *headers = NULL;
641719

642720
err = -EIO;
643721
if (WARN_ON(ent->state != FRRS_FUSE_REQ)) {
@@ -650,22 +728,29 @@ static int fuse_uring_copy_to_ring(struct fuse_ring_ent *ent,
650728
if (WARN_ON(req->in.h.unique == 0))
651729
return err;
652730

653-
/* copy the request */
654-
err = fuse_uring_args_to_ring(ring, req, ent);
655-
if (unlikely(err)) {
656-
pr_info_ratelimited("Copy to ring failed: %d\n", err);
657-
return err;
658-
}
659-
660731
/* copy fuse_in_header */
661-
err = copy_to_user(&ent->headers->in_out, &req->in.h,
662-
sizeof(req->in.h));
663-
if (err) {
664-
err = -EFAULT;
665-
return err;
732+
if (ent->header_pages) {
733+
headers = kmap_local_page(
734+
ent->header_pages[FUSE_RING_HEADER_PG]);
735+
736+
memcpy(&headers->in_out, &req->in.h, sizeof(req->in.h));
737+
738+
err = fuse_uring_args_to_ring_pages(ring, req, ent, headers);
739+
kunmap_local(headers);
740+
} else {
741+
/* copy the request */
742+
err = fuse_uring_args_to_ring(ring, req, ent);
743+
if (unlikely(err)) {
744+
pr_info_ratelimited("Copy to ring failed: %d\n", err);
745+
return err;
746+
}
747+
err = copy_to_user(&ent->headers->in_out, &req->in.h,
748+
sizeof(req->in.h));
749+
if (err)
750+
err = -EFAULT;
666751
}
667752

668-
return 0;
753+
return err;
669754
}
670755

671756
static int fuse_uring_prepare_send(struct fuse_ring_ent *ent,
@@ -979,6 +1064,45 @@ static void fuse_uring_do_register(struct fuse_ring_ent *ent,
9791064
}
9801065
}
9811066

1067+
/*
1068+
* Copy from memmap.c, should be exported there
1069+
*/
1070+
static struct page **io_pin_pages(unsigned long uaddr, unsigned long len,
1071+
int *npages)
1072+
{
1073+
unsigned long start, end, nr_pages;
1074+
struct page **pages;
1075+
int ret;
1076+
1077+
end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
1078+
start = uaddr >> PAGE_SHIFT;
1079+
nr_pages = end - start;
1080+
if (WARN_ON_ONCE(!nr_pages))
1081+
return ERR_PTR(-EINVAL);
1082+
1083+
pages = kvmalloc_array(nr_pages, sizeof(struct page *), GFP_KERNEL);
1084+
if (!pages)
1085+
return ERR_PTR(-ENOMEM);
1086+
1087+
ret = pin_user_pages_fast(uaddr, nr_pages, FOLL_WRITE | FOLL_LONGTERM,
1088+
pages);
1089+
/* success, mapped all pages */
1090+
if (ret == nr_pages) {
1091+
*npages = nr_pages;
1092+
return pages;
1093+
}
1094+
1095+
/* partial map, or didn't map anything */
1096+
if (ret >= 0) {
1097+
/* if we did partial map, release any pages we did get */
1098+
if (ret)
1099+
unpin_user_pages(pages, ret);
1100+
ret = -EFAULT;
1101+
}
1102+
kvfree(pages);
1103+
return ERR_PTR(ret);
1104+
}
1105+
9821106
/*
9831107
* sqe->addr is a ptr to an iovec array, iov[0] has the headers, iov[1]
9841108
* the payload
@@ -1005,6 +1129,59 @@ static int fuse_uring_get_iovec_from_sqe(const struct io_uring_sqe *sqe,
10051129
return 0;
10061130
}
10071131

1132+
static int fuse_uring_pin_pages(struct fuse_ring_ent *ent)
1133+
{
1134+
struct fuse_ring *ring = ent->queue->ring;
1135+
int err;
1136+
1137+
/*
1138+
* This needs to do locked memory accounting, for now privileged servers
1139+
* only.
1140+
*/
1141+
if (!capable(CAP_SYS_ADMIN))
1142+
return 0;
1143+
1144+
/* Pin header pages */
1145+
if (!PAGE_ALIGNED(ent->headers)) {
1146+
pr_info_ratelimited("ent->headers is not page-aligned: %p\n",
1147+
ent->headers);
1148+
return -EINVAL;
1149+
}
1150+
1151+
ent->header_pages = io_pin_pages((unsigned long)ent->headers,
1152+
sizeof(struct fuse_uring_req_header),
1153+
&ent->nr_header_pages);
1154+
if (IS_ERR(ent->header_pages)) {
1155+
err = PTR_ERR(ent->header_pages);
1156+
pr_info_ratelimited("Failed to pin header pages, err=%d\n",
1157+
err);
1158+
ent->header_pages = NULL;
1159+
return err;
1160+
}
1161+
1162+
if (ent->nr_header_pages != 1) {
1163+
pr_info_ratelimited("Header pages not pinned as one page\n");
1164+
io_pages_free(&ent->header_pages, ent->nr_header_pages);
1165+
ent->header_pages = NULL;
1166+
return -EINVAL;
1167+
}
1168+
1169+
/* Pin payload pages */
1170+
ent->payload_pages = io_pin_pages((unsigned long)ent->payload,
1171+
ring->max_payload_sz,
1172+
&ent->nr_payload_pages);
1173+
if (IS_ERR(ent->payload_pages)) {
1174+
err = PTR_ERR(ent->payload_pages);
1175+
pr_info_ratelimited("Failed to pin payload pages, err=%d\n",
1176+
err);
1177+
io_pages_free(&ent->header_pages, ent->nr_header_pages);
1178+
ent->payload_pages = NULL;
1179+
return err;
1180+
}
1181+
1182+
return 0;
1183+
}
1184+
10081185
static struct fuse_ring_ent *
10091186
fuse_uring_create_ring_ent(struct io_uring_cmd *cmd,
10101187
struct fuse_ring_queue *queue)
@@ -1046,6 +1223,12 @@ fuse_uring_create_ring_ent(struct io_uring_cmd *cmd,
10461223
ent->headers = iov[0].iov_base;
10471224
ent->payload = iov[1].iov_base;
10481225

1226+
err = fuse_uring_pin_pages(ent);
1227+
if (err) {
1228+
kfree(ent);
1229+
return ERR_PTR(err);
1230+
}
1231+
10491232
atomic_inc(&ring->queue_refs);
10501233
return ent;
10511234
}

fs/fuse/dev_uring_i.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,11 @@ enum fuse_ring_req_state {
4040
struct fuse_ring_ent {
4141
/* userspace buffer */
4242
struct fuse_uring_req_header __user *headers;
43+
struct page **header_pages;
44+
int nr_header_pages;
4345
void __user *payload;
46+
struct page **payload_pages;
47+
int nr_payload_pages;
4448

4549
/* the ring queue that owns the request */
4650
struct fuse_ring_queue *queue;

fs/fuse/fuse_dev_i.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ struct fuse_copy_state {
3030
unsigned int is_uring:1;
3131
struct {
3232
unsigned int copied_sz; /* copied size into the user buffer */
33+
struct page **pages;
34+
int page_idx;
3335
} ring;
3436
};
3537

0 commit comments

Comments
 (0)