@@ -11,13 +11,16 @@
 
 #include <linux/fs.h>
 #include <linux/io_uring/cmd.h>
+#include <linux/page-flags.h>
 
 static bool __read_mostly enable_uring;
 module_param(enable_uring, bool, 0644);
 MODULE_PARM_DESC(enable_uring,
		 "Enable userspace communication through io-uring");
 
 #define FUSE_URING_IOV_SEGS 2 /* header and payload */
+#define FUSE_RING_HEADER_PG 0
+#define FUSE_RING_PAYLOAD_PG 1
 
 /* redfs only to allow patch backports */
 #define IO_URING_F_TASK_DEAD (1 << 13)
@@ -155,6 +158,21 @@ void fuse_uring_abort_end_requests(struct fuse_ring *ring)
 	}
 }
 
+/*
+ * Copied from memmap.c, which should export it instead
+ */
+static void io_pages_free(struct page ***pages, int npages)
+{
+	struct page **page_array = *pages;
+
+	if (!page_array)
+		return;
+
+	unpin_user_pages(page_array, npages);
+	kvfree(page_array);
+	*pages = NULL;
+}
+
 void fuse_uring_destruct(struct fuse_conn *fc)
 {
 	struct fuse_ring *ring = fc->ring;
@@ -178,6 +196,9 @@ void fuse_uring_destruct(struct fuse_conn *fc)
 		list_for_each_entry_safe(ent, next, &queue->ent_released,
					 list) {
 			list_del_init(&ent->list);
+			io_pages_free(&ent->header_pages, ent->nr_header_pages);
+			io_pages_free(&ent->payload_pages,
+				      ent->nr_payload_pages);
 			kfree(ent);
 		}
 
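
Note on the two io_pages_free() calls above: the helper NULLs the caller's pointer after freeing, so the destruct path may safely call it for entries that were never pinned (header_pages/payload_pages stay NULL in that case), and a repeated call is a no-op rather than a double unpin. A minimal sketch of the intended call pattern, assuming an arbitrary pinned buffer at uaddr/len (names illustrative, not part of the patch):

	static void example_pin_cycle(unsigned long uaddr, unsigned long len)
	{
		struct page **pages;
		int nr;

		pages = io_pin_pages(uaddr, len, &nr);	/* defined further below */
		if (IS_ERR(pages))
			return;

		/* ... read/write the pinned pages ... */

		io_pages_free(&pages, nr);	/* unpins, frees, sets pages = NULL */
		io_pages_free(&pages, nr);	/* no-op: page_array is NULL */
	}
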
@@ -569,13 +590,67 @@ static int fuse_uring_copy_from_ring(struct fuse_ring *ring,
 	fuse_copy_init(&cs, 0, &iter);
 	cs.is_uring = 1;
 	cs.req = req;
+	if (ent->payload_pages)
+		cs.ring.pages = ent->payload_pages;
 
 	return fuse_copy_out_args(&cs, args, ring_in_out.payload_sz);
 }
 
-/*
- * Copy data from the req to the ring buffer
- */
+/*
+ * Copy data from the req to the ring buffer.
+ * Writing the header through pinned pages allows this to run outside the
+ * context of the server application, i.e. io_uring_cmd_complete_in_task()
+ * can be avoided.
+ */
+static int fuse_uring_args_to_ring_pages(struct fuse_ring *ring,
+					 struct fuse_req *req,
+					 struct fuse_ring_ent *ent,
+					 struct fuse_uring_req_header *headers)
+{
+	struct fuse_copy_state cs;
+	struct fuse_args *args = req->args;
+	struct fuse_in_arg *in_args = args->in_args;
+	int num_args = args->in_numargs;
+	int err;
+
+	struct fuse_uring_ent_in_out ent_in_out = {
+		.flags = 0,
+		.commit_id = req->in.h.unique,
+	};
+
+	fuse_copy_init(&cs, 1, NULL);
+	cs.is_uring = 1;
+	cs.req = req;
+	cs.ring.pages = ent->payload_pages;
+
+	if (num_args > 0) {
+		/*
+		 * Expectation is that the first argument is the per-op header.
+		 * Some opcodes have that as zero size.
+		 */
+		if (args->in_args[0].size > 0) {
+			memcpy(&headers->op_in, in_args->value, in_args->size);
+		}
+		in_args++;
+		num_args--;
+	}
+
+	/* copy the payload */
+	err = fuse_copy_args(&cs, num_args, args->in_pages,
+			     (struct fuse_arg *)in_args, 0);
+	if (err) {
+		pr_info_ratelimited("%s fuse_copy_args failed\n", __func__);
+		return err;
+	}
+
+	ent_in_out.payload_sz = cs.ring.copied_sz;
+	memcpy(&headers->ring_ent_in_out, &ent_in_out, sizeof(ent_in_out));
+	return err;
+}
+
+/*
+ * Copy data from the req to the ring buffer
+ */
 static int fuse_uring_args_to_ring(struct fuse_ring *ring, struct fuse_req *req,
				    struct fuse_ring_ent *ent)
 {
@@ -599,6 +674,8 @@ static int fuse_uring_args_to_ring(struct fuse_ring *ring, struct fuse_req *req,
 	fuse_copy_init(&cs, 1, &iter);
 	cs.is_uring = 1;
 	cs.req = req;
+	if (ent->payload_pages)
+		cs.ring.pages = ent->payload_pages;
 
 	if (num_args > 0) {
 		/*
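
For orientation, the header block that both copy paths fill is the uapi struct fuse_uring_req_header; the layout below is paraphrased from include/uapi/linux/fuse.h (the size macros are defined there):

	struct fuse_uring_req_header {
		/* struct fuse_in_header / struct fuse_out_header */
		char in_out[FUSE_URING_IN_OUT_HEADER_SZ];

		/* per-opcode header, e.g. struct fuse_read_in */
		char op_in[FUSE_URING_OP_IN_OUT_SZ];

		/* commit id, payload size, flags */
		struct fuse_uring_ent_in_out ring_ent_in_out;
	};
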
@@ -638,6 +715,7 @@ static int fuse_uring_copy_to_ring(struct fuse_ring_ent *ent,
 	struct fuse_ring_queue *queue = ent->queue;
 	struct fuse_ring *ring = queue->ring;
 	int err;
+	struct fuse_uring_req_header *headers = NULL;
 
 	err = -EIO;
 	if (WARN_ON(ent->state != FRRS_FUSE_REQ)) {
@@ -650,22 +728,29 @@ static int fuse_uring_copy_to_ring(struct fuse_ring_ent *ent,
 	if (WARN_ON(req->in.h.unique == 0))
 		return err;
 
-	/* copy the request */
-	err = fuse_uring_args_to_ring(ring, req, ent);
-	if (unlikely(err)) {
-		pr_info_ratelimited("Copy to ring failed: %d\n", err);
-		return err;
-	}
-
 	/* copy fuse_in_header */
-	err = copy_to_user(&ent->headers->in_out, &req->in.h,
-			   sizeof(req->in.h));
-	if (err) {
-		err = -EFAULT;
-		return err;
+	if (ent->header_pages) {
+		headers = kmap_local_page(
+			ent->header_pages[FUSE_RING_HEADER_PG]);
+
+		memcpy(&headers->in_out, &req->in.h, sizeof(req->in.h));
+
+		err = fuse_uring_args_to_ring_pages(ring, req, ent, headers);
+		kunmap_local(headers);
+	} else {
+		/* copy the request */
+		err = fuse_uring_args_to_ring(ring, req, ent);
+		if (unlikely(err)) {
+			pr_info_ratelimited("Copy to ring failed: %d\n", err);
+			return err;
+		}
+		err = copy_to_user(&ent->headers->in_out, &req->in.h,
+				   sizeof(req->in.h));
+		if (err)
+			err = -EFAULT;
 	}
 
-	return 0;
+	return err;
 }
 
 static int fuse_uring_prepare_send(struct fuse_ring_ent *ent,
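
The pinned branch above writes the header through a temporary kernel mapping instead of copy_to_user(), which is what lets it run outside the submitting task's mm context. A minimal sketch of that mapping pattern (illustrative helper, not part of the patch):

	static void write_to_pinned_page(struct page *pg, const void *src,
					 size_t len)
	{
		void *dst = kmap_local_page(pg);	/* short-lived local mapping */

		memcpy(dst, src, len);
		kunmap_local(dst);	/* must pair with kmap_local_page() */
	}
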
@@ -979,6 +1064,45 @@ static void fuse_uring_do_register(struct fuse_ring_ent *ent,
 	}
 }
 
+/*
+ * Copied from memmap.c, which should export it instead
+ */
+static struct page **io_pin_pages(unsigned long uaddr, unsigned long len,
+				  int *npages)
+{
+	unsigned long start, end, nr_pages;
+	struct page **pages;
+	int ret;
+
+	end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	start = uaddr >> PAGE_SHIFT;
+	nr_pages = end - start;
+	if (WARN_ON_ONCE(!nr_pages))
+		return ERR_PTR(-EINVAL);
+
+	pages = kvmalloc_array(nr_pages, sizeof(struct page *), GFP_KERNEL);
+	if (!pages)
+		return ERR_PTR(-ENOMEM);
+
+	ret = pin_user_pages_fast(uaddr, nr_pages, FOLL_WRITE | FOLL_LONGTERM,
+				  pages);
+	/* success, pinned all pages */
+	if (ret == nr_pages) {
+		*npages = nr_pages;
+		return pages;
+	}
+
+	/* partial pin, or didn't pin anything */
+	if (ret >= 0) {
+		/* if we did a partial pin, release any pages we did get */
+		if (ret)
+			unpin_user_pages(pages, ret);
+		ret = -EFAULT;
+	}
+	kvfree(pages);
+	return ERR_PTR(ret);
+}
+
 /*
  * sqe->addr is a ptr to an iovec array, iov[0] has the headers, iov[1]
  * the payload
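
The start/end arithmetic in io_pin_pages() rounds the buffer out to page boundaries, so a buffer that straddles a boundary pins both touched pages even when len < PAGE_SIZE. Worked example with PAGE_SIZE 4096 (PAGE_SHIFT 12):

	uaddr = 0x1ff8, len = 16
	start    = 0x1ff8 >> 12                  = 1
	end      = (0x1ff8 + 16 + 4095) >> 12    = 3
	nr_pages = end - start                   = 2
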
@@ -1005,6 +1129,59 @@ static int fuse_uring_get_iovec_from_sqe(const struct io_uring_sqe *sqe,
 	return 0;
 }
 
+static int fuse_uring_pin_pages(struct fuse_ring_ent *ent)
+{
+	struct fuse_ring *ring = ent->queue->ring;
+	int err;
+
+	/*
+	 * Pinning would need locked memory accounting, which is not done
+	 * yet, so this is limited to privileged servers for now.
+	 */
+	if (!capable(CAP_SYS_ADMIN))
+		return 0;
+
+	/* Pin header pages */
+	if (!PAGE_ALIGNED(ent->headers)) {
+		pr_info_ratelimited("ent->headers is not page-aligned: %p\n",
+				    ent->headers);
+		return -EINVAL;
+	}
+
+	ent->header_pages = io_pin_pages((unsigned long)ent->headers,
+					 sizeof(struct fuse_uring_req_header),
+					 &ent->nr_header_pages);
+	if (IS_ERR(ent->header_pages)) {
+		err = PTR_ERR(ent->header_pages);
+		pr_info_ratelimited("Failed to pin header pages, err=%d\n",
+				    err);
+		ent->header_pages = NULL;
+		return err;
+	}
+
+	if (ent->nr_header_pages != 1) {
+		pr_info_ratelimited("Header pages not pinned as one page\n");
+		io_pages_free(&ent->header_pages, ent->nr_header_pages);
+		ent->header_pages = NULL;
+		return -EINVAL;
+	}
+
+	/* Pin payload pages */
+	ent->payload_pages = io_pin_pages((unsigned long)ent->payload,
+					  ring->max_payload_sz,
+					  &ent->nr_payload_pages);
+	if (IS_ERR(ent->payload_pages)) {
+		err = PTR_ERR(ent->payload_pages);
+		pr_info_ratelimited("Failed to pin payload pages, err=%d\n",
+				    err);
+		io_pages_free(&ent->header_pages, ent->nr_header_pages);
+		ent->payload_pages = NULL;
+		return err;
+	}
+
+	return 0;
+}
+
 static struct fuse_ring_ent *
 fuse_uring_create_ring_ent(struct io_uring_cmd *cmd,
			    struct fuse_ring_queue *queue)
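
Because fuse_uring_pin_pages() rejects headers that are not page-aligned and requires the header buffer to pin as exactly one page, a server wanting the pinned fast path has to allocate its SQE iovecs accordingly. A hypothetical userspace sketch; alloc_ring_bufs() and the one-page header size are illustrative assumptions, not part of the patch:

	#include <stdlib.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/uio.h>

	/* iov[0]: headers, iov[1]: payload, matching FUSE_URING_IOV_SEGS */
	static int alloc_ring_bufs(struct iovec iov[2], size_t max_payload_sz)
	{
		long pgsz = sysconf(_SC_PAGESIZE);

		/* header must be page-aligned and fit in a single page */
		if (posix_memalign(&iov[0].iov_base, pgsz, pgsz))
			return -1;
		memset(iov[0].iov_base, 0, pgsz);
		iov[0].iov_len = pgsz;

		/* payload: page-aligned so pinning covers whole pages */
		if (posix_memalign(&iov[1].iov_base, pgsz, max_payload_sz)) {
			free(iov[0].iov_base);
			return -1;
		}
		iov[1].iov_len = max_payload_sz;
		return 0;
	}
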
@@ -1046,6 +1223,12 @@ fuse_uring_create_ring_ent(struct io_uring_cmd *cmd,
 	ent->headers = iov[0].iov_base;
 	ent->payload = iov[1].iov_base;
 
+	err = fuse_uring_pin_pages(ent);
+	if (err) {
+		kfree(ent);
+		return ERR_PTR(err);
+	}
+
 	atomic_inc(&ring->queue_refs);
 	return ent;
 }
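
For reference, the fields this patch relies on in struct fuse_ring_ent; the names are taken from the hunks above, while their exact declaration site is an assumption:

	/* pinned pages of iov[0], NULL when the fallback copy path is used */
	struct page **header_pages;
	int nr_header_pages;

	/* pinned pages of iov[1] (payload), NULL when not pinned */
	struct page **payload_pages;
	int nr_payload_pages;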