Skip to content

Commit c11f1df

Browse files
spuiuksmfrench
authored andcommitted
cifs: Wait for writebacks to complete before attempting write.
Problem reported in Red Hat bz 1040329 for strict writes where we cache only when we hold oplock and write direct to the server when we don't. When we receive an oplock break, we first change the oplock value for the inode in cifsInodeInfo->oplock to indicate that we no longer hold the oplock before we enqueue a task to flush changes to the backing device. Once we have completed flushing the changes, we return the oplock to the server. There are 2 ways here where we can have data corruption 1) While we flush changes to the backing device as part of the oplock break, we can have processes write to the file. These writes check for the oplock, find none and attempt to write directly to the server. These direct writes made while we are flushing from cache could be overwritten by data being flushed from the cache causing data corruption. 2) While a thread runs in cifs_strict_writev, the machine could receive and process an oplock break after the thread has checked the oplock and found that it allows us to cache and before we have made changes to the cache. In that case, we end up with a dirty page in cache when we shouldn't have any. This will be flushed later and will overwrite all subsequent writes to the part of the file represented by this page. Before making any writes to the server, we need to confirm that we are not in the process of flushing data to the server and if we are, we should wait until the process is complete before we attempt the write. We should also wait for existing writes to complete before we process an oplock break request which changes oplock values. We add a version specific downgrade_oplock() operation to allow for differences in the oplock values set for the different smb versions. Cc: [email protected] Signed-off-by: Sachin Prabhu <[email protected]> Reviewed-by: Jeff Layton <[email protected]> Reviewed-by: Pavel Shilovsky <[email protected]> Signed-off-by: Steve French <[email protected]>
1 parent 0f689a3 commit c11f1df

File tree

8 files changed

+164
-9
lines changed

8 files changed

+164
-9
lines changed

fs/cifs/cifsfs.c

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,11 @@ cifs_alloc_inode(struct super_block *sb)
253253
cifs_set_oplock_level(cifs_inode, 0);
254254
cifs_inode->delete_pending = false;
255255
cifs_inode->invalid_mapping = false;
256+
clear_bit(CIFS_INODE_PENDING_OPLOCK_BREAK, &cifs_inode->flags);
257+
clear_bit(CIFS_INODE_PENDING_WRITERS, &cifs_inode->flags);
258+
clear_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cifs_inode->flags);
259+
spin_lock_init(&cifs_inode->writers_lock);
260+
cifs_inode->writers = 0;
256261
cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */
257262
cifs_inode->server_eof = 0;
258263
cifs_inode->uniqueid = 0;
@@ -732,19 +737,26 @@ static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
732737
unsigned long nr_segs, loff_t pos)
733738
{
734739
struct inode *inode = file_inode(iocb->ki_filp);
740+
struct cifsInodeInfo *cinode = CIFS_I(inode);
735741
ssize_t written;
736742
int rc;
737743

744+
written = cifs_get_writer(cinode);
745+
if (written)
746+
return written;
747+
738748
written = generic_file_aio_write(iocb, iov, nr_segs, pos);
739749

740750
if (CIFS_CACHE_WRITE(CIFS_I(inode)))
741-
return written;
751+
goto out;
742752

743753
rc = filemap_fdatawrite(inode->i_mapping);
744754
if (rc)
745755
cifs_dbg(FYI, "cifs_file_aio_write: %d rc on %p inode\n",
746756
rc, inode);
747757

758+
out:
759+
cifs_put_writer(cinode);
748760
return written;
749761
}
750762

fs/cifs/cifsglob.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,8 @@ struct smb_version_operations {
228228
/* verify the message */
229229
int (*check_message)(char *, unsigned int);
230230
bool (*is_oplock_break)(char *, struct TCP_Server_Info *);
231+
void (*downgrade_oplock)(struct TCP_Server_Info *,
232+
struct cifsInodeInfo *, bool);
231233
/* process transaction2 response */
232234
bool (*check_trans2)(struct mid_q_entry *, struct TCP_Server_Info *,
233235
char *, int);
@@ -1113,6 +1115,12 @@ struct cifsInodeInfo {
11131115
unsigned int epoch; /* used to track lease state changes */
11141116
bool delete_pending; /* DELETE_ON_CLOSE is set */
11151117
bool invalid_mapping; /* pagecache is invalid */
1118+
unsigned long flags;
1119+
#define CIFS_INODE_PENDING_OPLOCK_BREAK (0) /* oplock break in progress */
1120+
#define CIFS_INODE_PENDING_WRITERS (1) /* Writes in progress */
1121+
#define CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2 (2) /* Downgrade oplock to L2 */
1122+
spinlock_t writers_lock;
1123+
unsigned int writers; /* Number of writers on this inode */
11161124
unsigned long time; /* jiffies of last update of inode */
11171125
u64 server_eof; /* current file size on server -- protected by i_lock */
11181126
u64 uniqueid; /* server inode number */

fs/cifs/cifsproto.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,9 @@ extern u64 cifs_UnixTimeToNT(struct timespec);
127127
extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time,
128128
int offset);
129129
extern void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock);
130+
extern int cifs_get_writer(struct cifsInodeInfo *cinode);
131+
extern void cifs_put_writer(struct cifsInodeInfo *cinode);
132+
extern void cifs_done_oplock_break(struct cifsInodeInfo *cinode);
130133
extern int cifs_unlock_range(struct cifsFileInfo *cfile,
131134
struct file_lock *flock, const unsigned int xid);
132135
extern int cifs_push_mandatory_locks(struct cifsFileInfo *cfile);

fs/cifs/file.c

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2621,12 +2621,20 @@ cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
26212621
struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
26222622
ssize_t written;
26232623

2624+
written = cifs_get_writer(cinode);
2625+
if (written)
2626+
return written;
2627+
26242628
if (CIFS_CACHE_WRITE(cinode)) {
26252629
if (cap_unix(tcon->ses) &&
26262630
(CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2627-
&& ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2628-
return generic_file_aio_write(iocb, iov, nr_segs, pos);
2629-
return cifs_writev(iocb, iov, nr_segs, pos);
2631+
&& ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
2632+
written = generic_file_aio_write(
2633+
iocb, iov, nr_segs, pos);
2634+
goto out;
2635+
}
2636+
written = cifs_writev(iocb, iov, nr_segs, pos);
2637+
goto out;
26302638
}
26312639
/*
26322640
* For non-oplocked files in strict cache mode we need to write the data
@@ -2646,6 +2654,8 @@ cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
26462654
inode);
26472655
cinode->oplock = 0;
26482656
}
2657+
out:
2658+
cifs_put_writer(cinode);
26492659
return written;
26502660
}
26512661

@@ -3621,15 +3631,29 @@ static int cifs_launder_page(struct page *page)
36213631
return rc;
36223632
}
36233633

3634+
static int
3635+
cifs_pending_writers_wait(void *unused)
3636+
{
3637+
schedule();
3638+
return 0;
3639+
}
3640+
36243641
void cifs_oplock_break(struct work_struct *work)
36253642
{
36263643
struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
36273644
oplock_break);
36283645
struct inode *inode = cfile->dentry->d_inode;
36293646
struct cifsInodeInfo *cinode = CIFS_I(inode);
36303647
struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3648+
struct TCP_Server_Info *server = tcon->ses->server;
36313649
int rc = 0;
36323650

3651+
wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
3652+
cifs_pending_writers_wait, TASK_UNINTERRUPTIBLE);
3653+
3654+
server->ops->downgrade_oplock(server, cinode,
3655+
test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));
3656+
36333657
if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
36343658
cifs_has_mand_locks(cinode)) {
36353659
cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
@@ -3666,6 +3690,7 @@ void cifs_oplock_break(struct work_struct *work)
36663690
cinode);
36673691
cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
36683692
}
3693+
cifs_done_oplock_break(cinode);
36693694
}
36703695

36713696
/*

fs/cifs/misc.c

Lines changed: 72 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -466,8 +466,22 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv)
466466
cifs_dbg(FYI, "file id match, oplock break\n");
467467
pCifsInode = CIFS_I(netfile->dentry->d_inode);
468468

469-
cifs_set_oplock_level(pCifsInode,
470-
pSMB->OplockLevel ? OPLOCK_READ : 0);
469+
set_bit(CIFS_INODE_PENDING_OPLOCK_BREAK,
470+
&pCifsInode->flags);
471+
472+
/*
473+
* Set flag if the server downgrades the oplock
474+
* to L2 else clear.
475+
*/
476+
if (pSMB->OplockLevel)
477+
set_bit(
478+
CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
479+
&pCifsInode->flags);
480+
else
481+
clear_bit(
482+
CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
483+
&pCifsInode->flags);
484+
471485
queue_work(cifsiod_wq,
472486
&netfile->oplock_break);
473487
netfile->oplock_break_cancelled = false;
@@ -551,6 +565,62 @@ void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock)
551565
cinode->oplock = 0;
552566
}
553567

568+
static int
569+
cifs_oplock_break_wait(void *unused)
570+
{
571+
schedule();
572+
return signal_pending(current) ? -ERESTARTSYS : 0;
573+
}
574+
575+
/*
576+
* We wait for oplock breaks to be processed before we attempt to perform
577+
* writes.
578+
*/
579+
int cifs_get_writer(struct cifsInodeInfo *cinode)
580+
{
581+
int rc;
582+
583+
start:
584+
rc = wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_OPLOCK_BREAK,
585+
cifs_oplock_break_wait, TASK_KILLABLE);
586+
if (rc)
587+
return rc;
588+
589+
spin_lock(&cinode->writers_lock);
590+
if (!cinode->writers)
591+
set_bit(CIFS_INODE_PENDING_WRITERS, &cinode->flags);
592+
cinode->writers++;
593+
/* Check to see if we have started servicing an oplock break */
594+
if (test_bit(CIFS_INODE_PENDING_OPLOCK_BREAK, &cinode->flags)) {
595+
cinode->writers--;
596+
if (cinode->writers == 0) {
597+
clear_bit(CIFS_INODE_PENDING_WRITERS, &cinode->flags);
598+
wake_up_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS);
599+
}
600+
spin_unlock(&cinode->writers_lock);
601+
goto start;
602+
}
603+
spin_unlock(&cinode->writers_lock);
604+
return 0;
605+
}
606+
607+
void cifs_put_writer(struct cifsInodeInfo *cinode)
608+
{
609+
spin_lock(&cinode->writers_lock);
610+
cinode->writers--;
611+
if (cinode->writers == 0) {
612+
clear_bit(CIFS_INODE_PENDING_WRITERS, &cinode->flags);
613+
wake_up_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS);
614+
}
615+
spin_unlock(&cinode->writers_lock);
616+
}
617+
618+
void cifs_done_oplock_break(struct cifsInodeInfo *cinode)
619+
{
620+
clear_bit(CIFS_INODE_PENDING_OPLOCK_BREAK, &cinode->flags);
621+
wake_up_bit(&cinode->flags, CIFS_INODE_PENDING_OPLOCK_BREAK);
622+
}
623+
554624
bool
555625
backup_cred(struct cifs_sb_info *cifs_sb)
556626
{

fs/cifs/smb1ops.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -372,6 +372,16 @@ coalesce_t2(char *second_buf, struct smb_hdr *target_hdr)
372372
return 0;
373373
}
374374

375+
static void
376+
cifs_downgrade_oplock(struct TCP_Server_Info *server,
377+
struct cifsInodeInfo *cinode, bool set_level2)
378+
{
379+
if (set_level2)
380+
cifs_set_oplock_level(cinode, OPLOCK_READ);
381+
else
382+
cifs_set_oplock_level(cinode, 0);
383+
}
384+
375385
static bool
376386
cifs_check_trans2(struct mid_q_entry *mid, struct TCP_Server_Info *server,
377387
char *buf, int malformed)
@@ -1019,6 +1029,7 @@ struct smb_version_operations smb1_operations = {
10191029
.clear_stats = cifs_clear_stats,
10201030
.print_stats = cifs_print_stats,
10211031
.is_oplock_break = is_valid_oplock_break,
1032+
.downgrade_oplock = cifs_downgrade_oplock,
10221033
.check_trans2 = cifs_check_trans2,
10231034
.need_neg = cifs_need_neg,
10241035
.negotiate = cifs_negotiate,

fs/cifs/smb2misc.c

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -575,9 +575,21 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
575575
else
576576
cfile->oplock_break_cancelled = false;
577577

578-
server->ops->set_oplock_level(cinode,
579-
rsp->OplockLevel ? SMB2_OPLOCK_LEVEL_II : 0,
580-
0, NULL);
578+
set_bit(CIFS_INODE_PENDING_OPLOCK_BREAK,
579+
&cinode->flags);
580+
581+
/*
582+
* Set flag if the server downgrades the oplock
583+
* to L2 else clear.
584+
*/
585+
if (rsp->OplockLevel)
586+
set_bit(
587+
CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
588+
&cinode->flags);
589+
else
590+
clear_bit(
591+
CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
592+
&cinode->flags);
581593

582594
queue_work(cifsiod_wq, &cfile->oplock_break);
583595

fs/cifs/smb2ops.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -904,6 +904,17 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
904904
return rc;
905905
}
906906

907+
static void
908+
smb2_downgrade_oplock(struct TCP_Server_Info *server,
909+
struct cifsInodeInfo *cinode, bool set_level2)
910+
{
911+
if (set_level2)
912+
server->ops->set_oplock_level(cinode, SMB2_OPLOCK_LEVEL_II,
913+
0, NULL);
914+
else
915+
server->ops->set_oplock_level(cinode, 0, 0, NULL);
916+
}
917+
907918
static void
908919
smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock,
909920
unsigned int epoch, bool *purge_cache)
@@ -1110,6 +1121,7 @@ struct smb_version_operations smb20_operations = {
11101121
.clear_stats = smb2_clear_stats,
11111122
.print_stats = smb2_print_stats,
11121123
.is_oplock_break = smb2_is_valid_oplock_break,
1124+
.downgrade_oplock = smb2_downgrade_oplock,
11131125
.need_neg = smb2_need_neg,
11141126
.negotiate = smb2_negotiate,
11151127
.negotiate_wsize = smb2_negotiate_wsize,
@@ -1184,6 +1196,7 @@ struct smb_version_operations smb21_operations = {
11841196
.clear_stats = smb2_clear_stats,
11851197
.print_stats = smb2_print_stats,
11861198
.is_oplock_break = smb2_is_valid_oplock_break,
1199+
.downgrade_oplock = smb2_downgrade_oplock,
11871200
.need_neg = smb2_need_neg,
11881201
.negotiate = smb2_negotiate,
11891202
.negotiate_wsize = smb2_negotiate_wsize,
@@ -1259,6 +1272,7 @@ struct smb_version_operations smb30_operations = {
12591272
.print_stats = smb2_print_stats,
12601273
.dump_share_caps = smb2_dump_share_caps,
12611274
.is_oplock_break = smb2_is_valid_oplock_break,
1275+
.downgrade_oplock = smb2_downgrade_oplock,
12621276
.need_neg = smb2_need_neg,
12631277
.negotiate = smb2_negotiate,
12641278
.negotiate_wsize = smb2_negotiate_wsize,

0 commit comments

Comments
 (0)