aboutsummaryrefslogtreecommitdiffstats
path: root/fs/cifs
diff options
context:
space:
mode:
authorSachin Prabhu <sprabhu@redhat.com>2014-03-11 12:11:47 -0400
committerSteve French <smfrench@gmail.com>2014-04-16 14:51:46 -0400
commitc11f1df5003d534fd067f0168bfad7befffb3b5c (patch)
tree048afe1d692c6ab1e446334930c3c116a161978e /fs/cifs
parent0f689a33ad17845363acdc6d52783befd6ad116c (diff)
cifs: Wait for writebacks to complete before attempting write.
Problem reported in Red Hat bz 1040329 for strict writes where we cache only when we hold oplock and write direct to the server when we don't. When we receive an oplock break, we first change the oplock value for the inode in cifsInodeInfo->oplock to indicate that we no longer hold the oplock before we enqueue a task to flush changes to the backing device. Once we have completed flushing the changes, we return the oplock to the server. There are 2 ways here where we can have data corruption 1) While we flush changes to the backing device as part of the oplock break, we can have processes write to the file. These writes check for the oplock, find none and attempt to write directly to the server. These direct writes made while we are flushing from cache could be overwritten by data being flushed from the cache causing data corruption. 2) While a thread runs in cifs_strict_writev, the machine could receive and process an oplock break after the thread has checked the oplock and found that it allows us to cache and before we have made changes to the cache. In that case, we end up with a dirty page in cache when we shouldn't have any. This will be flushed later and will overwrite all subsequent writes to the part of the file represented by this page. Before making any writes to the server, we need to confirm that we are not in the process of flushing data to the server and if we are, we should wait until the process is complete before we attempt the write. We should also wait for existing writes to complete before we process an oplock break request which changes oplock values. We add a version specific downgrade_oplock() operation to allow for differences in the oplock values set for the different smb versions. Cc: stable@vger.kernel.org Signed-off-by: Sachin Prabhu <sprabhu@redhat.com> Reviewed-by: Jeff Layton <jlayton@redhat.com> Reviewed-by: Pavel Shilovsky <piastry@etersoft.ru> Signed-off-by: Steve French <smfrench@gmail.com>
Diffstat (limited to 'fs/cifs')
-rw-r--r--fs/cifs/cifsfs.c14
-rw-r--r--fs/cifs/cifsglob.h8
-rw-r--r--fs/cifs/cifsproto.h3
-rw-r--r--fs/cifs/file.c31
-rw-r--r--fs/cifs/misc.c74
-rw-r--r--fs/cifs/smb1ops.c11
-rw-r--r--fs/cifs/smb2misc.c18
-rw-r--r--fs/cifs/smb2ops.c14
8 files changed, 164 insertions, 9 deletions
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index df9c9141c099..5be1f997ecde 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -253,6 +253,11 @@ cifs_alloc_inode(struct super_block *sb)
253 cifs_set_oplock_level(cifs_inode, 0); 253 cifs_set_oplock_level(cifs_inode, 0);
254 cifs_inode->delete_pending = false; 254 cifs_inode->delete_pending = false;
255 cifs_inode->invalid_mapping = false; 255 cifs_inode->invalid_mapping = false;
256 clear_bit(CIFS_INODE_PENDING_OPLOCK_BREAK, &cifs_inode->flags);
257 clear_bit(CIFS_INODE_PENDING_WRITERS, &cifs_inode->flags);
258 clear_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cifs_inode->flags);
259 spin_lock_init(&cifs_inode->writers_lock);
260 cifs_inode->writers = 0;
256 cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */ 261 cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */
257 cifs_inode->server_eof = 0; 262 cifs_inode->server_eof = 0;
258 cifs_inode->uniqueid = 0; 263 cifs_inode->uniqueid = 0;
@@ -732,19 +737,26 @@ static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
732 unsigned long nr_segs, loff_t pos) 737 unsigned long nr_segs, loff_t pos)
733{ 738{
734 struct inode *inode = file_inode(iocb->ki_filp); 739 struct inode *inode = file_inode(iocb->ki_filp);
740 struct cifsInodeInfo *cinode = CIFS_I(inode);
735 ssize_t written; 741 ssize_t written;
736 int rc; 742 int rc;
737 743
744 written = cifs_get_writer(cinode);
745 if (written)
746 return written;
747
738 written = generic_file_aio_write(iocb, iov, nr_segs, pos); 748 written = generic_file_aio_write(iocb, iov, nr_segs, pos);
739 749
740 if (CIFS_CACHE_WRITE(CIFS_I(inode))) 750 if (CIFS_CACHE_WRITE(CIFS_I(inode)))
741 return written; 751 goto out;
742 752
743 rc = filemap_fdatawrite(inode->i_mapping); 753 rc = filemap_fdatawrite(inode->i_mapping);
744 if (rc) 754 if (rc)
745 cifs_dbg(FYI, "cifs_file_aio_write: %d rc on %p inode\n", 755 cifs_dbg(FYI, "cifs_file_aio_write: %d rc on %p inode\n",
746 rc, inode); 756 rc, inode);
747 757
758out:
759 cifs_put_writer(cinode);
748 return written; 760 return written;
749} 761}
750 762
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index c0f3718b77a8..30f6e9251a4a 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -228,6 +228,8 @@ struct smb_version_operations {
228 /* verify the message */ 228 /* verify the message */
229 int (*check_message)(char *, unsigned int); 229 int (*check_message)(char *, unsigned int);
230 bool (*is_oplock_break)(char *, struct TCP_Server_Info *); 230 bool (*is_oplock_break)(char *, struct TCP_Server_Info *);
231 void (*downgrade_oplock)(struct TCP_Server_Info *,
232 struct cifsInodeInfo *, bool);
231 /* process transaction2 response */ 233 /* process transaction2 response */
232 bool (*check_trans2)(struct mid_q_entry *, struct TCP_Server_Info *, 234 bool (*check_trans2)(struct mid_q_entry *, struct TCP_Server_Info *,
233 char *, int); 235 char *, int);
@@ -1113,6 +1115,12 @@ struct cifsInodeInfo {
1113 unsigned int epoch; /* used to track lease state changes */ 1115 unsigned int epoch; /* used to track lease state changes */
1114 bool delete_pending; /* DELETE_ON_CLOSE is set */ 1116 bool delete_pending; /* DELETE_ON_CLOSE is set */
1115 bool invalid_mapping; /* pagecache is invalid */ 1117 bool invalid_mapping; /* pagecache is invalid */
1118 unsigned long flags;
1119#define CIFS_INODE_PENDING_OPLOCK_BREAK (0) /* oplock break in progress */
1120#define CIFS_INODE_PENDING_WRITERS (1) /* Writes in progress */
1121#define CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2 (2) /* Downgrade oplock to L2 */
1122 spinlock_t writers_lock;
1123 unsigned int writers; /* Number of writers on this inode */
1116 unsigned long time; /* jiffies of last update of inode */ 1124 unsigned long time; /* jiffies of last update of inode */
1117 u64 server_eof; /* current file size on server -- protected by i_lock */ 1125 u64 server_eof; /* current file size on server -- protected by i_lock */
1118 u64 uniqueid; /* server inode number */ 1126 u64 uniqueid; /* server inode number */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index acc4ee8ed075..ca7980a1e303 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -127,6 +127,9 @@ extern u64 cifs_UnixTimeToNT(struct timespec);
127extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time, 127extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time,
128 int offset); 128 int offset);
129extern void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock); 129extern void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock);
130extern int cifs_get_writer(struct cifsInodeInfo *cinode);
131extern void cifs_put_writer(struct cifsInodeInfo *cinode);
132extern void cifs_done_oplock_break(struct cifsInodeInfo *cinode);
130extern int cifs_unlock_range(struct cifsFileInfo *cfile, 133extern int cifs_unlock_range(struct cifsFileInfo *cfile,
131 struct file_lock *flock, const unsigned int xid); 134 struct file_lock *flock, const unsigned int xid);
132extern int cifs_push_mandatory_locks(struct cifsFileInfo *cfile); 135extern int cifs_push_mandatory_locks(struct cifsFileInfo *cfile);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 8add25538a3b..d8ee76241b64 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2621,12 +2621,20 @@ cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
2621 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 2621 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2622 ssize_t written; 2622 ssize_t written;
2623 2623
2624 written = cifs_get_writer(cinode);
2625 if (written)
2626 return written;
2627
2624 if (CIFS_CACHE_WRITE(cinode)) { 2628 if (CIFS_CACHE_WRITE(cinode)) {
2625 if (cap_unix(tcon->ses) && 2629 if (cap_unix(tcon->ses) &&
2626 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) 2630 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2627 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) 2631 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
2628 return generic_file_aio_write(iocb, iov, nr_segs, pos); 2632 written = generic_file_aio_write(
2629 return cifs_writev(iocb, iov, nr_segs, pos); 2633 iocb, iov, nr_segs, pos);
2634 goto out;
2635 }
2636 written = cifs_writev(iocb, iov, nr_segs, pos);
2637 goto out;
2630 } 2638 }
2631 /* 2639 /*
2632 * For non-oplocked files in strict cache mode we need to write the data 2640 * For non-oplocked files in strict cache mode we need to write the data
@@ -2646,6 +2654,8 @@ cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
2646 inode); 2654 inode);
2647 cinode->oplock = 0; 2655 cinode->oplock = 0;
2648 } 2656 }
2657out:
2658 cifs_put_writer(cinode);
2649 return written; 2659 return written;
2650} 2660}
2651 2661
@@ -3621,6 +3631,13 @@ static int cifs_launder_page(struct page *page)
3621 return rc; 3631 return rc;
3622} 3632}
3623 3633
3634static int
3635cifs_pending_writers_wait(void *unused)
3636{
3637 schedule();
3638 return 0;
3639}
3640
3624void cifs_oplock_break(struct work_struct *work) 3641void cifs_oplock_break(struct work_struct *work)
3625{ 3642{
3626 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo, 3643 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
@@ -3628,8 +3645,15 @@ void cifs_oplock_break(struct work_struct *work)
3628 struct inode *inode = cfile->dentry->d_inode; 3645 struct inode *inode = cfile->dentry->d_inode;
3629 struct cifsInodeInfo *cinode = CIFS_I(inode); 3646 struct cifsInodeInfo *cinode = CIFS_I(inode);
3630 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 3647 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3648 struct TCP_Server_Info *server = tcon->ses->server;
3631 int rc = 0; 3649 int rc = 0;
3632 3650
3651 wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
3652 cifs_pending_writers_wait, TASK_UNINTERRUPTIBLE);
3653
3654 server->ops->downgrade_oplock(server, cinode,
3655 test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));
3656
3633 if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) && 3657 if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
3634 cifs_has_mand_locks(cinode)) { 3658 cifs_has_mand_locks(cinode)) {
3635 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n", 3659 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
@@ -3666,6 +3690,7 @@ void cifs_oplock_break(struct work_struct *work)
3666 cinode); 3690 cinode);
3667 cifs_dbg(FYI, "Oplock release rc = %d\n", rc); 3691 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
3668 } 3692 }
3693 cifs_done_oplock_break(cinode);
3669} 3694}
3670 3695
3671/* 3696/*
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 2f9f3790679d..3b0c62e622da 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -466,8 +466,22 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv)
466 cifs_dbg(FYI, "file id match, oplock break\n"); 466 cifs_dbg(FYI, "file id match, oplock break\n");
467 pCifsInode = CIFS_I(netfile->dentry->d_inode); 467 pCifsInode = CIFS_I(netfile->dentry->d_inode);
468 468
469 cifs_set_oplock_level(pCifsInode, 469 set_bit(CIFS_INODE_PENDING_OPLOCK_BREAK,
470 pSMB->OplockLevel ? OPLOCK_READ : 0); 470 &pCifsInode->flags);
471
472 /*
473 * Set flag if the server downgrades the oplock
474 * to L2 else clear.
475 */
476 if (pSMB->OplockLevel)
477 set_bit(
478 CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
479 &pCifsInode->flags);
480 else
481 clear_bit(
482 CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
483 &pCifsInode->flags);
484
471 queue_work(cifsiod_wq, 485 queue_work(cifsiod_wq,
472 &netfile->oplock_break); 486 &netfile->oplock_break);
473 netfile->oplock_break_cancelled = false; 487 netfile->oplock_break_cancelled = false;
@@ -551,6 +565,62 @@ void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock)
551 cinode->oplock = 0; 565 cinode->oplock = 0;
552} 566}
553 567
568static int
569cifs_oplock_break_wait(void *unused)
570{
571 schedule();
572 return signal_pending(current) ? -ERESTARTSYS : 0;
573}
574
575/*
576 * We wait for oplock breaks to be processed before we attempt to perform
577 * writes.
578 */
579int cifs_get_writer(struct cifsInodeInfo *cinode)
580{
581 int rc;
582
583start:
584 rc = wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_OPLOCK_BREAK,
585 cifs_oplock_break_wait, TASK_KILLABLE);
586 if (rc)
587 return rc;
588
589 spin_lock(&cinode->writers_lock);
590 if (!cinode->writers)
591 set_bit(CIFS_INODE_PENDING_WRITERS, &cinode->flags);
592 cinode->writers++;
593 /* Check to see if we have started servicing an oplock break */
594 if (test_bit(CIFS_INODE_PENDING_OPLOCK_BREAK, &cinode->flags)) {
595 cinode->writers--;
596 if (cinode->writers == 0) {
597 clear_bit(CIFS_INODE_PENDING_WRITERS, &cinode->flags);
598 wake_up_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS);
599 }
600 spin_unlock(&cinode->writers_lock);
601 goto start;
602 }
603 spin_unlock(&cinode->writers_lock);
604 return 0;
605}
606
607void cifs_put_writer(struct cifsInodeInfo *cinode)
608{
609 spin_lock(&cinode->writers_lock);
610 cinode->writers--;
611 if (cinode->writers == 0) {
612 clear_bit(CIFS_INODE_PENDING_WRITERS, &cinode->flags);
613 wake_up_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS);
614 }
615 spin_unlock(&cinode->writers_lock);
616}
617
618void cifs_done_oplock_break(struct cifsInodeInfo *cinode)
619{
620 clear_bit(CIFS_INODE_PENDING_OPLOCK_BREAK, &cinode->flags);
621 wake_up_bit(&cinode->flags, CIFS_INODE_PENDING_OPLOCK_BREAK);
622}
623
554bool 624bool
555backup_cred(struct cifs_sb_info *cifs_sb) 625backup_cred(struct cifs_sb_info *cifs_sb)
556{ 626{
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 526fb89f9230..d1fdfa848703 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -372,6 +372,16 @@ coalesce_t2(char *second_buf, struct smb_hdr *target_hdr)
372 return 0; 372 return 0;
373} 373}
374 374
375static void
376cifs_downgrade_oplock(struct TCP_Server_Info *server,
377 struct cifsInodeInfo *cinode, bool set_level2)
378{
379 if (set_level2)
380 cifs_set_oplock_level(cinode, OPLOCK_READ);
381 else
382 cifs_set_oplock_level(cinode, 0);
383}
384
375static bool 385static bool
376cifs_check_trans2(struct mid_q_entry *mid, struct TCP_Server_Info *server, 386cifs_check_trans2(struct mid_q_entry *mid, struct TCP_Server_Info *server,
377 char *buf, int malformed) 387 char *buf, int malformed)
@@ -1019,6 +1029,7 @@ struct smb_version_operations smb1_operations = {
1019 .clear_stats = cifs_clear_stats, 1029 .clear_stats = cifs_clear_stats,
1020 .print_stats = cifs_print_stats, 1030 .print_stats = cifs_print_stats,
1021 .is_oplock_break = is_valid_oplock_break, 1031 .is_oplock_break = is_valid_oplock_break,
1032 .downgrade_oplock = cifs_downgrade_oplock,
1022 .check_trans2 = cifs_check_trans2, 1033 .check_trans2 = cifs_check_trans2,
1023 .need_neg = cifs_need_neg, 1034 .need_neg = cifs_need_neg,
1024 .negotiate = cifs_negotiate, 1035 .negotiate = cifs_negotiate,
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index fb3966265b6e..b8021fde987d 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -575,9 +575,21 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
575 else 575 else
576 cfile->oplock_break_cancelled = false; 576 cfile->oplock_break_cancelled = false;
577 577
578 server->ops->set_oplock_level(cinode, 578 set_bit(CIFS_INODE_PENDING_OPLOCK_BREAK,
579 rsp->OplockLevel ? SMB2_OPLOCK_LEVEL_II : 0, 579 &cinode->flags);
580 0, NULL); 580
581 /*
582 * Set flag if the server downgrades the oplock
583 * to L2 else clear.
584 */
585 if (rsp->OplockLevel)
586 set_bit(
587 CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
588 &cinode->flags);
589 else
590 clear_bit(
591 CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
592 &cinode->flags);
581 593
582 queue_work(cifsiod_wq, &cfile->oplock_break); 594 queue_work(cifsiod_wq, &cfile->oplock_break);
583 595
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 192f51a12cf1..35ddc3ed119d 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -905,6 +905,17 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
905} 905}
906 906
907static void 907static void
908smb2_downgrade_oplock(struct TCP_Server_Info *server,
909 struct cifsInodeInfo *cinode, bool set_level2)
910{
911 if (set_level2)
912 server->ops->set_oplock_level(cinode, SMB2_OPLOCK_LEVEL_II,
913 0, NULL);
914 else
915 server->ops->set_oplock_level(cinode, 0, 0, NULL);
916}
917
918static void
908smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock, 919smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock,
909 unsigned int epoch, bool *purge_cache) 920 unsigned int epoch, bool *purge_cache)
910{ 921{
@@ -1110,6 +1121,7 @@ struct smb_version_operations smb20_operations = {
1110 .clear_stats = smb2_clear_stats, 1121 .clear_stats = smb2_clear_stats,
1111 .print_stats = smb2_print_stats, 1122 .print_stats = smb2_print_stats,
1112 .is_oplock_break = smb2_is_valid_oplock_break, 1123 .is_oplock_break = smb2_is_valid_oplock_break,
1124 .downgrade_oplock = smb2_downgrade_oplock,
1113 .need_neg = smb2_need_neg, 1125 .need_neg = smb2_need_neg,
1114 .negotiate = smb2_negotiate, 1126 .negotiate = smb2_negotiate,
1115 .negotiate_wsize = smb2_negotiate_wsize, 1127 .negotiate_wsize = smb2_negotiate_wsize,
@@ -1184,6 +1196,7 @@ struct smb_version_operations smb21_operations = {
1184 .clear_stats = smb2_clear_stats, 1196 .clear_stats = smb2_clear_stats,
1185 .print_stats = smb2_print_stats, 1197 .print_stats = smb2_print_stats,
1186 .is_oplock_break = smb2_is_valid_oplock_break, 1198 .is_oplock_break = smb2_is_valid_oplock_break,
1199 .downgrade_oplock = smb2_downgrade_oplock,
1187 .need_neg = smb2_need_neg, 1200 .need_neg = smb2_need_neg,
1188 .negotiate = smb2_negotiate, 1201 .negotiate = smb2_negotiate,
1189 .negotiate_wsize = smb2_negotiate_wsize, 1202 .negotiate_wsize = smb2_negotiate_wsize,
@@ -1259,6 +1272,7 @@ struct smb_version_operations smb30_operations = {
1259 .print_stats = smb2_print_stats, 1272 .print_stats = smb2_print_stats,
1260 .dump_share_caps = smb2_dump_share_caps, 1273 .dump_share_caps = smb2_dump_share_caps,
1261 .is_oplock_break = smb2_is_valid_oplock_break, 1274 .is_oplock_break = smb2_is_valid_oplock_break,
1275 .downgrade_oplock = smb2_downgrade_oplock,
1262 .need_neg = smb2_need_neg, 1276 .need_neg = smb2_need_neg,
1263 .negotiate = smb2_negotiate, 1277 .negotiate = smb2_negotiate,
1264 .negotiate_wsize = smb2_negotiate_wsize, 1278 .negotiate_wsize = smb2_negotiate_wsize,