diff options
author | Sachin Prabhu <sprabhu@redhat.com> | 2014-03-11 12:11:47 -0400 |
---|---|---|
committer | Steve French <smfrench@gmail.com> | 2014-04-16 14:51:46 -0400 |
commit | c11f1df5003d534fd067f0168bfad7befffb3b5c (patch) | |
tree | 048afe1d692c6ab1e446334930c3c116a161978e /fs/cifs/misc.c | |
parent | 0f689a33ad17845363acdc6d52783befd6ad116c (diff) |
cifs: Wait for writebacks to complete before attempting write.
Problem reported in Red Hat bz 1040329 for strict writes, where we cache
only when we hold an oplock and write directly to the server when we don't.
When we receive an oplock break, we first change the oplock value for
the inode in cifsInodeInfo->oplock to indicate that we no longer hold
the oplock before we enqueue a task to flush changes to the backing
device. Once we have completed flushing the changes, we return the
oplock to the server.
There are two ways in which this can lead to data corruption:
1) While we flush changes to the backing device as part of the oplock
break, we can have processes write to the file. These writes check for
the oplock, find none and attempt to write directly to the server.
These direct writes made while we are flushing from cache could be
overwritten by data being flushed from the cache causing data
corruption.
2) While a thread runs in cifs_strict_writev, the machine could receive
and process an oplock break after the thread has checked the oplock and
found that it allows us to cache and before we have made changes to the
cache. In that case, we end up with a dirty page in cache when we
shouldn't have any. This will be flushed later and will overwrite all
subsequent writes to the part of the file represented by this page.
Before making any writes to the server, we need to confirm that we are
not in the process of flushing data to the server and if we are, we
should wait until the process is complete before we attempt the write.
We should also wait for existing writes to complete before we process
an oplock break request which changes oplock values.
We add a version specific downgrade_oplock() operation to allow for
differences in the oplock values set for the different smb versions.
Cc: stable@vger.kernel.org
Signed-off-by: Sachin Prabhu <sprabhu@redhat.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Reviewed-by: Pavel Shilovsky <piastry@etersoft.ru>
Signed-off-by: Steve French <smfrench@gmail.com>
Diffstat (limited to 'fs/cifs/misc.c')
-rw-r--r-- | fs/cifs/misc.c | 74 |
1 files changed, 72 insertions, 2 deletions
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 2f9f3790679d..3b0c62e622da 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c | |||
@@ -466,8 +466,22 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv) | |||
466 | cifs_dbg(FYI, "file id match, oplock break\n"); | 466 | cifs_dbg(FYI, "file id match, oplock break\n"); |
467 | pCifsInode = CIFS_I(netfile->dentry->d_inode); | 467 | pCifsInode = CIFS_I(netfile->dentry->d_inode); |
468 | 468 | ||
469 | cifs_set_oplock_level(pCifsInode, | 469 | set_bit(CIFS_INODE_PENDING_OPLOCK_BREAK, |
470 | pSMB->OplockLevel ? OPLOCK_READ : 0); | 470 | &pCifsInode->flags); |
471 | |||
472 | /* | ||
473 | * Set flag if the server downgrades the oplock | ||
474 | * to L2 else clear. | ||
475 | */ | ||
476 | if (pSMB->OplockLevel) | ||
477 | set_bit( | ||
478 | CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, | ||
479 | &pCifsInode->flags); | ||
480 | else | ||
481 | clear_bit( | ||
482 | CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, | ||
483 | &pCifsInode->flags); | ||
484 | |||
471 | queue_work(cifsiod_wq, | 485 | queue_work(cifsiod_wq, |
472 | &netfile->oplock_break); | 486 | &netfile->oplock_break); |
473 | netfile->oplock_break_cancelled = false; | 487 | netfile->oplock_break_cancelled = false; |
@@ -551,6 +565,62 @@ void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock) | |||
551 | cinode->oplock = 0; | 565 | cinode->oplock = 0; |
552 | } | 566 | } |
553 | 567 | ||
568 | static int | ||
569 | cifs_oplock_break_wait(void *unused) | ||
570 | { | ||
571 | schedule(); | ||
572 | return signal_pending(current) ? -ERESTARTSYS : 0; | ||
573 | } | ||
574 | |||
575 | /* | ||
576 | * We wait for oplock breaks to be processed before we attempt to perform | ||
577 | * writes. | ||
578 | */ | ||
579 | int cifs_get_writer(struct cifsInodeInfo *cinode) | ||
580 | { | ||
581 | int rc; | ||
582 | |||
583 | start: | ||
584 | rc = wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_OPLOCK_BREAK, | ||
585 | cifs_oplock_break_wait, TASK_KILLABLE); | ||
586 | if (rc) | ||
587 | return rc; | ||
588 | |||
589 | spin_lock(&cinode->writers_lock); | ||
590 | if (!cinode->writers) | ||
591 | set_bit(CIFS_INODE_PENDING_WRITERS, &cinode->flags); | ||
592 | cinode->writers++; | ||
593 | /* Check to see if we have started servicing an oplock break */ | ||
594 | if (test_bit(CIFS_INODE_PENDING_OPLOCK_BREAK, &cinode->flags)) { | ||
595 | cinode->writers--; | ||
596 | if (cinode->writers == 0) { | ||
597 | clear_bit(CIFS_INODE_PENDING_WRITERS, &cinode->flags); | ||
598 | wake_up_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS); | ||
599 | } | ||
600 | spin_unlock(&cinode->writers_lock); | ||
601 | goto start; | ||
602 | } | ||
603 | spin_unlock(&cinode->writers_lock); | ||
604 | return 0; | ||
605 | } | ||
606 | |||
607 | void cifs_put_writer(struct cifsInodeInfo *cinode) | ||
608 | { | ||
609 | spin_lock(&cinode->writers_lock); | ||
610 | cinode->writers--; | ||
611 | if (cinode->writers == 0) { | ||
612 | clear_bit(CIFS_INODE_PENDING_WRITERS, &cinode->flags); | ||
613 | wake_up_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS); | ||
614 | } | ||
615 | spin_unlock(&cinode->writers_lock); | ||
616 | } | ||
617 | |||
618 | void cifs_done_oplock_break(struct cifsInodeInfo *cinode) | ||
619 | { | ||
620 | clear_bit(CIFS_INODE_PENDING_OPLOCK_BREAK, &cinode->flags); | ||
621 | wake_up_bit(&cinode->flags, CIFS_INODE_PENDING_OPLOCK_BREAK); | ||
622 | } | ||
623 | |||
554 | bool | 624 | bool |
555 | backup_cred(struct cifs_sb_info *cifs_sb) | 625 | backup_cred(struct cifs_sb_info *cifs_sb) |
556 | { | 626 | { |