diff options
author | Mark Fasheh <mark.fasheh@oracle.com> | 2006-10-17 04:31:38 -0400 |
---|---|---|
committer | Jens Axboe <axboe@nelson.home.kernel.dk> | 2006-10-19 14:53:08 -0400 |
commit | 62752ee198dca9209b7dee504763e51b11e9e0ca (patch) | |
tree | 5e340a4c690851dadc3a8aa395e4c0d14b4837eb | |
parent | ce9e3d9953c8cb67001719b5516da2928e956be4 (diff) |
[PATCH] Take i_mutex in splice_from_pipe()
The splice_actor may be calling ->prepare_write() and ->commit_write(). We
want to hold i_mutex on the inode being written to before calling those so
that we don't race with i_size changes.
The same double-locking pattern already exists elsewhere in splice.c (in
link_pipe()), and if we eventually want _nolock variants of
generic_file_splice_write(), fs modules might have to replicate the nasty
locking code. We introduce inode_double_lock() and inode_double_unlock() to
consolidate the locking rules into one set of functions.
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
-rw-r--r-- | fs/inode.c | 36 | ||||
-rw-r--r-- | fs/splice.c | 24 | ||||
-rw-r--r-- | include/linux/fs.h | 3 |
3 files changed, 50 insertions, 13 deletions
diff --git a/fs/inode.c b/fs/inode.c index d9a21d122926..26cdb115ce67 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -1306,6 +1306,42 @@ void wake_up_inode(struct inode *inode) | |||
1306 | wake_up_bit(&inode->i_state, __I_LOCK); | 1306 | wake_up_bit(&inode->i_state, __I_LOCK); |
1307 | } | 1307 | } |
1308 | 1308 | ||
1309 | /* | ||
1310 | * We rarely want to lock two inodes that do not have a parent/child | ||
1311 | * relationship (such as directory, child inode) simultaneously. The | ||
1312 | * vast majority of file systems should be able to get along fine | ||
1313 | * without this. Do not use these functions except as a last resort. | ||
1314 | */ | ||
1315 | void inode_double_lock(struct inode *inode1, struct inode *inode2) | ||
1316 | { | ||
1317 | if (inode1 == NULL || inode2 == NULL || inode1 == inode2) { | ||
1318 | if (inode1) | ||
1319 | mutex_lock(&inode1->i_mutex); | ||
1320 | else if (inode2) | ||
1321 | mutex_lock(&inode2->i_mutex); | ||
1322 | return; | ||
1323 | } | ||
1324 | |||
1325 | if (inode1 < inode2) { | ||
1326 | mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); | ||
1327 | mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); | ||
1328 | } else { | ||
1329 | mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT); | ||
1330 | mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD); | ||
1331 | } | ||
1332 | } | ||
1333 | EXPORT_SYMBOL(inode_double_lock); | ||
1334 | |||
1335 | void inode_double_unlock(struct inode *inode1, struct inode *inode2) | ||
1336 | { | ||
1337 | if (inode1) | ||
1338 | mutex_unlock(&inode1->i_mutex); | ||
1339 | |||
1340 | if (inode2 && inode2 != inode1) | ||
1341 | mutex_unlock(&inode2->i_mutex); | ||
1342 | } | ||
1343 | EXPORT_SYMBOL(inode_double_unlock); | ||
1344 | |||
1309 | static __initdata unsigned long ihash_entries; | 1345 | static __initdata unsigned long ihash_entries; |
1310 | static int __init set_ihash_entries(char *str) | 1346 | static int __init set_ihash_entries(char *str) |
1311 | { | 1347 | { |
diff --git a/fs/splice.c b/fs/splice.c index a567010b62ac..c1072b6940c3 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -713,6 +713,7 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, | |||
713 | { | 713 | { |
714 | int ret, do_wakeup, err; | 714 | int ret, do_wakeup, err; |
715 | struct splice_desc sd; | 715 | struct splice_desc sd; |
716 | struct inode *inode = out->f_mapping->host; | ||
716 | 717 | ||
717 | ret = 0; | 718 | ret = 0; |
718 | do_wakeup = 0; | 719 | do_wakeup = 0; |
@@ -722,8 +723,13 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, | |||
722 | sd.file = out; | 723 | sd.file = out; |
723 | sd.pos = *ppos; | 724 | sd.pos = *ppos; |
724 | 725 | ||
725 | if (pipe->inode) | 726 | /* |
726 | mutex_lock(&pipe->inode->i_mutex); | 727 | * The actor worker might be calling ->prepare_write and |
728 | * ->commit_write. Most of the time, these expect i_mutex to | ||
729 | * be held. Since this may result in an ABBA deadlock with | ||
730 | * pipe->inode, we have to order lock acquiry here. | ||
731 | */ | ||
732 | inode_double_lock(inode, pipe->inode); | ||
727 | 733 | ||
728 | for (;;) { | 734 | for (;;) { |
729 | if (pipe->nrbufs) { | 735 | if (pipe->nrbufs) { |
@@ -797,8 +803,7 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, | |||
797 | pipe_wait(pipe); | 803 | pipe_wait(pipe); |
798 | } | 804 | } |
799 | 805 | ||
800 | if (pipe->inode) | 806 | inode_double_unlock(inode, pipe->inode); |
801 | mutex_unlock(&pipe->inode->i_mutex); | ||
802 | 807 | ||
803 | if (do_wakeup) { | 808 | if (do_wakeup) { |
804 | smp_mb(); | 809 | smp_mb(); |
@@ -1400,13 +1405,7 @@ static int link_pipe(struct pipe_inode_info *ipipe, | |||
1400 | * grabbing by inode address. Otherwise two different processes | 1405 | * grabbing by inode address. Otherwise two different processes |
1401 | * could deadlock (one doing tee from A -> B, the other from B -> A). | 1406 | * could deadlock (one doing tee from A -> B, the other from B -> A). |
1402 | */ | 1407 | */ |
1403 | if (ipipe->inode < opipe->inode) { | 1408 | inode_double_lock(ipipe->inode, opipe->inode); |
1404 | mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_PARENT); | ||
1405 | mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_CHILD); | ||
1406 | } else { | ||
1407 | mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_PARENT); | ||
1408 | mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_CHILD); | ||
1409 | } | ||
1410 | 1409 | ||
1411 | do { | 1410 | do { |
1412 | if (!opipe->readers) { | 1411 | if (!opipe->readers) { |
@@ -1450,8 +1449,7 @@ static int link_pipe(struct pipe_inode_info *ipipe, | |||
1450 | i++; | 1449 | i++; |
1451 | } while (len); | 1450 | } while (len); |
1452 | 1451 | ||
1453 | mutex_unlock(&ipipe->inode->i_mutex); | 1452 | inode_double_unlock(ipipe->inode, opipe->inode); |
1454 | mutex_unlock(&opipe->inode->i_mutex); | ||
1455 | 1453 | ||
1456 | /* | 1454 | /* |
1457 | * If we put data in the output pipe, wakeup any potential readers. | 1455 | * If we put data in the output pipe, wakeup any potential readers. |
diff --git a/include/linux/fs.h b/include/linux/fs.h index 661c7c572149..853a02f23936 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -623,6 +623,9 @@ enum inode_i_mutex_lock_class | |||
623 | I_MUTEX_QUOTA | 623 | I_MUTEX_QUOTA |
624 | }; | 624 | }; |
625 | 625 | ||
626 | extern void inode_double_lock(struct inode *inode1, struct inode *inode2); | ||
627 | extern void inode_double_unlock(struct inode *inode1, struct inode *inode2); | ||
628 | |||
626 | /* | 629 | /* |
627 | * NOTE: in a 32bit arch with a preemptable kernel and | 630 | * NOTE: in a 32bit arch with a preemptable kernel and |
628 | * an UP compile the i_size_read/write must be atomic | 631 | * an UP compile the i_size_read/write must be atomic |