diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/autofs/dirhash.c | 34 | ||||
-rw-r--r-- | fs/autofs4/dev-ioctl.c | 12 | ||||
-rw-r--r-- | fs/btrfs/async-thread.c | 60 | ||||
-rw-r--r-- | fs/btrfs/async-thread.h | 2 | ||||
-rw-r--r-- | fs/btrfs/ctree.c | 17 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 9 | ||||
-rw-r--r-- | fs/btrfs/extent_io.c | 86 | ||||
-rw-r--r-- | fs/btrfs/file.c | 6 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 36 | ||||
-rw-r--r-- | fs/btrfs/ioctl.c | 49 | ||||
-rw-r--r-- | fs/btrfs/ordered-data.c | 2 | ||||
-rw-r--r-- | fs/btrfs/super.c | 13 | ||||
-rw-r--r-- | fs/btrfs/volumes.c | 124 | ||||
-rw-r--r-- | fs/btrfs/volumes.h | 13 | ||||
-rw-r--r-- | fs/compat.c | 37 | ||||
-rw-r--r-- | fs/compat_ioctl.c | 7 | ||||
-rw-r--r-- | fs/dcache.c | 1 | ||||
-rw-r--r-- | fs/ecryptfs/miscdev.c | 15 | ||||
-rw-r--r-- | fs/filesystems.c | 2 | ||||
-rw-r--r-- | fs/gfs2/glops.c | 6 | ||||
-rw-r--r-- | fs/gfs2/ops_file.c | 4 | ||||
-rw-r--r-- | fs/hugetlbfs/inode.c | 3 | ||||
-rw-r--r-- | fs/namei.c | 2 | ||||
-rw-r--r-- | fs/namespace.c | 2 | ||||
-rw-r--r-- | fs/ncpfs/ioctl.c | 21 | ||||
-rw-r--r-- | fs/nfs/nfs3xdr.c | 3 | ||||
-rw-r--r-- | fs/nfsd/nfs4recover.c | 46 | ||||
-rw-r--r-- | fs/nfsd/vfs.c | 34 | ||||
-rw-r--r-- | fs/stat.c | 137 | ||||
-rw-r--r-- | fs/sysfs/bin.c | 13 | ||||
-rw-r--r-- | fs/xattr.c | 10 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_ioctl.c | 23 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_ioctl32.c | 12 |
33 files changed, 467 insertions, 374 deletions
diff --git a/fs/autofs/dirhash.c b/fs/autofs/dirhash.c index bf8c8af98004..4eb4d8dfb2f1 100644 --- a/fs/autofs/dirhash.c +++ b/fs/autofs/dirhash.c | |||
@@ -39,10 +39,12 @@ struct autofs_dir_ent *autofs_expire(struct super_block *sb, | |||
39 | { | 39 | { |
40 | struct autofs_dirhash *dh = &sbi->dirhash; | 40 | struct autofs_dirhash *dh = &sbi->dirhash; |
41 | struct autofs_dir_ent *ent; | 41 | struct autofs_dir_ent *ent; |
42 | struct dentry *dentry; | ||
43 | unsigned long timeout = sbi->exp_timeout; | 42 | unsigned long timeout = sbi->exp_timeout; |
44 | 43 | ||
45 | while (1) { | 44 | while (1) { |
45 | struct path path; | ||
46 | int umount_ok; | ||
47 | |||
46 | if ( list_empty(&dh->expiry_head) || sbi->catatonic ) | 48 | if ( list_empty(&dh->expiry_head) || sbi->catatonic ) |
47 | return NULL; /* No entries */ | 49 | return NULL; /* No entries */ |
48 | /* We keep the list sorted by last_usage and want old stuff */ | 50 | /* We keep the list sorted by last_usage and want old stuff */ |
@@ -57,17 +59,17 @@ struct autofs_dir_ent *autofs_expire(struct super_block *sb, | |||
57 | return ent; /* Symlinks are always expirable */ | 59 | return ent; /* Symlinks are always expirable */ |
58 | 60 | ||
59 | /* Get the dentry for the autofs subdirectory */ | 61 | /* Get the dentry for the autofs subdirectory */ |
60 | dentry = ent->dentry; | 62 | path.dentry = ent->dentry; |
61 | 63 | ||
62 | if ( !dentry ) { | 64 | if (!path.dentry) { |
63 | /* Should only happen in catatonic mode */ | 65 | /* Should only happen in catatonic mode */ |
64 | printk("autofs: dentry == NULL but inode range is directory, entry %s\n", ent->name); | 66 | printk("autofs: dentry == NULL but inode range is directory, entry %s\n", ent->name); |
65 | autofs_delete_usage(ent); | 67 | autofs_delete_usage(ent); |
66 | continue; | 68 | continue; |
67 | } | 69 | } |
68 | 70 | ||
69 | if ( !dentry->d_inode ) { | 71 | if (!path.dentry->d_inode) { |
70 | dput(dentry); | 72 | dput(path.dentry); |
71 | printk("autofs: negative dentry on expiry queue: %s\n", | 73 | printk("autofs: negative dentry on expiry queue: %s\n", |
72 | ent->name); | 74 | ent->name); |
73 | autofs_delete_usage(ent); | 75 | autofs_delete_usage(ent); |
@@ -76,29 +78,29 @@ struct autofs_dir_ent *autofs_expire(struct super_block *sb, | |||
76 | 78 | ||
77 | /* Make sure entry is mounted and unused; note that dentry will | 79 | /* Make sure entry is mounted and unused; note that dentry will |
78 | point to the mounted-on-top root. */ | 80 | point to the mounted-on-top root. */ |
79 | if (!S_ISDIR(dentry->d_inode->i_mode)||!d_mountpoint(dentry)) { | 81 | if (!S_ISDIR(path.dentry->d_inode->i_mode) || |
82 | !d_mountpoint(path.dentry)) { | ||
80 | DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name)); | 83 | DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name)); |
81 | continue; | 84 | continue; |
82 | } | 85 | } |
83 | mntget(mnt); | 86 | path.mnt = mnt; |
84 | dget(dentry); | 87 | path_get(&path); |
85 | if (!follow_down(&mnt, &dentry)) { | 88 | if (!follow_down(&path.mnt, &path.dentry)) { |
86 | dput(dentry); | 89 | path_put(&path); |
87 | mntput(mnt); | ||
88 | DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name)); | 90 | DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name)); |
89 | continue; | 91 | continue; |
90 | } | 92 | } |
91 | while (d_mountpoint(dentry) && follow_down(&mnt, &dentry)) | 93 | while (d_mountpoint(path.dentry) && |
94 | follow_down(&path.mnt, &path.dentry)) | ||
92 | ; | 95 | ; |
93 | dput(dentry); | 96 | umount_ok = may_umount(path.mnt); |
97 | path_put(&path); | ||
94 | 98 | ||
95 | if ( may_umount(mnt) ) { | 99 | if (umount_ok) { |
96 | mntput(mnt); | ||
97 | DPRINTK(("autofs: signaling expire on %s\n", ent->name)); | 100 | DPRINTK(("autofs: signaling expire on %s\n", ent->name)); |
98 | return ent; /* Expirable! */ | 101 | return ent; /* Expirable! */ |
99 | } | 102 | } |
100 | DPRINTK(("autofs: didn't expire due to may_umount: %s\n", ent->name)); | 103 | DPRINTK(("autofs: didn't expire due to may_umount: %s\n", ent->name)); |
101 | mntput(mnt); | ||
102 | } | 104 | } |
103 | return NULL; /* No expirable entries */ | 105 | return NULL; /* No expirable entries */ |
104 | } | 106 | } |
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index 9e5ae8a4f5c8..84168c0dcc2d 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c | |||
@@ -54,11 +54,10 @@ static int check_name(const char *name) | |||
54 | * Check a string doesn't overrun the chunk of | 54 | * Check a string doesn't overrun the chunk of |
55 | * memory we copied from user land. | 55 | * memory we copied from user land. |
56 | */ | 56 | */ |
57 | static int invalid_str(char *str, void *end) | 57 | static int invalid_str(char *str, size_t size) |
58 | { | 58 | { |
59 | while ((void *) str <= end) | 59 | if (memchr(str, 0, size)) |
60 | if (!*str++) | 60 | return 0; |
61 | return 0; | ||
62 | return -EINVAL; | 61 | return -EINVAL; |
63 | } | 62 | } |
64 | 63 | ||
@@ -138,8 +137,7 @@ static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param) | |||
138 | } | 137 | } |
139 | 138 | ||
140 | if (param->size > sizeof(*param)) { | 139 | if (param->size > sizeof(*param)) { |
141 | err = invalid_str(param->path, | 140 | err = invalid_str(param->path, param->size - sizeof(*param)); |
142 | (void *) ((size_t) param + param->size)); | ||
143 | if (err) { | 141 | if (err) { |
144 | AUTOFS_WARN( | 142 | AUTOFS_WARN( |
145 | "path string terminator missing for cmd(0x%08x)", | 143 | "path string terminator missing for cmd(0x%08x)", |
@@ -488,7 +486,7 @@ static int autofs_dev_ioctl_requester(struct file *fp, | |||
488 | } | 486 | } |
489 | 487 | ||
490 | path = param->path; | 488 | path = param->path; |
491 | devid = sbi->sb->s_dev; | 489 | devid = new_encode_dev(sbi->sb->s_dev); |
492 | 490 | ||
493 | param->requester.uid = param->requester.gid = -1; | 491 | param->requester.uid = param->requester.gid = -1; |
494 | 492 | ||
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 51bfdfc8fcda..502c3d61de62 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #define WORK_QUEUED_BIT 0 | 25 | #define WORK_QUEUED_BIT 0 |
26 | #define WORK_DONE_BIT 1 | 26 | #define WORK_DONE_BIT 1 |
27 | #define WORK_ORDER_DONE_BIT 2 | 27 | #define WORK_ORDER_DONE_BIT 2 |
28 | #define WORK_HIGH_PRIO_BIT 3 | ||
28 | 29 | ||
29 | /* | 30 | /* |
30 | * container for the kthread task pointer and the list of pending work | 31 | * container for the kthread task pointer and the list of pending work |
@@ -36,6 +37,7 @@ struct btrfs_worker_thread { | |||
36 | 37 | ||
37 | /* list of struct btrfs_work that are waiting for service */ | 38 | /* list of struct btrfs_work that are waiting for service */ |
38 | struct list_head pending; | 39 | struct list_head pending; |
40 | struct list_head prio_pending; | ||
39 | 41 | ||
40 | /* list of worker threads from struct btrfs_workers */ | 42 | /* list of worker threads from struct btrfs_workers */ |
41 | struct list_head worker_list; | 43 | struct list_head worker_list; |
@@ -103,10 +105,16 @@ static noinline int run_ordered_completions(struct btrfs_workers *workers, | |||
103 | 105 | ||
104 | spin_lock_irqsave(&workers->lock, flags); | 106 | spin_lock_irqsave(&workers->lock, flags); |
105 | 107 | ||
106 | while (!list_empty(&workers->order_list)) { | 108 | while (1) { |
107 | work = list_entry(workers->order_list.next, | 109 | if (!list_empty(&workers->prio_order_list)) { |
108 | struct btrfs_work, order_list); | 110 | work = list_entry(workers->prio_order_list.next, |
109 | 111 | struct btrfs_work, order_list); | |
112 | } else if (!list_empty(&workers->order_list)) { | ||
113 | work = list_entry(workers->order_list.next, | ||
114 | struct btrfs_work, order_list); | ||
115 | } else { | ||
116 | break; | ||
117 | } | ||
110 | if (!test_bit(WORK_DONE_BIT, &work->flags)) | 118 | if (!test_bit(WORK_DONE_BIT, &work->flags)) |
111 | break; | 119 | break; |
112 | 120 | ||
@@ -143,8 +151,14 @@ static int worker_loop(void *arg) | |||
143 | do { | 151 | do { |
144 | spin_lock_irq(&worker->lock); | 152 | spin_lock_irq(&worker->lock); |
145 | again_locked: | 153 | again_locked: |
146 | while (!list_empty(&worker->pending)) { | 154 | while (1) { |
147 | cur = worker->pending.next; | 155 | if (!list_empty(&worker->prio_pending)) |
156 | cur = worker->prio_pending.next; | ||
157 | else if (!list_empty(&worker->pending)) | ||
158 | cur = worker->pending.next; | ||
159 | else | ||
160 | break; | ||
161 | |||
148 | work = list_entry(cur, struct btrfs_work, list); | 162 | work = list_entry(cur, struct btrfs_work, list); |
149 | list_del(&work->list); | 163 | list_del(&work->list); |
150 | clear_bit(WORK_QUEUED_BIT, &work->flags); | 164 | clear_bit(WORK_QUEUED_BIT, &work->flags); |
@@ -163,7 +177,6 @@ again_locked: | |||
163 | 177 | ||
164 | spin_lock_irq(&worker->lock); | 178 | spin_lock_irq(&worker->lock); |
165 | check_idle_worker(worker); | 179 | check_idle_worker(worker); |
166 | |||
167 | } | 180 | } |
168 | if (freezing(current)) { | 181 | if (freezing(current)) { |
169 | worker->working = 0; | 182 | worker->working = 0; |
@@ -178,7 +191,8 @@ again_locked: | |||
178 | * jump_in? | 191 | * jump_in? |
179 | */ | 192 | */ |
180 | smp_mb(); | 193 | smp_mb(); |
181 | if (!list_empty(&worker->pending)) | 194 | if (!list_empty(&worker->pending) || |
195 | !list_empty(&worker->prio_pending)) | ||
182 | continue; | 196 | continue; |
183 | 197 | ||
184 | /* | 198 | /* |
@@ -191,7 +205,8 @@ again_locked: | |||
191 | */ | 205 | */ |
192 | schedule_timeout(1); | 206 | schedule_timeout(1); |
193 | smp_mb(); | 207 | smp_mb(); |
194 | if (!list_empty(&worker->pending)) | 208 | if (!list_empty(&worker->pending) || |
209 | !list_empty(&worker->prio_pending)) | ||
195 | continue; | 210 | continue; |
196 | 211 | ||
197 | if (kthread_should_stop()) | 212 | if (kthread_should_stop()) |
@@ -200,7 +215,8 @@ again_locked: | |||
200 | /* still no more work?, sleep for real */ | 215 | /* still no more work?, sleep for real */ |
201 | spin_lock_irq(&worker->lock); | 216 | spin_lock_irq(&worker->lock); |
202 | set_current_state(TASK_INTERRUPTIBLE); | 217 | set_current_state(TASK_INTERRUPTIBLE); |
203 | if (!list_empty(&worker->pending)) | 218 | if (!list_empty(&worker->pending) || |
219 | !list_empty(&worker->prio_pending)) | ||
204 | goto again_locked; | 220 | goto again_locked; |
205 | 221 | ||
206 | /* | 222 | /* |
@@ -248,6 +264,7 @@ void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max) | |||
248 | INIT_LIST_HEAD(&workers->worker_list); | 264 | INIT_LIST_HEAD(&workers->worker_list); |
249 | INIT_LIST_HEAD(&workers->idle_list); | 265 | INIT_LIST_HEAD(&workers->idle_list); |
250 | INIT_LIST_HEAD(&workers->order_list); | 266 | INIT_LIST_HEAD(&workers->order_list); |
267 | INIT_LIST_HEAD(&workers->prio_order_list); | ||
251 | spin_lock_init(&workers->lock); | 268 | spin_lock_init(&workers->lock); |
252 | workers->max_workers = max; | 269 | workers->max_workers = max; |
253 | workers->idle_thresh = 32; | 270 | workers->idle_thresh = 32; |
@@ -273,6 +290,7 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers) | |||
273 | } | 290 | } |
274 | 291 | ||
275 | INIT_LIST_HEAD(&worker->pending); | 292 | INIT_LIST_HEAD(&worker->pending); |
293 | INIT_LIST_HEAD(&worker->prio_pending); | ||
276 | INIT_LIST_HEAD(&worker->worker_list); | 294 | INIT_LIST_HEAD(&worker->worker_list); |
277 | spin_lock_init(&worker->lock); | 295 | spin_lock_init(&worker->lock); |
278 | atomic_set(&worker->num_pending, 0); | 296 | atomic_set(&worker->num_pending, 0); |
@@ -396,7 +414,10 @@ int btrfs_requeue_work(struct btrfs_work *work) | |||
396 | goto out; | 414 | goto out; |
397 | 415 | ||
398 | spin_lock_irqsave(&worker->lock, flags); | 416 | spin_lock_irqsave(&worker->lock, flags); |
399 | list_add_tail(&work->list, &worker->pending); | 417 | if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) |
418 | list_add_tail(&work->list, &worker->prio_pending); | ||
419 | else | ||
420 | list_add_tail(&work->list, &worker->pending); | ||
400 | atomic_inc(&worker->num_pending); | 421 | atomic_inc(&worker->num_pending); |
401 | 422 | ||
402 | /* by definition we're busy, take ourselves off the idle | 423 | /* by definition we're busy, take ourselves off the idle |
@@ -422,6 +443,11 @@ out: | |||
422 | return 0; | 443 | return 0; |
423 | } | 444 | } |
424 | 445 | ||
446 | void btrfs_set_work_high_prio(struct btrfs_work *work) | ||
447 | { | ||
448 | set_bit(WORK_HIGH_PRIO_BIT, &work->flags); | ||
449 | } | ||
450 | |||
425 | /* | 451 | /* |
426 | * places a struct btrfs_work into the pending queue of one of the kthreads | 452 | * places a struct btrfs_work into the pending queue of one of the kthreads |
427 | */ | 453 | */ |
@@ -438,7 +464,12 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work) | |||
438 | worker = find_worker(workers); | 464 | worker = find_worker(workers); |
439 | if (workers->ordered) { | 465 | if (workers->ordered) { |
440 | spin_lock_irqsave(&workers->lock, flags); | 466 | spin_lock_irqsave(&workers->lock, flags); |
441 | list_add_tail(&work->order_list, &workers->order_list); | 467 | if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) { |
468 | list_add_tail(&work->order_list, | ||
469 | &workers->prio_order_list); | ||
470 | } else { | ||
471 | list_add_tail(&work->order_list, &workers->order_list); | ||
472 | } | ||
442 | spin_unlock_irqrestore(&workers->lock, flags); | 473 | spin_unlock_irqrestore(&workers->lock, flags); |
443 | } else { | 474 | } else { |
444 | INIT_LIST_HEAD(&work->order_list); | 475 | INIT_LIST_HEAD(&work->order_list); |
@@ -446,7 +477,10 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work) | |||
446 | 477 | ||
447 | spin_lock_irqsave(&worker->lock, flags); | 478 | spin_lock_irqsave(&worker->lock, flags); |
448 | 479 | ||
449 | list_add_tail(&work->list, &worker->pending); | 480 | if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) |
481 | list_add_tail(&work->list, &worker->prio_pending); | ||
482 | else | ||
483 | list_add_tail(&work->list, &worker->pending); | ||
450 | atomic_inc(&worker->num_pending); | 484 | atomic_inc(&worker->num_pending); |
451 | check_busy_worker(worker); | 485 | check_busy_worker(worker); |
452 | 486 | ||
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h index 31be4ed8b63e..1b511c109db6 100644 --- a/fs/btrfs/async-thread.h +++ b/fs/btrfs/async-thread.h | |||
@@ -85,6 +85,7 @@ struct btrfs_workers { | |||
85 | * of work items waiting for completion | 85 | * of work items waiting for completion |
86 | */ | 86 | */ |
87 | struct list_head order_list; | 87 | struct list_head order_list; |
88 | struct list_head prio_order_list; | ||
88 | 89 | ||
89 | /* lock for finding the next worker thread to queue on */ | 90 | /* lock for finding the next worker thread to queue on */ |
90 | spinlock_t lock; | 91 | spinlock_t lock; |
@@ -98,4 +99,5 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers); | |||
98 | int btrfs_stop_workers(struct btrfs_workers *workers); | 99 | int btrfs_stop_workers(struct btrfs_workers *workers); |
99 | void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max); | 100 | void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max); |
100 | int btrfs_requeue_work(struct btrfs_work *work); | 101 | int btrfs_requeue_work(struct btrfs_work *work); |
102 | void btrfs_set_work_high_prio(struct btrfs_work *work); | ||
101 | #endif | 103 | #endif |
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index e5b2533b691a..a99f1c2a710d 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -1325,12 +1325,12 @@ static noinline int reada_for_balance(struct btrfs_root *root, | |||
1325 | int ret = 0; | 1325 | int ret = 0; |
1326 | int blocksize; | 1326 | int blocksize; |
1327 | 1327 | ||
1328 | parent = path->nodes[level - 1]; | 1328 | parent = path->nodes[level + 1]; |
1329 | if (!parent) | 1329 | if (!parent) |
1330 | return 0; | 1330 | return 0; |
1331 | 1331 | ||
1332 | nritems = btrfs_header_nritems(parent); | 1332 | nritems = btrfs_header_nritems(parent); |
1333 | slot = path->slots[level]; | 1333 | slot = path->slots[level + 1]; |
1334 | blocksize = btrfs_level_size(root, level); | 1334 | blocksize = btrfs_level_size(root, level); |
1335 | 1335 | ||
1336 | if (slot > 0) { | 1336 | if (slot > 0) { |
@@ -1341,7 +1341,7 @@ static noinline int reada_for_balance(struct btrfs_root *root, | |||
1341 | block1 = 0; | 1341 | block1 = 0; |
1342 | free_extent_buffer(eb); | 1342 | free_extent_buffer(eb); |
1343 | } | 1343 | } |
1344 | if (slot < nritems) { | 1344 | if (slot + 1 < nritems) { |
1345 | block2 = btrfs_node_blockptr(parent, slot + 1); | 1345 | block2 = btrfs_node_blockptr(parent, slot + 1); |
1346 | gen = btrfs_node_ptr_generation(parent, slot + 1); | 1346 | gen = btrfs_node_ptr_generation(parent, slot + 1); |
1347 | eb = btrfs_find_tree_block(root, block2, blocksize); | 1347 | eb = btrfs_find_tree_block(root, block2, blocksize); |
@@ -1351,7 +1351,11 @@ static noinline int reada_for_balance(struct btrfs_root *root, | |||
1351 | } | 1351 | } |
1352 | if (block1 || block2) { | 1352 | if (block1 || block2) { |
1353 | ret = -EAGAIN; | 1353 | ret = -EAGAIN; |
1354 | |||
1355 | /* release the whole path */ | ||
1354 | btrfs_release_path(root, path); | 1356 | btrfs_release_path(root, path); |
1357 | |||
1358 | /* read the blocks */ | ||
1355 | if (block1) | 1359 | if (block1) |
1356 | readahead_tree_block(root, block1, blocksize, 0); | 1360 | readahead_tree_block(root, block1, blocksize, 0); |
1357 | if (block2) | 1361 | if (block2) |
@@ -1361,7 +1365,7 @@ static noinline int reada_for_balance(struct btrfs_root *root, | |||
1361 | eb = read_tree_block(root, block1, blocksize, 0); | 1365 | eb = read_tree_block(root, block1, blocksize, 0); |
1362 | free_extent_buffer(eb); | 1366 | free_extent_buffer(eb); |
1363 | } | 1367 | } |
1364 | if (block1) { | 1368 | if (block2) { |
1365 | eb = read_tree_block(root, block2, blocksize, 0); | 1369 | eb = read_tree_block(root, block2, blocksize, 0); |
1366 | free_extent_buffer(eb); | 1370 | free_extent_buffer(eb); |
1367 | } | 1371 | } |
@@ -1481,12 +1485,15 @@ read_block_for_search(struct btrfs_trans_handle *trans, | |||
1481 | * of the btree by dropping locks before | 1485 | * of the btree by dropping locks before |
1482 | * we read. | 1486 | * we read. |
1483 | */ | 1487 | */ |
1484 | btrfs_release_path(NULL, p); | 1488 | btrfs_unlock_up_safe(p, level + 1); |
1489 | btrfs_set_path_blocking(p); | ||
1490 | |||
1485 | if (tmp) | 1491 | if (tmp) |
1486 | free_extent_buffer(tmp); | 1492 | free_extent_buffer(tmp); |
1487 | if (p->reada) | 1493 | if (p->reada) |
1488 | reada_for_search(root, p, level, slot, key->objectid); | 1494 | reada_for_search(root, p, level, slot, key->objectid); |
1489 | 1495 | ||
1496 | btrfs_release_path(NULL, p); | ||
1490 | tmp = read_tree_block(root, blocknr, blocksize, gen); | 1497 | tmp = read_tree_block(root, blocknr, blocksize, gen); |
1491 | if (tmp) | 1498 | if (tmp) |
1492 | free_extent_buffer(tmp); | 1499 | free_extent_buffer(tmp); |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 92caa8035f36..a6b83744b05d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -579,6 +579,10 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, | |||
579 | async->bio_flags = bio_flags; | 579 | async->bio_flags = bio_flags; |
580 | 580 | ||
581 | atomic_inc(&fs_info->nr_async_submits); | 581 | atomic_inc(&fs_info->nr_async_submits); |
582 | |||
583 | if (rw & (1 << BIO_RW_SYNCIO)) | ||
584 | btrfs_set_work_high_prio(&async->work); | ||
585 | |||
582 | btrfs_queue_worker(&fs_info->workers, &async->work); | 586 | btrfs_queue_worker(&fs_info->workers, &async->work); |
583 | #if 0 | 587 | #if 0 |
584 | int limit = btrfs_async_submit_limit(fs_info); | 588 | int limit = btrfs_async_submit_limit(fs_info); |
@@ -656,6 +660,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
656 | return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, | 660 | return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, |
657 | mirror_num, 0); | 661 | mirror_num, 0); |
658 | } | 662 | } |
663 | |||
659 | /* | 664 | /* |
660 | * kthread helpers are used to submit writes so that checksumming | 665 | * kthread helpers are used to submit writes so that checksumming |
661 | * can happen in parallel across all CPUs | 666 | * can happen in parallel across all CPUs |
@@ -2095,10 +2100,10 @@ static int write_dev_supers(struct btrfs_device *device, | |||
2095 | device->barriers = 0; | 2100 | device->barriers = 0; |
2096 | get_bh(bh); | 2101 | get_bh(bh); |
2097 | lock_buffer(bh); | 2102 | lock_buffer(bh); |
2098 | ret = submit_bh(WRITE, bh); | 2103 | ret = submit_bh(WRITE_SYNC, bh); |
2099 | } | 2104 | } |
2100 | } else { | 2105 | } else { |
2101 | ret = submit_bh(WRITE, bh); | 2106 | ret = submit_bh(WRITE_SYNC, bh); |
2102 | } | 2107 | } |
2103 | 2108 | ||
2104 | if (!ret && wait) { | 2109 | if (!ret && wait) { |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index eb2bee8b7fbf..05a1c42e25bf 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -50,7 +50,10 @@ struct extent_page_data { | |||
50 | /* tells writepage not to lock the state bits for this range | 50 | /* tells writepage not to lock the state bits for this range |
51 | * it still does the unlocking | 51 | * it still does the unlocking |
52 | */ | 52 | */ |
53 | int extent_locked; | 53 | unsigned int extent_locked:1; |
54 | |||
55 | /* tells the submit_bio code to use a WRITE_SYNC */ | ||
56 | unsigned int sync_io:1; | ||
54 | }; | 57 | }; |
55 | 58 | ||
56 | int __init extent_io_init(void) | 59 | int __init extent_io_init(void) |
@@ -2101,6 +2104,16 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page, | |||
2101 | return ret; | 2104 | return ret; |
2102 | } | 2105 | } |
2103 | 2106 | ||
2107 | static noinline void update_nr_written(struct page *page, | ||
2108 | struct writeback_control *wbc, | ||
2109 | unsigned long nr_written) | ||
2110 | { | ||
2111 | wbc->nr_to_write -= nr_written; | ||
2112 | if (wbc->range_cyclic || (wbc->nr_to_write > 0 && | ||
2113 | wbc->range_start == 0 && wbc->range_end == LLONG_MAX)) | ||
2114 | page->mapping->writeback_index = page->index + nr_written; | ||
2115 | } | ||
2116 | |||
2104 | /* | 2117 | /* |
2105 | * the writepage semantics are similar to regular writepage. extent | 2118 | * the writepage semantics are similar to regular writepage. extent |
2106 | * records are inserted to lock ranges in the tree, and as dirty areas | 2119 | * records are inserted to lock ranges in the tree, and as dirty areas |
@@ -2136,8 +2149,14 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2136 | u64 delalloc_end; | 2149 | u64 delalloc_end; |
2137 | int page_started; | 2150 | int page_started; |
2138 | int compressed; | 2151 | int compressed; |
2152 | int write_flags; | ||
2139 | unsigned long nr_written = 0; | 2153 | unsigned long nr_written = 0; |
2140 | 2154 | ||
2155 | if (wbc->sync_mode == WB_SYNC_ALL) | ||
2156 | write_flags = WRITE_SYNC_PLUG; | ||
2157 | else | ||
2158 | write_flags = WRITE; | ||
2159 | |||
2141 | WARN_ON(!PageLocked(page)); | 2160 | WARN_ON(!PageLocked(page)); |
2142 | pg_offset = i_size & (PAGE_CACHE_SIZE - 1); | 2161 | pg_offset = i_size & (PAGE_CACHE_SIZE - 1); |
2143 | if (page->index > end_index || | 2162 | if (page->index > end_index || |
@@ -2164,6 +2183,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2164 | delalloc_end = 0; | 2183 | delalloc_end = 0; |
2165 | page_started = 0; | 2184 | page_started = 0; |
2166 | if (!epd->extent_locked) { | 2185 | if (!epd->extent_locked) { |
2186 | /* | ||
2187 | * make sure the wbc mapping index is at least updated | ||
2188 | * to this page. | ||
2189 | */ | ||
2190 | update_nr_written(page, wbc, 0); | ||
2191 | |||
2167 | while (delalloc_end < page_end) { | 2192 | while (delalloc_end < page_end) { |
2168 | nr_delalloc = find_lock_delalloc_range(inode, tree, | 2193 | nr_delalloc = find_lock_delalloc_range(inode, tree, |
2169 | page, | 2194 | page, |
@@ -2185,7 +2210,13 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2185 | */ | 2210 | */ |
2186 | if (page_started) { | 2211 | if (page_started) { |
2187 | ret = 0; | 2212 | ret = 0; |
2188 | goto update_nr_written; | 2213 | /* |
2214 | * we've unlocked the page, so we can't update | ||
2215 | * the mapping's writeback index, just update | ||
2216 | * nr_to_write. | ||
2217 | */ | ||
2218 | wbc->nr_to_write -= nr_written; | ||
2219 | goto done_unlocked; | ||
2189 | } | 2220 | } |
2190 | } | 2221 | } |
2191 | lock_extent(tree, start, page_end, GFP_NOFS); | 2222 | lock_extent(tree, start, page_end, GFP_NOFS); |
@@ -2198,13 +2229,18 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2198 | if (ret == -EAGAIN) { | 2229 | if (ret == -EAGAIN) { |
2199 | unlock_extent(tree, start, page_end, GFP_NOFS); | 2230 | unlock_extent(tree, start, page_end, GFP_NOFS); |
2200 | redirty_page_for_writepage(wbc, page); | 2231 | redirty_page_for_writepage(wbc, page); |
2232 | update_nr_written(page, wbc, nr_written); | ||
2201 | unlock_page(page); | 2233 | unlock_page(page); |
2202 | ret = 0; | 2234 | ret = 0; |
2203 | goto update_nr_written; | 2235 | goto done_unlocked; |
2204 | } | 2236 | } |
2205 | } | 2237 | } |
2206 | 2238 | ||
2207 | nr_written++; | 2239 | /* |
2240 | * we don't want to touch the inode after unlocking the page, | ||
2241 | * so we update the mapping writeback index now | ||
2242 | */ | ||
2243 | update_nr_written(page, wbc, nr_written + 1); | ||
2208 | 2244 | ||
2209 | end = page_end; | 2245 | end = page_end; |
2210 | if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) | 2246 | if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) |
@@ -2314,9 +2350,9 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2314 | (unsigned long long)end); | 2350 | (unsigned long long)end); |
2315 | } | 2351 | } |
2316 | 2352 | ||
2317 | ret = submit_extent_page(WRITE, tree, page, sector, | 2353 | ret = submit_extent_page(write_flags, tree, page, |
2318 | iosize, pg_offset, bdev, | 2354 | sector, iosize, pg_offset, |
2319 | &epd->bio, max_nr, | 2355 | bdev, &epd->bio, max_nr, |
2320 | end_bio_extent_writepage, | 2356 | end_bio_extent_writepage, |
2321 | 0, 0, 0); | 2357 | 0, 0, 0); |
2322 | if (ret) | 2358 | if (ret) |
@@ -2336,11 +2372,8 @@ done: | |||
2336 | unlock_extent(tree, unlock_start, page_end, GFP_NOFS); | 2372 | unlock_extent(tree, unlock_start, page_end, GFP_NOFS); |
2337 | unlock_page(page); | 2373 | unlock_page(page); |
2338 | 2374 | ||
2339 | update_nr_written: | 2375 | done_unlocked: |
2340 | wbc->nr_to_write -= nr_written; | 2376 | |
2341 | if (wbc->range_cyclic || (wbc->nr_to_write > 0 && | ||
2342 | wbc->range_start == 0 && wbc->range_end == LLONG_MAX)) | ||
2343 | page->mapping->writeback_index = page->index + nr_written; | ||
2344 | return 0; | 2377 | return 0; |
2345 | } | 2378 | } |
2346 | 2379 | ||
@@ -2460,15 +2493,23 @@ retry: | |||
2460 | return ret; | 2493 | return ret; |
2461 | } | 2494 | } |
2462 | 2495 | ||
2463 | static noinline void flush_write_bio(void *data) | 2496 | static void flush_epd_write_bio(struct extent_page_data *epd) |
2464 | { | 2497 | { |
2465 | struct extent_page_data *epd = data; | ||
2466 | if (epd->bio) { | 2498 | if (epd->bio) { |
2467 | submit_one_bio(WRITE, epd->bio, 0, 0); | 2499 | if (epd->sync_io) |
2500 | submit_one_bio(WRITE_SYNC, epd->bio, 0, 0); | ||
2501 | else | ||
2502 | submit_one_bio(WRITE, epd->bio, 0, 0); | ||
2468 | epd->bio = NULL; | 2503 | epd->bio = NULL; |
2469 | } | 2504 | } |
2470 | } | 2505 | } |
2471 | 2506 | ||
2507 | static noinline void flush_write_bio(void *data) | ||
2508 | { | ||
2509 | struct extent_page_data *epd = data; | ||
2510 | flush_epd_write_bio(epd); | ||
2511 | } | ||
2512 | |||
2472 | int extent_write_full_page(struct extent_io_tree *tree, struct page *page, | 2513 | int extent_write_full_page(struct extent_io_tree *tree, struct page *page, |
2473 | get_extent_t *get_extent, | 2514 | get_extent_t *get_extent, |
2474 | struct writeback_control *wbc) | 2515 | struct writeback_control *wbc) |
@@ -2480,23 +2521,22 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, | |||
2480 | .tree = tree, | 2521 | .tree = tree, |
2481 | .get_extent = get_extent, | 2522 | .get_extent = get_extent, |
2482 | .extent_locked = 0, | 2523 | .extent_locked = 0, |
2524 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, | ||
2483 | }; | 2525 | }; |
2484 | struct writeback_control wbc_writepages = { | 2526 | struct writeback_control wbc_writepages = { |
2485 | .bdi = wbc->bdi, | 2527 | .bdi = wbc->bdi, |
2486 | .sync_mode = WB_SYNC_NONE, | 2528 | .sync_mode = wbc->sync_mode, |
2487 | .older_than_this = NULL, | 2529 | .older_than_this = NULL, |
2488 | .nr_to_write = 64, | 2530 | .nr_to_write = 64, |
2489 | .range_start = page_offset(page) + PAGE_CACHE_SIZE, | 2531 | .range_start = page_offset(page) + PAGE_CACHE_SIZE, |
2490 | .range_end = (loff_t)-1, | 2532 | .range_end = (loff_t)-1, |
2491 | }; | 2533 | }; |
2492 | 2534 | ||
2493 | |||
2494 | ret = __extent_writepage(page, wbc, &epd); | 2535 | ret = __extent_writepage(page, wbc, &epd); |
2495 | 2536 | ||
2496 | extent_write_cache_pages(tree, mapping, &wbc_writepages, | 2537 | extent_write_cache_pages(tree, mapping, &wbc_writepages, |
2497 | __extent_writepage, &epd, flush_write_bio); | 2538 | __extent_writepage, &epd, flush_write_bio); |
2498 | if (epd.bio) | 2539 | flush_epd_write_bio(&epd); |
2499 | submit_one_bio(WRITE, epd.bio, 0, 0); | ||
2500 | return ret; | 2540 | return ret; |
2501 | } | 2541 | } |
2502 | 2542 | ||
@@ -2515,6 +2555,7 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, | |||
2515 | .tree = tree, | 2555 | .tree = tree, |
2516 | .get_extent = get_extent, | 2556 | .get_extent = get_extent, |
2517 | .extent_locked = 1, | 2557 | .extent_locked = 1, |
2558 | .sync_io = mode == WB_SYNC_ALL, | ||
2518 | }; | 2559 | }; |
2519 | struct writeback_control wbc_writepages = { | 2560 | struct writeback_control wbc_writepages = { |
2520 | .bdi = inode->i_mapping->backing_dev_info, | 2561 | .bdi = inode->i_mapping->backing_dev_info, |
@@ -2540,8 +2581,7 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, | |||
2540 | start += PAGE_CACHE_SIZE; | 2581 | start += PAGE_CACHE_SIZE; |
2541 | } | 2582 | } |
2542 | 2583 | ||
2543 | if (epd.bio) | 2584 | flush_epd_write_bio(&epd); |
2544 | submit_one_bio(WRITE, epd.bio, 0, 0); | ||
2545 | return ret; | 2585 | return ret; |
2546 | } | 2586 | } |
2547 | 2587 | ||
@@ -2556,13 +2596,13 @@ int extent_writepages(struct extent_io_tree *tree, | |||
2556 | .tree = tree, | 2596 | .tree = tree, |
2557 | .get_extent = get_extent, | 2597 | .get_extent = get_extent, |
2558 | .extent_locked = 0, | 2598 | .extent_locked = 0, |
2599 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, | ||
2559 | }; | 2600 | }; |
2560 | 2601 | ||
2561 | ret = extent_write_cache_pages(tree, mapping, wbc, | 2602 | ret = extent_write_cache_pages(tree, mapping, wbc, |
2562 | __extent_writepage, &epd, | 2603 | __extent_writepage, &epd, |
2563 | flush_write_bio); | 2604 | flush_write_bio); |
2564 | if (epd.bio) | 2605 | flush_epd_write_bio(&epd); |
2565 | submit_one_bio(WRITE, epd.bio, 0, 0); | ||
2566 | return ret; | 2606 | return ret; |
2567 | } | 2607 | } |
2568 | 2608 | ||
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 9c9fb46ccd08..482f8db2cfd0 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -830,7 +830,7 @@ again: | |||
830 | 830 | ||
831 | ret = btrfs_del_items(trans, root, path, del_slot, del_nr); | 831 | ret = btrfs_del_items(trans, root, path, del_slot, del_nr); |
832 | BUG_ON(ret); | 832 | BUG_ON(ret); |
833 | goto done; | 833 | goto release; |
834 | } else if (split == start) { | 834 | } else if (split == start) { |
835 | if (locked_end < extent_end) { | 835 | if (locked_end < extent_end) { |
836 | ret = try_lock_extent(&BTRFS_I(inode)->io_tree, | 836 | ret = try_lock_extent(&BTRFS_I(inode)->io_tree, |
@@ -926,6 +926,8 @@ again: | |||
926 | } | 926 | } |
927 | done: | 927 | done: |
928 | btrfs_mark_buffer_dirty(leaf); | 928 | btrfs_mark_buffer_dirty(leaf); |
929 | |||
930 | release: | ||
929 | btrfs_release_path(root, path); | 931 | btrfs_release_path(root, path); |
930 | if (split_end && split == start) { | 932 | if (split_end && split == start) { |
931 | split = end; | 933 | split = end; |
@@ -1131,7 +1133,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
1131 | if (will_write) { | 1133 | if (will_write) { |
1132 | btrfs_fdatawrite_range(inode->i_mapping, pos, | 1134 | btrfs_fdatawrite_range(inode->i_mapping, pos, |
1133 | pos + write_bytes - 1, | 1135 | pos + write_bytes - 1, |
1134 | WB_SYNC_NONE); | 1136 | WB_SYNC_ALL); |
1135 | } else { | 1137 | } else { |
1136 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, | 1138 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, |
1137 | num_pages); | 1139 | num_pages); |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a0d1dd492a58..65219f6a16a1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -4970,10 +4970,10 @@ out_fail: | |||
4970 | return err; | 4970 | return err; |
4971 | } | 4971 | } |
4972 | 4972 | ||
4973 | static int prealloc_file_range(struct inode *inode, u64 start, u64 end, | 4973 | static int prealloc_file_range(struct btrfs_trans_handle *trans, |
4974 | struct inode *inode, u64 start, u64 end, | ||
4974 | u64 alloc_hint, int mode) | 4975 | u64 alloc_hint, int mode) |
4975 | { | 4976 | { |
4976 | struct btrfs_trans_handle *trans; | ||
4977 | struct btrfs_root *root = BTRFS_I(inode)->root; | 4977 | struct btrfs_root *root = BTRFS_I(inode)->root; |
4978 | struct btrfs_key ins; | 4978 | struct btrfs_key ins; |
4979 | u64 alloc_size; | 4979 | u64 alloc_size; |
@@ -4981,10 +4981,6 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end, | |||
4981 | u64 num_bytes = end - start; | 4981 | u64 num_bytes = end - start; |
4982 | int ret = 0; | 4982 | int ret = 0; |
4983 | 4983 | ||
4984 | trans = btrfs_join_transaction(root, 1); | ||
4985 | BUG_ON(!trans); | ||
4986 | btrfs_set_trans_block_group(trans, inode); | ||
4987 | |||
4988 | while (num_bytes > 0) { | 4984 | while (num_bytes > 0) { |
4989 | alloc_size = min(num_bytes, root->fs_info->max_extent); | 4985 | alloc_size = min(num_bytes, root->fs_info->max_extent); |
4990 | ret = btrfs_reserve_extent(trans, root, alloc_size, | 4986 | ret = btrfs_reserve_extent(trans, root, alloc_size, |
@@ -5015,7 +5011,6 @@ out: | |||
5015 | BUG_ON(ret); | 5011 | BUG_ON(ret); |
5016 | } | 5012 | } |
5017 | 5013 | ||
5018 | btrfs_end_transaction(trans, root); | ||
5019 | return ret; | 5014 | return ret; |
5020 | } | 5015 | } |
5021 | 5016 | ||
@@ -5029,11 +5024,18 @@ static long btrfs_fallocate(struct inode *inode, int mode, | |||
5029 | u64 alloc_hint = 0; | 5024 | u64 alloc_hint = 0; |
5030 | u64 mask = BTRFS_I(inode)->root->sectorsize - 1; | 5025 | u64 mask = BTRFS_I(inode)->root->sectorsize - 1; |
5031 | struct extent_map *em; | 5026 | struct extent_map *em; |
5027 | struct btrfs_trans_handle *trans; | ||
5032 | int ret; | 5028 | int ret; |
5033 | 5029 | ||
5034 | alloc_start = offset & ~mask; | 5030 | alloc_start = offset & ~mask; |
5035 | alloc_end = (offset + len + mask) & ~mask; | 5031 | alloc_end = (offset + len + mask) & ~mask; |
5036 | 5032 | ||
5033 | /* | ||
5034 | * wait for ordered IO before we have any locks. We'll loop again | ||
5035 | * below with the locks held. | ||
5036 | */ | ||
5037 | btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start); | ||
5038 | |||
5037 | mutex_lock(&inode->i_mutex); | 5039 | mutex_lock(&inode->i_mutex); |
5038 | if (alloc_start > inode->i_size) { | 5040 | if (alloc_start > inode->i_size) { |
5039 | ret = btrfs_cont_expand(inode, alloc_start); | 5041 | ret = btrfs_cont_expand(inode, alloc_start); |
@@ -5043,6 +5045,16 @@ static long btrfs_fallocate(struct inode *inode, int mode, | |||
5043 | 5045 | ||
5044 | while (1) { | 5046 | while (1) { |
5045 | struct btrfs_ordered_extent *ordered; | 5047 | struct btrfs_ordered_extent *ordered; |
5048 | |||
5049 | trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1); | ||
5050 | if (!trans) { | ||
5051 | ret = -EIO; | ||
5052 | goto out; | ||
5053 | } | ||
5054 | |||
5055 | /* the extent lock is ordered inside the running | ||
5056 | * transaction | ||
5057 | */ | ||
5046 | lock_extent(&BTRFS_I(inode)->io_tree, alloc_start, | 5058 | lock_extent(&BTRFS_I(inode)->io_tree, alloc_start, |
5047 | alloc_end - 1, GFP_NOFS); | 5059 | alloc_end - 1, GFP_NOFS); |
5048 | ordered = btrfs_lookup_first_ordered_extent(inode, | 5060 | ordered = btrfs_lookup_first_ordered_extent(inode, |
@@ -5053,6 +5065,12 @@ static long btrfs_fallocate(struct inode *inode, int mode, | |||
5053 | btrfs_put_ordered_extent(ordered); | 5065 | btrfs_put_ordered_extent(ordered); |
5054 | unlock_extent(&BTRFS_I(inode)->io_tree, | 5066 | unlock_extent(&BTRFS_I(inode)->io_tree, |
5055 | alloc_start, alloc_end - 1, GFP_NOFS); | 5067 | alloc_start, alloc_end - 1, GFP_NOFS); |
5068 | btrfs_end_transaction(trans, BTRFS_I(inode)->root); | ||
5069 | |||
5070 | /* | ||
5071 | * we can't wait on the range with the transaction | ||
5072 | * running or with the extent lock held | ||
5073 | */ | ||
5056 | btrfs_wait_ordered_range(inode, alloc_start, | 5074 | btrfs_wait_ordered_range(inode, alloc_start, |
5057 | alloc_end - alloc_start); | 5075 | alloc_end - alloc_start); |
5058 | } else { | 5076 | } else { |
@@ -5070,7 +5088,7 @@ static long btrfs_fallocate(struct inode *inode, int mode, | |||
5070 | last_byte = min(extent_map_end(em), alloc_end); | 5088 | last_byte = min(extent_map_end(em), alloc_end); |
5071 | last_byte = (last_byte + mask) & ~mask; | 5089 | last_byte = (last_byte + mask) & ~mask; |
5072 | if (em->block_start == EXTENT_MAP_HOLE) { | 5090 | if (em->block_start == EXTENT_MAP_HOLE) { |
5073 | ret = prealloc_file_range(inode, cur_offset, | 5091 | ret = prealloc_file_range(trans, inode, cur_offset, |
5074 | last_byte, alloc_hint, mode); | 5092 | last_byte, alloc_hint, mode); |
5075 | if (ret < 0) { | 5093 | if (ret < 0) { |
5076 | free_extent_map(em); | 5094 | free_extent_map(em); |
@@ -5089,6 +5107,8 @@ static long btrfs_fallocate(struct inode *inode, int mode, | |||
5089 | } | 5107 | } |
5090 | unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, alloc_end - 1, | 5108 | unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, alloc_end - 1, |
5091 | GFP_NOFS); | 5109 | GFP_NOFS); |
5110 | |||
5111 | btrfs_end_transaction(trans, BTRFS_I(inode)->root); | ||
5092 | out: | 5112 | out: |
5093 | mutex_unlock(&inode->i_mutex); | 5113 | mutex_unlock(&inode->i_mutex); |
5094 | return ret; | 5114 | return ret; |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 7594bec1be10..9f135e878507 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -461,15 +461,9 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) | |||
461 | if (!capable(CAP_SYS_ADMIN)) | 461 | if (!capable(CAP_SYS_ADMIN)) |
462 | return -EPERM; | 462 | return -EPERM; |
463 | 463 | ||
464 | vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); | 464 | vol_args = memdup_user(arg, sizeof(*vol_args)); |
465 | 465 | if (IS_ERR(vol_args)) | |
466 | if (!vol_args) | 466 | return PTR_ERR(vol_args); |
467 | return -ENOMEM; | ||
468 | |||
469 | if (copy_from_user(vol_args, arg, sizeof(*vol_args))) { | ||
470 | ret = -EFAULT; | ||
471 | goto out; | ||
472 | } | ||
473 | 467 | ||
474 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | 468 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; |
475 | namelen = strlen(vol_args->name); | 469 | namelen = strlen(vol_args->name); |
@@ -545,7 +539,6 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) | |||
545 | 539 | ||
546 | out_unlock: | 540 | out_unlock: |
547 | mutex_unlock(&root->fs_info->volume_mutex); | 541 | mutex_unlock(&root->fs_info->volume_mutex); |
548 | out: | ||
549 | kfree(vol_args); | 542 | kfree(vol_args); |
550 | return ret; | 543 | return ret; |
551 | } | 544 | } |
@@ -565,15 +558,9 @@ static noinline int btrfs_ioctl_snap_create(struct file *file, | |||
565 | if (root->fs_info->sb->s_flags & MS_RDONLY) | 558 | if (root->fs_info->sb->s_flags & MS_RDONLY) |
566 | return -EROFS; | 559 | return -EROFS; |
567 | 560 | ||
568 | vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); | 561 | vol_args = memdup_user(arg, sizeof(*vol_args)); |
569 | 562 | if (IS_ERR(vol_args)) | |
570 | if (!vol_args) | 563 | return PTR_ERR(vol_args); |
571 | return -ENOMEM; | ||
572 | |||
573 | if (copy_from_user(vol_args, arg, sizeof(*vol_args))) { | ||
574 | ret = -EFAULT; | ||
575 | goto out; | ||
576 | } | ||
577 | 564 | ||
578 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | 565 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; |
579 | namelen = strlen(vol_args->name); | 566 | namelen = strlen(vol_args->name); |
@@ -675,19 +662,13 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg) | |||
675 | if (!capable(CAP_SYS_ADMIN)) | 662 | if (!capable(CAP_SYS_ADMIN)) |
676 | return -EPERM; | 663 | return -EPERM; |
677 | 664 | ||
678 | vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); | 665 | vol_args = memdup_user(arg, sizeof(*vol_args)); |
666 | if (IS_ERR(vol_args)) | ||
667 | return PTR_ERR(vol_args); | ||
679 | 668 | ||
680 | if (!vol_args) | ||
681 | return -ENOMEM; | ||
682 | |||
683 | if (copy_from_user(vol_args, arg, sizeof(*vol_args))) { | ||
684 | ret = -EFAULT; | ||
685 | goto out; | ||
686 | } | ||
687 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | 669 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; |
688 | ret = btrfs_init_new_device(root, vol_args->name); | 670 | ret = btrfs_init_new_device(root, vol_args->name); |
689 | 671 | ||
690 | out: | ||
691 | kfree(vol_args); | 672 | kfree(vol_args); |
692 | return ret; | 673 | return ret; |
693 | } | 674 | } |
@@ -703,19 +684,13 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg) | |||
703 | if (root->fs_info->sb->s_flags & MS_RDONLY) | 684 | if (root->fs_info->sb->s_flags & MS_RDONLY) |
704 | return -EROFS; | 685 | return -EROFS; |
705 | 686 | ||
706 | vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); | 687 | vol_args = memdup_user(arg, sizeof(*vol_args)); |
688 | if (IS_ERR(vol_args)) | ||
689 | return PTR_ERR(vol_args); | ||
707 | 690 | ||
708 | if (!vol_args) | ||
709 | return -ENOMEM; | ||
710 | |||
711 | if (copy_from_user(vol_args, arg, sizeof(*vol_args))) { | ||
712 | ret = -EFAULT; | ||
713 | goto out; | ||
714 | } | ||
715 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | 691 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; |
716 | ret = btrfs_rm_device(root, vol_args->name); | 692 | ret = btrfs_rm_device(root, vol_args->name); |
717 | 693 | ||
718 | out: | ||
719 | kfree(vol_args); | 694 | kfree(vol_args); |
720 | return ret; | 695 | return ret; |
721 | } | 696 | } |
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 53c87b197d70..d6f0806c682f 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -489,7 +489,7 @@ again: | |||
489 | /* start IO across the range first to instantiate any delalloc | 489 | /* start IO across the range first to instantiate any delalloc |
490 | * extents | 490 | * extents |
491 | */ | 491 | */ |
492 | btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_NONE); | 492 | btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_ALL); |
493 | 493 | ||
494 | /* The compression code will leave pages locked but return from | 494 | /* The compression code will leave pages locked but return from |
495 | * writepage without setting the page writeback. Starting again | 495 | * writepage without setting the page writeback. Starting again |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 9744af9d71e9..a7acfe639a44 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -635,14 +635,9 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, | |||
635 | if (!capable(CAP_SYS_ADMIN)) | 635 | if (!capable(CAP_SYS_ADMIN)) |
636 | return -EPERM; | 636 | return -EPERM; |
637 | 637 | ||
638 | vol = kmalloc(sizeof(*vol), GFP_KERNEL); | 638 | vol = memdup_user((void __user *)arg, sizeof(*vol)); |
639 | if (!vol) | 639 | if (IS_ERR(vol)) |
640 | return -ENOMEM; | 640 | return PTR_ERR(vol); |
641 | |||
642 | if (copy_from_user(vol, (void __user *)arg, sizeof(*vol))) { | ||
643 | ret = -EFAULT; | ||
644 | goto out; | ||
645 | } | ||
646 | 641 | ||
647 | switch (cmd) { | 642 | switch (cmd) { |
648 | case BTRFS_IOC_SCAN_DEV: | 643 | case BTRFS_IOC_SCAN_DEV: |
@@ -650,7 +645,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, | |||
650 | &btrfs_fs_type, &fs_devices); | 645 | &btrfs_fs_type, &fs_devices); |
651 | break; | 646 | break; |
652 | } | 647 | } |
653 | out: | 648 | |
654 | kfree(vol); | 649 | kfree(vol); |
655 | return ret; | 650 | return ret; |
656 | } | 651 | } |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index e0913e469728..e53835b88594 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -125,6 +125,20 @@ static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid) | |||
125 | return NULL; | 125 | return NULL; |
126 | } | 126 | } |
127 | 127 | ||
128 | static void requeue_list(struct btrfs_pending_bios *pending_bios, | ||
129 | struct bio *head, struct bio *tail) | ||
130 | { | ||
131 | |||
132 | struct bio *old_head; | ||
133 | |||
134 | old_head = pending_bios->head; | ||
135 | pending_bios->head = head; | ||
136 | if (pending_bios->tail) | ||
137 | tail->bi_next = old_head; | ||
138 | else | ||
139 | pending_bios->tail = tail; | ||
140 | } | ||
141 | |||
128 | /* | 142 | /* |
129 | * we try to collect pending bios for a device so we don't get a large | 143 | * we try to collect pending bios for a device so we don't get a large |
130 | * number of procs sending bios down to the same device. This greatly | 144 | * number of procs sending bios down to the same device. This greatly |
@@ -141,10 +155,12 @@ static noinline int run_scheduled_bios(struct btrfs_device *device) | |||
141 | struct bio *pending; | 155 | struct bio *pending; |
142 | struct backing_dev_info *bdi; | 156 | struct backing_dev_info *bdi; |
143 | struct btrfs_fs_info *fs_info; | 157 | struct btrfs_fs_info *fs_info; |
158 | struct btrfs_pending_bios *pending_bios; | ||
144 | struct bio *tail; | 159 | struct bio *tail; |
145 | struct bio *cur; | 160 | struct bio *cur; |
146 | int again = 0; | 161 | int again = 0; |
147 | unsigned long num_run = 0; | 162 | unsigned long num_run; |
163 | unsigned long num_sync_run; | ||
148 | unsigned long limit; | 164 | unsigned long limit; |
149 | unsigned long last_waited = 0; | 165 | unsigned long last_waited = 0; |
150 | 166 | ||
@@ -153,20 +169,30 @@ static noinline int run_scheduled_bios(struct btrfs_device *device) | |||
153 | limit = btrfs_async_submit_limit(fs_info); | 169 | limit = btrfs_async_submit_limit(fs_info); |
154 | limit = limit * 2 / 3; | 170 | limit = limit * 2 / 3; |
155 | 171 | ||
172 | /* we want to make sure that every time we switch from the sync | ||
173 | * list to the normal list, we unplug | ||
174 | */ | ||
175 | num_sync_run = 0; | ||
176 | |||
156 | loop: | 177 | loop: |
157 | spin_lock(&device->io_lock); | 178 | spin_lock(&device->io_lock); |
179 | num_run = 0; | ||
158 | 180 | ||
159 | loop_lock: | 181 | loop_lock: |
182 | |||
160 | /* take all the bios off the list at once and process them | 183 | /* take all the bios off the list at once and process them |
161 | * later on (without the lock held). But, remember the | 184 | * later on (without the lock held). But, remember the |
162 | * tail and other pointers so the bios can be properly reinserted | 185 | * tail and other pointers so the bios can be properly reinserted |
163 | * into the list if we hit congestion | 186 | * into the list if we hit congestion |
164 | */ | 187 | */ |
165 | pending = device->pending_bios; | 188 | if (device->pending_sync_bios.head) |
166 | tail = device->pending_bio_tail; | 189 | pending_bios = &device->pending_sync_bios; |
190 | else | ||
191 | pending_bios = &device->pending_bios; | ||
192 | |||
193 | pending = pending_bios->head; | ||
194 | tail = pending_bios->tail; | ||
167 | WARN_ON(pending && !tail); | 195 | WARN_ON(pending && !tail); |
168 | device->pending_bios = NULL; | ||
169 | device->pending_bio_tail = NULL; | ||
170 | 196 | ||
171 | /* | 197 | /* |
172 | * if pending was null this time around, no bios need processing | 198 | * if pending was null this time around, no bios need processing |
@@ -176,16 +202,41 @@ loop_lock: | |||
176 | * device->running_pending is used to synchronize with the | 202 | * device->running_pending is used to synchronize with the |
177 | * schedule_bio code. | 203 | * schedule_bio code. |
178 | */ | 204 | */ |
179 | if (pending) { | 205 | if (device->pending_sync_bios.head == NULL && |
180 | again = 1; | 206 | device->pending_bios.head == NULL) { |
181 | device->running_pending = 1; | ||
182 | } else { | ||
183 | again = 0; | 207 | again = 0; |
184 | device->running_pending = 0; | 208 | device->running_pending = 0; |
209 | } else { | ||
210 | again = 1; | ||
211 | device->running_pending = 1; | ||
185 | } | 212 | } |
213 | |||
214 | pending_bios->head = NULL; | ||
215 | pending_bios->tail = NULL; | ||
216 | |||
186 | spin_unlock(&device->io_lock); | 217 | spin_unlock(&device->io_lock); |
187 | 218 | ||
219 | /* | ||
220 | * if we're doing the regular priority list, make sure we unplug | ||
221 | * for any high prio bios we've sent down | ||
222 | */ | ||
223 | if (pending_bios == &device->pending_bios && num_sync_run > 0) { | ||
224 | num_sync_run = 0; | ||
225 | blk_run_backing_dev(bdi, NULL); | ||
226 | } | ||
227 | |||
188 | while (pending) { | 228 | while (pending) { |
229 | |||
230 | rmb(); | ||
231 | if (pending_bios != &device->pending_sync_bios && | ||
232 | device->pending_sync_bios.head && | ||
233 | num_run > 16) { | ||
234 | cond_resched(); | ||
235 | spin_lock(&device->io_lock); | ||
236 | requeue_list(pending_bios, pending, tail); | ||
237 | goto loop_lock; | ||
238 | } | ||
239 | |||
189 | cur = pending; | 240 | cur = pending; |
190 | pending = pending->bi_next; | 241 | pending = pending->bi_next; |
191 | cur->bi_next = NULL; | 242 | cur->bi_next = NULL; |
@@ -196,10 +247,18 @@ loop_lock: | |||
196 | wake_up(&fs_info->async_submit_wait); | 247 | wake_up(&fs_info->async_submit_wait); |
197 | 248 | ||
198 | BUG_ON(atomic_read(&cur->bi_cnt) == 0); | 249 | BUG_ON(atomic_read(&cur->bi_cnt) == 0); |
199 | bio_get(cur); | ||
200 | submit_bio(cur->bi_rw, cur); | 250 | submit_bio(cur->bi_rw, cur); |
201 | bio_put(cur); | ||
202 | num_run++; | 251 | num_run++; |
252 | if (bio_sync(cur)) | ||
253 | num_sync_run++; | ||
254 | |||
255 | if (need_resched()) { | ||
256 | if (num_sync_run) { | ||
257 | blk_run_backing_dev(bdi, NULL); | ||
258 | num_sync_run = 0; | ||
259 | } | ||
260 | cond_resched(); | ||
261 | } | ||
203 | 262 | ||
204 | /* | 263 | /* |
205 | * we made progress, there is more work to do and the bdi | 264 | * we made progress, there is more work to do and the bdi |
@@ -208,7 +267,6 @@ loop_lock: | |||
208 | */ | 267 | */ |
209 | if (pending && bdi_write_congested(bdi) && num_run > 16 && | 268 | if (pending && bdi_write_congested(bdi) && num_run > 16 && |
210 | fs_info->fs_devices->open_devices > 1) { | 269 | fs_info->fs_devices->open_devices > 1) { |
211 | struct bio *old_head; | ||
212 | struct io_context *ioc; | 270 | struct io_context *ioc; |
213 | 271 | ||
214 | ioc = current->io_context; | 272 | ioc = current->io_context; |
@@ -233,17 +291,17 @@ loop_lock: | |||
233 | * against it before looping | 291 | * against it before looping |
234 | */ | 292 | */ |
235 | last_waited = ioc->last_waited; | 293 | last_waited = ioc->last_waited; |
294 | if (need_resched()) { | ||
295 | if (num_sync_run) { | ||
296 | blk_run_backing_dev(bdi, NULL); | ||
297 | num_sync_run = 0; | ||
298 | } | ||
299 | cond_resched(); | ||
300 | } | ||
236 | continue; | 301 | continue; |
237 | } | 302 | } |
238 | spin_lock(&device->io_lock); | 303 | spin_lock(&device->io_lock); |
239 | 304 | requeue_list(pending_bios, pending, tail); | |
240 | old_head = device->pending_bios; | ||
241 | device->pending_bios = pending; | ||
242 | if (device->pending_bio_tail) | ||
243 | tail->bi_next = old_head; | ||
244 | else | ||
245 | device->pending_bio_tail = tail; | ||
246 | |||
247 | device->running_pending = 1; | 305 | device->running_pending = 1; |
248 | 306 | ||
249 | spin_unlock(&device->io_lock); | 307 | spin_unlock(&device->io_lock); |
@@ -251,11 +309,18 @@ loop_lock: | |||
251 | goto done; | 309 | goto done; |
252 | } | 310 | } |
253 | } | 311 | } |
312 | |||
313 | if (num_sync_run) { | ||
314 | num_sync_run = 0; | ||
315 | blk_run_backing_dev(bdi, NULL); | ||
316 | } | ||
317 | |||
318 | cond_resched(); | ||
254 | if (again) | 319 | if (again) |
255 | goto loop; | 320 | goto loop; |
256 | 321 | ||
257 | spin_lock(&device->io_lock); | 322 | spin_lock(&device->io_lock); |
258 | if (device->pending_bios) | 323 | if (device->pending_bios.head || device->pending_sync_bios.head) |
259 | goto loop_lock; | 324 | goto loop_lock; |
260 | spin_unlock(&device->io_lock); | 325 | spin_unlock(&device->io_lock); |
261 | 326 | ||
@@ -2497,7 +2562,7 @@ again: | |||
2497 | max_errors = 1; | 2562 | max_errors = 1; |
2498 | } | 2563 | } |
2499 | } | 2564 | } |
2500 | if (multi_ret && rw == WRITE && | 2565 | if (multi_ret && (rw & (1 << BIO_RW)) && |
2501 | stripes_allocated < stripes_required) { | 2566 | stripes_allocated < stripes_required) { |
2502 | stripes_allocated = map->num_stripes; | 2567 | stripes_allocated = map->num_stripes; |
2503 | free_extent_map(em); | 2568 | free_extent_map(em); |
@@ -2762,6 +2827,7 @@ static noinline int schedule_bio(struct btrfs_root *root, | |||
2762 | int rw, struct bio *bio) | 2827 | int rw, struct bio *bio) |
2763 | { | 2828 | { |
2764 | int should_queue = 1; | 2829 | int should_queue = 1; |
2830 | struct btrfs_pending_bios *pending_bios; | ||
2765 | 2831 | ||
2766 | /* don't bother with additional async steps for reads, right now */ | 2832 | /* don't bother with additional async steps for reads, right now */ |
2767 | if (!(rw & (1 << BIO_RW))) { | 2833 | if (!(rw & (1 << BIO_RW))) { |
@@ -2783,13 +2849,17 @@ static noinline int schedule_bio(struct btrfs_root *root, | |||
2783 | bio->bi_rw |= rw; | 2849 | bio->bi_rw |= rw; |
2784 | 2850 | ||
2785 | spin_lock(&device->io_lock); | 2851 | spin_lock(&device->io_lock); |
2852 | if (bio_sync(bio)) | ||
2853 | pending_bios = &device->pending_sync_bios; | ||
2854 | else | ||
2855 | pending_bios = &device->pending_bios; | ||
2786 | 2856 | ||
2787 | if (device->pending_bio_tail) | 2857 | if (pending_bios->tail) |
2788 | device->pending_bio_tail->bi_next = bio; | 2858 | pending_bios->tail->bi_next = bio; |
2789 | 2859 | ||
2790 | device->pending_bio_tail = bio; | 2860 | pending_bios->tail = bio; |
2791 | if (!device->pending_bios) | 2861 | if (!pending_bios->head) |
2792 | device->pending_bios = bio; | 2862 | pending_bios->head = bio; |
2793 | if (device->running_pending) | 2863 | if (device->running_pending) |
2794 | should_queue = 0; | 2864 | should_queue = 0; |
2795 | 2865 | ||
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 2185de72ff7d..5836327ba5dd 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
@@ -23,13 +23,22 @@ | |||
23 | #include "async-thread.h" | 23 | #include "async-thread.h" |
24 | 24 | ||
25 | struct buffer_head; | 25 | struct buffer_head; |
26 | struct btrfs_pending_bios { | ||
27 | struct bio *head; | ||
28 | struct bio *tail; | ||
29 | }; | ||
30 | |||
26 | struct btrfs_device { | 31 | struct btrfs_device { |
27 | struct list_head dev_list; | 32 | struct list_head dev_list; |
28 | struct list_head dev_alloc_list; | 33 | struct list_head dev_alloc_list; |
29 | struct btrfs_fs_devices *fs_devices; | 34 | struct btrfs_fs_devices *fs_devices; |
30 | struct btrfs_root *dev_root; | 35 | struct btrfs_root *dev_root; |
31 | struct bio *pending_bios; | 36 | |
32 | struct bio *pending_bio_tail; | 37 | /* regular prio bios */ |
38 | struct btrfs_pending_bios pending_bios; | ||
39 | /* WRITE_SYNC bios */ | ||
40 | struct btrfs_pending_bios pending_sync_bios; | ||
41 | |||
33 | int running_pending; | 42 | int running_pending; |
34 | u64 generation; | 43 | u64 generation; |
35 | 44 | ||
diff --git a/fs/compat.c b/fs/compat.c index 3f84d5f15889..379a399bf5c3 100644 --- a/fs/compat.c +++ b/fs/compat.c | |||
@@ -181,22 +181,24 @@ asmlinkage long compat_sys_newstat(char __user * filename, | |||
181 | struct compat_stat __user *statbuf) | 181 | struct compat_stat __user *statbuf) |
182 | { | 182 | { |
183 | struct kstat stat; | 183 | struct kstat stat; |
184 | int error = vfs_stat_fd(AT_FDCWD, filename, &stat); | 184 | int error; |
185 | 185 | ||
186 | if (!error) | 186 | error = vfs_stat(filename, &stat); |
187 | error = cp_compat_stat(&stat, statbuf); | 187 | if (error) |
188 | return error; | 188 | return error; |
189 | return cp_compat_stat(&stat, statbuf); | ||
189 | } | 190 | } |
190 | 191 | ||
191 | asmlinkage long compat_sys_newlstat(char __user * filename, | 192 | asmlinkage long compat_sys_newlstat(char __user * filename, |
192 | struct compat_stat __user *statbuf) | 193 | struct compat_stat __user *statbuf) |
193 | { | 194 | { |
194 | struct kstat stat; | 195 | struct kstat stat; |
195 | int error = vfs_lstat_fd(AT_FDCWD, filename, &stat); | 196 | int error; |
196 | 197 | ||
197 | if (!error) | 198 | error = vfs_lstat(filename, &stat); |
198 | error = cp_compat_stat(&stat, statbuf); | 199 | if (error) |
199 | return error; | 200 | return error; |
201 | return cp_compat_stat(&stat, statbuf); | ||
200 | } | 202 | } |
201 | 203 | ||
202 | #ifndef __ARCH_WANT_STAT64 | 204 | #ifndef __ARCH_WANT_STAT64 |
@@ -204,21 +206,12 @@ asmlinkage long compat_sys_newfstatat(unsigned int dfd, char __user *filename, | |||
204 | struct compat_stat __user *statbuf, int flag) | 206 | struct compat_stat __user *statbuf, int flag) |
205 | { | 207 | { |
206 | struct kstat stat; | 208 | struct kstat stat; |
207 | int error = -EINVAL; | 209 | int error; |
208 | |||
209 | if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) | ||
210 | goto out; | ||
211 | |||
212 | if (flag & AT_SYMLINK_NOFOLLOW) | ||
213 | error = vfs_lstat_fd(dfd, filename, &stat); | ||
214 | else | ||
215 | error = vfs_stat_fd(dfd, filename, &stat); | ||
216 | |||
217 | if (!error) | ||
218 | error = cp_compat_stat(&stat, statbuf); | ||
219 | 210 | ||
220 | out: | 211 | error = vfs_fstatat(dfd, filename, &stat, flag); |
221 | return error; | 212 | if (error) |
213 | return error; | ||
214 | return cp_compat_stat(&stat, statbuf); | ||
222 | } | 215 | } |
223 | #endif | 216 | #endif |
224 | 217 | ||
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 3e87ce443ea2..b83f6bcfa51a 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c | |||
@@ -58,7 +58,6 @@ | |||
58 | #include <linux/i2c.h> | 58 | #include <linux/i2c.h> |
59 | #include <linux/i2c-dev.h> | 59 | #include <linux/i2c-dev.h> |
60 | #include <linux/atalk.h> | 60 | #include <linux/atalk.h> |
61 | #include <linux/loop.h> | ||
62 | 61 | ||
63 | #include <net/bluetooth/bluetooth.h> | 62 | #include <net/bluetooth/bluetooth.h> |
64 | #include <net/bluetooth/hci.h> | 63 | #include <net/bluetooth/hci.h> |
@@ -68,6 +67,7 @@ | |||
68 | #include <linux/gigaset_dev.h> | 67 | #include <linux/gigaset_dev.h> |
69 | 68 | ||
70 | #ifdef CONFIG_BLOCK | 69 | #ifdef CONFIG_BLOCK |
70 | #include <linux/loop.h> | ||
71 | #include <scsi/scsi.h> | 71 | #include <scsi/scsi.h> |
72 | #include <scsi/scsi_ioctl.h> | 72 | #include <scsi/scsi_ioctl.h> |
73 | #include <scsi/sg.h> | 73 | #include <scsi/sg.h> |
@@ -2660,6 +2660,8 @@ HANDLE_IOCTL(SONET_GETFRAMING, do_atm_ioctl) | |||
2660 | HANDLE_IOCTL(SONET_GETFRSENSE, do_atm_ioctl) | 2660 | HANDLE_IOCTL(SONET_GETFRSENSE, do_atm_ioctl) |
2661 | /* block stuff */ | 2661 | /* block stuff */ |
2662 | #ifdef CONFIG_BLOCK | 2662 | #ifdef CONFIG_BLOCK |
2663 | /* loop */ | ||
2664 | IGNORE_IOCTL(LOOP_CLR_FD) | ||
2663 | /* Raw devices */ | 2665 | /* Raw devices */ |
2664 | HANDLE_IOCTL(RAW_SETBIND, raw_ioctl) | 2666 | HANDLE_IOCTL(RAW_SETBIND, raw_ioctl) |
2665 | HANDLE_IOCTL(RAW_GETBIND, raw_ioctl) | 2667 | HANDLE_IOCTL(RAW_GETBIND, raw_ioctl) |
@@ -2728,9 +2730,6 @@ HANDLE_IOCTL(LPSETTIMEOUT, lp_timeout_trans) | |||
2728 | IGNORE_IOCTL(VFAT_IOCTL_READDIR_BOTH32) | 2730 | IGNORE_IOCTL(VFAT_IOCTL_READDIR_BOTH32) |
2729 | IGNORE_IOCTL(VFAT_IOCTL_READDIR_SHORT32) | 2731 | IGNORE_IOCTL(VFAT_IOCTL_READDIR_SHORT32) |
2730 | 2732 | ||
2731 | /* loop */ | ||
2732 | IGNORE_IOCTL(LOOP_CLR_FD) | ||
2733 | |||
2734 | #ifdef CONFIG_SPARC | 2733 | #ifdef CONFIG_SPARC |
2735 | /* Sparc framebuffers, handled in sbusfb_compat_ioctl() */ | 2734 | /* Sparc framebuffers, handled in sbusfb_compat_ioctl() */ |
2736 | IGNORE_IOCTL(FBIOGTYPE) | 2735 | IGNORE_IOCTL(FBIOGTYPE) |
diff --git a/fs/dcache.c b/fs/dcache.c index 761d30be2683..1fcffebfb44f 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -2149,7 +2149,6 @@ int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry) | |||
2149 | int result; | 2149 | int result; |
2150 | unsigned long seq; | 2150 | unsigned long seq; |
2151 | 2151 | ||
2152 | /* FIXME: This is old behavior, needed? Please check callers. */ | ||
2153 | if (new_dentry == old_dentry) | 2152 | if (new_dentry == old_dentry) |
2154 | return 1; | 2153 | return 1; |
2155 | 2154 | ||
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c index a67fea655f49..dda3c58eefc0 100644 --- a/fs/ecryptfs/miscdev.c +++ b/fs/ecryptfs/miscdev.c | |||
@@ -418,18 +418,13 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf, | |||
418 | 418 | ||
419 | if (count == 0) | 419 | if (count == 0) |
420 | goto out; | 420 | goto out; |
421 | data = kmalloc(count, GFP_KERNEL); | 421 | |
422 | if (!data) { | 422 | data = memdup_user(buf, count); |
423 | printk(KERN_ERR "%s: Out of memory whilst attempting to " | 423 | if (IS_ERR(data)) { |
424 | "kmalloc([%zd], GFP_KERNEL)\n", __func__, count); | 424 | printk(KERN_ERR "%s: memdup_user returned error [%ld]\n", |
425 | __func__, PTR_ERR(data)); | ||
425 | goto out; | 426 | goto out; |
426 | } | 427 | } |
427 | rc = copy_from_user(data, buf, count); | ||
428 | if (rc) { | ||
429 | printk(KERN_ERR "%s: copy_from_user returned error [%d]\n", | ||
430 | __func__, rc); | ||
431 | goto out_free; | ||
432 | } | ||
433 | sz = count; | 428 | sz = count; |
434 | i = 0; | 429 | i = 0; |
435 | switch (data[i++]) { | 430 | switch (data[i++]) { |
diff --git a/fs/filesystems.c b/fs/filesystems.c index 1aa70260e6d1..a24c58e181db 100644 --- a/fs/filesystems.c +++ b/fs/filesystems.c | |||
@@ -199,7 +199,7 @@ SYSCALL_DEFINE3(sysfs, int, option, unsigned long, arg1, unsigned long, arg2) | |||
199 | return retval; | 199 | return retval; |
200 | } | 200 | } |
201 | 201 | ||
202 | int get_filesystem_list(char * buf) | 202 | int __init get_filesystem_list(char *buf) |
203 | { | 203 | { |
204 | int len = 0; | 204 | int len = 0; |
205 | struct file_system_type * tmp; | 205 | struct file_system_type * tmp; |
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index bf23a62aa925..70f87f43afa2 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c | |||
@@ -156,6 +156,12 @@ static void inode_go_sync(struct gfs2_glock *gl) | |||
156 | error = filemap_fdatawait(metamapping); | 156 | error = filemap_fdatawait(metamapping); |
157 | mapping_set_error(metamapping, error); | 157 | mapping_set_error(metamapping, error); |
158 | gfs2_ail_empty_gl(gl); | 158 | gfs2_ail_empty_gl(gl); |
159 | /* | ||
160 | * Writeback of the data mapping may cause the dirty flag to be set | ||
161 | * so we have to clear it again here. | ||
162 | */ | ||
163 | smp_mb__before_clear_bit(); | ||
164 | clear_bit(GLF_DIRTY, &gl->gl_flags); | ||
159 | } | 165 | } |
160 | 166 | ||
161 | /** | 167 | /** |
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index 101caf3ee861..5d82e91887e3 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c | |||
@@ -413,7 +413,9 @@ out_unlock: | |||
413 | gfs2_glock_dq(&gh); | 413 | gfs2_glock_dq(&gh); |
414 | out: | 414 | out: |
415 | gfs2_holder_uninit(&gh); | 415 | gfs2_holder_uninit(&gh); |
416 | if (ret) | 416 | if (ret == -ENOMEM) |
417 | ret = VM_FAULT_OOM; | ||
418 | else if (ret) | ||
417 | ret = VM_FAULT_SIGBUS; | 419 | ret = VM_FAULT_SIGBUS; |
418 | return ret; | 420 | return ret; |
419 | } | 421 | } |
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 23a3c76711e0..153d9681192b 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
@@ -26,7 +26,6 @@ | |||
26 | #include <linux/pagevec.h> | 26 | #include <linux/pagevec.h> |
27 | #include <linux/parser.h> | 27 | #include <linux/parser.h> |
28 | #include <linux/mman.h> | 28 | #include <linux/mman.h> |
29 | #include <linux/quotaops.h> | ||
30 | #include <linux/slab.h> | 29 | #include <linux/slab.h> |
31 | #include <linux/dnotify.h> | 30 | #include <linux/dnotify.h> |
32 | #include <linux/statfs.h> | 31 | #include <linux/statfs.h> |
@@ -842,7 +841,7 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) | |||
842 | bad_val: | 841 | bad_val: |
843 | printk(KERN_ERR "hugetlbfs: Bad value '%s' for mount option '%s'\n", | 842 | printk(KERN_ERR "hugetlbfs: Bad value '%s' for mount option '%s'\n", |
844 | args[0].from, p); | 843 | args[0].from, p); |
845 | return 1; | 844 | return -EINVAL; |
846 | } | 845 | } |
847 | 846 | ||
848 | static int | 847 | static int |
diff --git a/fs/namei.c b/fs/namei.c index b8433ebfae05..78f253cd2d4f 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -1248,6 +1248,8 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) | |||
1248 | int err; | 1248 | int err; |
1249 | struct qstr this; | 1249 | struct qstr this; |
1250 | 1250 | ||
1251 | WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex)); | ||
1252 | |||
1251 | err = __lookup_one_len(name, &this, base, len); | 1253 | err = __lookup_one_len(name, &this, base, len); |
1252 | if (err) | 1254 | if (err) |
1253 | return ERR_PTR(err); | 1255 | return ERR_PTR(err); |
diff --git a/fs/namespace.c b/fs/namespace.c index d9138f81ec10..41196209a906 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -1377,7 +1377,7 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt, | |||
1377 | if (parent_path) { | 1377 | if (parent_path) { |
1378 | detach_mnt(source_mnt, parent_path); | 1378 | detach_mnt(source_mnt, parent_path); |
1379 | attach_mnt(source_mnt, path); | 1379 | attach_mnt(source_mnt, path); |
1380 | touch_mnt_namespace(current->nsproxy->mnt_ns); | 1380 | touch_mnt_namespace(parent_path->mnt->mnt_ns); |
1381 | } else { | 1381 | } else { |
1382 | mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt); | 1382 | mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt); |
1383 | commit_tree(source_mnt); | 1383 | commit_tree(source_mnt); |
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c index f54360f50a9c..fa038df63ac8 100644 --- a/fs/ncpfs/ioctl.c +++ b/fs/ncpfs/ioctl.c | |||
@@ -660,13 +660,10 @@ outrel: | |||
660 | if (user.object_name_len > NCP_OBJECT_NAME_MAX_LEN) | 660 | if (user.object_name_len > NCP_OBJECT_NAME_MAX_LEN) |
661 | return -ENOMEM; | 661 | return -ENOMEM; |
662 | if (user.object_name_len) { | 662 | if (user.object_name_len) { |
663 | newname = kmalloc(user.object_name_len, GFP_USER); | 663 | newname = memdup_user(user.object_name, |
664 | if (!newname) | 664 | user.object_name_len); |
665 | return -ENOMEM; | 665 | if (IS_ERR(newname)) |
666 | if (copy_from_user(newname, user.object_name, user.object_name_len)) { | 666 | return PTR_ERR(newname); |
667 | kfree(newname); | ||
668 | return -EFAULT; | ||
669 | } | ||
670 | } else { | 667 | } else { |
671 | newname = NULL; | 668 | newname = NULL; |
672 | } | 669 | } |
@@ -760,13 +757,9 @@ outrel: | |||
760 | if (user.len > NCP_PRIVATE_DATA_MAX_LEN) | 757 | if (user.len > NCP_PRIVATE_DATA_MAX_LEN) |
761 | return -ENOMEM; | 758 | return -ENOMEM; |
762 | if (user.len) { | 759 | if (user.len) { |
763 | new = kmalloc(user.len, GFP_USER); | 760 | new = memdup_user(user.data, user.len); |
764 | if (!new) | 761 | if (IS_ERR(new)) |
765 | return -ENOMEM; | 762 | return PTR_ERR(new); |
766 | if (copy_from_user(new, user.data, user.len)) { | ||
767 | kfree(new); | ||
768 | return -EFAULT; | ||
769 | } | ||
770 | } else { | 763 | } else { |
771 | new = NULL; | 764 | new = NULL; |
772 | } | 765 | } |
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index e6a1932c7110..35869a4921f1 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c | |||
@@ -713,7 +713,8 @@ nfs3_xdr_setaclargs(struct rpc_rqst *req, __be32 *p, | |||
713 | if (args->npages != 0) | 713 | if (args->npages != 0) |
714 | xdr_encode_pages(buf, args->pages, 0, args->len); | 714 | xdr_encode_pages(buf, args->pages, 0, args->len); |
715 | else | 715 | else |
716 | req->rq_slen += args->len; | 716 | req->rq_slen = xdr_adjust_iovec(req->rq_svec, |
717 | p + XDR_QUADLEN(args->len)); | ||
717 | 718 | ||
718 | err = nfsacl_encode(buf, base, args->inode, | 719 | err = nfsacl_encode(buf, base, args->inode, |
719 | (args->mask & NFS_ACL) ? | 720 | (args->mask & NFS_ACL) ? |
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 3444c0052a87..5275097a7565 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c | |||
@@ -229,21 +229,23 @@ nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f) | |||
229 | goto out; | 229 | goto out; |
230 | status = vfs_readdir(filp, nfsd4_build_namelist, &names); | 230 | status = vfs_readdir(filp, nfsd4_build_namelist, &names); |
231 | fput(filp); | 231 | fput(filp); |
232 | mutex_lock(&dir->d_inode->i_mutex); | ||
232 | while (!list_empty(&names)) { | 233 | while (!list_empty(&names)) { |
233 | entry = list_entry(names.next, struct name_list, list); | 234 | entry = list_entry(names.next, struct name_list, list); |
234 | 235 | ||
235 | dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1); | 236 | dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1); |
236 | if (IS_ERR(dentry)) { | 237 | if (IS_ERR(dentry)) { |
237 | status = PTR_ERR(dentry); | 238 | status = PTR_ERR(dentry); |
238 | goto out; | 239 | break; |
239 | } | 240 | } |
240 | status = f(dir, dentry); | 241 | status = f(dir, dentry); |
241 | dput(dentry); | 242 | dput(dentry); |
242 | if (status) | 243 | if (status) |
243 | goto out; | 244 | break; |
244 | list_del(&entry->list); | 245 | list_del(&entry->list); |
245 | kfree(entry); | 246 | kfree(entry); |
246 | } | 247 | } |
248 | mutex_unlock(&dir->d_inode->i_mutex); | ||
247 | out: | 249 | out: |
248 | while (!list_empty(&names)) { | 250 | while (!list_empty(&names)) { |
249 | entry = list_entry(names.next, struct name_list, list); | 251 | entry = list_entry(names.next, struct name_list, list); |
@@ -255,36 +257,6 @@ out: | |||
255 | } | 257 | } |
256 | 258 | ||
257 | static int | 259 | static int |
258 | nfsd4_remove_clid_file(struct dentry *dir, struct dentry *dentry) | ||
259 | { | ||
260 | int status; | ||
261 | |||
262 | if (!S_ISREG(dir->d_inode->i_mode)) { | ||
263 | printk("nfsd4: non-file found in client recovery directory\n"); | ||
264 | return -EINVAL; | ||
265 | } | ||
266 | mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); | ||
267 | status = vfs_unlink(dir->d_inode, dentry); | ||
268 | mutex_unlock(&dir->d_inode->i_mutex); | ||
269 | return status; | ||
270 | } | ||
271 | |||
272 | static int | ||
273 | nfsd4_clear_clid_dir(struct dentry *dir, struct dentry *dentry) | ||
274 | { | ||
275 | int status; | ||
276 | |||
277 | /* For now this directory should already be empty, but we empty it of | ||
278 | * any regular files anyway, just in case the directory was created by | ||
279 | * a kernel from the future.... */ | ||
280 | nfsd4_list_rec_dir(dentry, nfsd4_remove_clid_file); | ||
281 | mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); | ||
282 | status = vfs_rmdir(dir->d_inode, dentry); | ||
283 | mutex_unlock(&dir->d_inode->i_mutex); | ||
284 | return status; | ||
285 | } | ||
286 | |||
287 | static int | ||
288 | nfsd4_unlink_clid_dir(char *name, int namlen) | 260 | nfsd4_unlink_clid_dir(char *name, int namlen) |
289 | { | 261 | { |
290 | struct dentry *dentry; | 262 | struct dentry *dentry; |
@@ -294,18 +266,18 @@ nfsd4_unlink_clid_dir(char *name, int namlen) | |||
294 | 266 | ||
295 | mutex_lock(&rec_dir.dentry->d_inode->i_mutex); | 267 | mutex_lock(&rec_dir.dentry->d_inode->i_mutex); |
296 | dentry = lookup_one_len(name, rec_dir.dentry, namlen); | 268 | dentry = lookup_one_len(name, rec_dir.dentry, namlen); |
297 | mutex_unlock(&rec_dir.dentry->d_inode->i_mutex); | ||
298 | if (IS_ERR(dentry)) { | 269 | if (IS_ERR(dentry)) { |
299 | status = PTR_ERR(dentry); | 270 | status = PTR_ERR(dentry); |
300 | return status; | 271 | goto out_unlock; |
301 | } | 272 | } |
302 | status = -ENOENT; | 273 | status = -ENOENT; |
303 | if (!dentry->d_inode) | 274 | if (!dentry->d_inode) |
304 | goto out; | 275 | goto out; |
305 | 276 | status = vfs_rmdir(rec_dir.dentry->d_inode, dentry); | |
306 | status = nfsd4_clear_clid_dir(rec_dir.dentry, dentry); | ||
307 | out: | 277 | out: |
308 | dput(dentry); | 278 | dput(dentry); |
279 | out_unlock: | ||
280 | mutex_unlock(&rec_dir.dentry->d_inode->i_mutex); | ||
309 | return status; | 281 | return status; |
310 | } | 282 | } |
311 | 283 | ||
@@ -348,7 +320,7 @@ purge_old(struct dentry *parent, struct dentry *child) | |||
348 | if (nfs4_has_reclaimed_state(child->d_name.name, false)) | 320 | if (nfs4_has_reclaimed_state(child->d_name.name, false)) |
349 | return 0; | 321 | return 0; |
350 | 322 | ||
351 | status = nfsd4_clear_clid_dir(parent, child); | 323 | status = vfs_rmdir(parent->d_inode, child); |
352 | if (status) | 324 | if (status) |
353 | printk("failed to remove client recovery directory %s\n", | 325 | printk("failed to remove client recovery directory %s\n", |
354 | child->d_name.name); | 326 | child->d_name.name); |
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index ab93fcfef254..6c68ffd6b4bb 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c | |||
@@ -116,10 +116,15 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, | |||
116 | } | 116 | } |
117 | if ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) { | 117 | if ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) { |
118 | /* successfully crossed mount point */ | 118 | /* successfully crossed mount point */ |
119 | exp_put(exp); | 119 | /* |
120 | *expp = exp2; | 120 | * This is subtle: dentry is *not* under mnt at this point. |
121 | * The only reason we are safe is that original mnt is pinned | ||
122 | * down by exp, so we should dput before putting exp. | ||
123 | */ | ||
121 | dput(dentry); | 124 | dput(dentry); |
122 | *dpp = mounts; | 125 | *dpp = mounts; |
126 | exp_put(exp); | ||
127 | *expp = exp2; | ||
123 | } else { | 128 | } else { |
124 | exp_put(exp2); | 129 | exp_put(exp2); |
125 | dput(mounts); | 130 | dput(mounts); |
@@ -1885,8 +1890,8 @@ static int nfsd_buffered_filldir(void *__buf, const char *name, int namlen, | |||
1885 | return 0; | 1890 | return 0; |
1886 | } | 1891 | } |
1887 | 1892 | ||
1888 | static int nfsd_buffered_readdir(struct file *file, filldir_t func, | 1893 | static __be32 nfsd_buffered_readdir(struct file *file, filldir_t func, |
1889 | struct readdir_cd *cdp, loff_t *offsetp) | 1894 | struct readdir_cd *cdp, loff_t *offsetp) |
1890 | { | 1895 | { |
1891 | struct readdir_data buf; | 1896 | struct readdir_data buf; |
1892 | struct buffered_dirent *de; | 1897 | struct buffered_dirent *de; |
@@ -1896,11 +1901,12 @@ static int nfsd_buffered_readdir(struct file *file, filldir_t func, | |||
1896 | 1901 | ||
1897 | buf.dirent = (void *)__get_free_page(GFP_KERNEL); | 1902 | buf.dirent = (void *)__get_free_page(GFP_KERNEL); |
1898 | if (!buf.dirent) | 1903 | if (!buf.dirent) |
1899 | return -ENOMEM; | 1904 | return nfserrno(-ENOMEM); |
1900 | 1905 | ||
1901 | offset = *offsetp; | 1906 | offset = *offsetp; |
1902 | 1907 | ||
1903 | while (1) { | 1908 | while (1) { |
1909 | struct inode *dir_inode = file->f_path.dentry->d_inode; | ||
1904 | unsigned int reclen; | 1910 | unsigned int reclen; |
1905 | 1911 | ||
1906 | cdp->err = nfserr_eof; /* will be cleared on successful read */ | 1912 | cdp->err = nfserr_eof; /* will be cleared on successful read */ |
@@ -1919,26 +1925,38 @@ static int nfsd_buffered_readdir(struct file *file, filldir_t func, | |||
1919 | if (!size) | 1925 | if (!size) |
1920 | break; | 1926 | break; |
1921 | 1927 | ||
1928 | /* | ||
1929 | * Various filldir functions may end up calling back into | ||
1930 | * lookup_one_len() and the file system's ->lookup() method. | ||
1931 | * These expect i_mutex to be held, as it would within readdir. | ||
1932 | */ | ||
1933 | host_err = mutex_lock_killable(&dir_inode->i_mutex); | ||
1934 | if (host_err) | ||
1935 | break; | ||
1936 | |||
1922 | de = (struct buffered_dirent *)buf.dirent; | 1937 | de = (struct buffered_dirent *)buf.dirent; |
1923 | while (size > 0) { | 1938 | while (size > 0) { |
1924 | offset = de->offset; | 1939 | offset = de->offset; |
1925 | 1940 | ||
1926 | if (func(cdp, de->name, de->namlen, de->offset, | 1941 | if (func(cdp, de->name, de->namlen, de->offset, |
1927 | de->ino, de->d_type)) | 1942 | de->ino, de->d_type)) |
1928 | goto done; | 1943 | break; |
1929 | 1944 | ||
1930 | if (cdp->err != nfs_ok) | 1945 | if (cdp->err != nfs_ok) |
1931 | goto done; | 1946 | break; |
1932 | 1947 | ||
1933 | reclen = ALIGN(sizeof(*de) + de->namlen, | 1948 | reclen = ALIGN(sizeof(*de) + de->namlen, |
1934 | sizeof(u64)); | 1949 | sizeof(u64)); |
1935 | size -= reclen; | 1950 | size -= reclen; |
1936 | de = (struct buffered_dirent *)((char *)de + reclen); | 1951 | de = (struct buffered_dirent *)((char *)de + reclen); |
1937 | } | 1952 | } |
1953 | mutex_unlock(&dir_inode->i_mutex); | ||
1954 | if (size > 0) /* We bailed out early */ | ||
1955 | break; | ||
1956 | |||
1938 | offset = vfs_llseek(file, 0, SEEK_CUR); | 1957 | offset = vfs_llseek(file, 0, SEEK_CUR); |
1939 | } | 1958 | } |
1940 | 1959 | ||
1941 | done: | ||
1942 | free_page((unsigned long)(buf.dirent)); | 1960 | free_page((unsigned long)(buf.dirent)); |
1943 | 1961 | ||
1944 | if (host_err) | 1962 | if (host_err) |
@@ -55,59 +55,54 @@ int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) | |||
55 | 55 | ||
56 | EXPORT_SYMBOL(vfs_getattr); | 56 | EXPORT_SYMBOL(vfs_getattr); |
57 | 57 | ||
58 | int vfs_stat_fd(int dfd, char __user *name, struct kstat *stat) | 58 | int vfs_fstat(unsigned int fd, struct kstat *stat) |
59 | { | 59 | { |
60 | struct path path; | 60 | struct file *f = fget(fd); |
61 | int error; | 61 | int error = -EBADF; |
62 | 62 | ||
63 | error = user_path_at(dfd, name, LOOKUP_FOLLOW, &path); | 63 | if (f) { |
64 | if (!error) { | 64 | error = vfs_getattr(f->f_path.mnt, f->f_path.dentry, stat); |
65 | error = vfs_getattr(path.mnt, path.dentry, stat); | 65 | fput(f); |
66 | path_put(&path); | ||
67 | } | 66 | } |
68 | return error; | 67 | return error; |
69 | } | 68 | } |
69 | EXPORT_SYMBOL(vfs_fstat); | ||
70 | 70 | ||
71 | int vfs_stat(char __user *name, struct kstat *stat) | 71 | int vfs_fstatat(int dfd, char __user *filename, struct kstat *stat, int flag) |
72 | { | 72 | { |
73 | return vfs_stat_fd(AT_FDCWD, name, stat); | 73 | struct path path; |
74 | } | 74 | int error = -EINVAL; |
75 | int lookup_flags = 0; | ||
75 | 76 | ||
76 | EXPORT_SYMBOL(vfs_stat); | 77 | if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) |
78 | goto out; | ||
77 | 79 | ||
78 | int vfs_lstat_fd(int dfd, char __user *name, struct kstat *stat) | 80 | if (!(flag & AT_SYMLINK_NOFOLLOW)) |
79 | { | 81 | lookup_flags |= LOOKUP_FOLLOW; |
80 | struct path path; | ||
81 | int error; | ||
82 | 82 | ||
83 | error = user_path_at(dfd, name, 0, &path); | 83 | error = user_path_at(dfd, filename, lookup_flags, &path); |
84 | if (!error) { | 84 | if (error) |
85 | error = vfs_getattr(path.mnt, path.dentry, stat); | 85 | goto out; |
86 | path_put(&path); | 86 | |
87 | } | 87 | error = vfs_getattr(path.mnt, path.dentry, stat); |
88 | path_put(&path); | ||
89 | out: | ||
88 | return error; | 90 | return error; |
89 | } | 91 | } |
92 | EXPORT_SYMBOL(vfs_fstatat); | ||
90 | 93 | ||
91 | int vfs_lstat(char __user *name, struct kstat *stat) | 94 | int vfs_stat(char __user *name, struct kstat *stat) |
92 | { | 95 | { |
93 | return vfs_lstat_fd(AT_FDCWD, name, stat); | 96 | return vfs_fstatat(AT_FDCWD, name, stat, 0); |
94 | } | 97 | } |
98 | EXPORT_SYMBOL(vfs_stat); | ||
95 | 99 | ||
96 | EXPORT_SYMBOL(vfs_lstat); | 100 | int vfs_lstat(char __user *name, struct kstat *stat) |
97 | |||
98 | int vfs_fstat(unsigned int fd, struct kstat *stat) | ||
99 | { | 101 | { |
100 | struct file *f = fget(fd); | 102 | return vfs_fstatat(AT_FDCWD, name, stat, AT_SYMLINK_NOFOLLOW); |
101 | int error = -EBADF; | ||
102 | |||
103 | if (f) { | ||
104 | error = vfs_getattr(f->f_path.mnt, f->f_path.dentry, stat); | ||
105 | fput(f); | ||
106 | } | ||
107 | return error; | ||
108 | } | 103 | } |
104 | EXPORT_SYMBOL(vfs_lstat); | ||
109 | 105 | ||
110 | EXPORT_SYMBOL(vfs_fstat); | ||
111 | 106 | ||
112 | #ifdef __ARCH_WANT_OLD_STAT | 107 | #ifdef __ARCH_WANT_OLD_STAT |
113 | 108 | ||
@@ -155,23 +150,25 @@ static int cp_old_stat(struct kstat *stat, struct __old_kernel_stat __user * sta | |||
155 | SYSCALL_DEFINE2(stat, char __user *, filename, struct __old_kernel_stat __user *, statbuf) | 150 | SYSCALL_DEFINE2(stat, char __user *, filename, struct __old_kernel_stat __user *, statbuf) |
156 | { | 151 | { |
157 | struct kstat stat; | 152 | struct kstat stat; |
158 | int error = vfs_stat_fd(AT_FDCWD, filename, &stat); | 153 | int error; |
159 | 154 | ||
160 | if (!error) | 155 | error = vfs_stat(filename, &stat); |
161 | error = cp_old_stat(&stat, statbuf); | 156 | if (error) |
157 | return error; | ||
162 | 158 | ||
163 | return error; | 159 | return cp_old_stat(&stat, statbuf); |
164 | } | 160 | } |
165 | 161 | ||
166 | SYSCALL_DEFINE2(lstat, char __user *, filename, struct __old_kernel_stat __user *, statbuf) | 162 | SYSCALL_DEFINE2(lstat, char __user *, filename, struct __old_kernel_stat __user *, statbuf) |
167 | { | 163 | { |
168 | struct kstat stat; | 164 | struct kstat stat; |
169 | int error = vfs_lstat_fd(AT_FDCWD, filename, &stat); | 165 | int error; |
170 | 166 | ||
171 | if (!error) | 167 | error = vfs_lstat(filename, &stat); |
172 | error = cp_old_stat(&stat, statbuf); | 168 | if (error) |
169 | return error; | ||
173 | 170 | ||
174 | return error; | 171 | return cp_old_stat(&stat, statbuf); |
175 | } | 172 | } |
176 | 173 | ||
177 | SYSCALL_DEFINE2(fstat, unsigned int, fd, struct __old_kernel_stat __user *, statbuf) | 174 | SYSCALL_DEFINE2(fstat, unsigned int, fd, struct __old_kernel_stat __user *, statbuf) |
@@ -240,23 +237,23 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf) | |||
240 | SYSCALL_DEFINE2(newstat, char __user *, filename, struct stat __user *, statbuf) | 237 | SYSCALL_DEFINE2(newstat, char __user *, filename, struct stat __user *, statbuf) |
241 | { | 238 | { |
242 | struct kstat stat; | 239 | struct kstat stat; |
243 | int error = vfs_stat_fd(AT_FDCWD, filename, &stat); | 240 | int error = vfs_stat(filename, &stat); |
244 | |||
245 | if (!error) | ||
246 | error = cp_new_stat(&stat, statbuf); | ||
247 | 241 | ||
248 | return error; | 242 | if (error) |
243 | return error; | ||
244 | return cp_new_stat(&stat, statbuf); | ||
249 | } | 245 | } |
250 | 246 | ||
251 | SYSCALL_DEFINE2(newlstat, char __user *, filename, struct stat __user *, statbuf) | 247 | SYSCALL_DEFINE2(newlstat, char __user *, filename, struct stat __user *, statbuf) |
252 | { | 248 | { |
253 | struct kstat stat; | 249 | struct kstat stat; |
254 | int error = vfs_lstat_fd(AT_FDCWD, filename, &stat); | 250 | int error; |
255 | 251 | ||
256 | if (!error) | 252 | error = vfs_lstat(filename, &stat); |
257 | error = cp_new_stat(&stat, statbuf); | 253 | if (error) |
254 | return error; | ||
258 | 255 | ||
259 | return error; | 256 | return cp_new_stat(&stat, statbuf); |
260 | } | 257 | } |
261 | 258 | ||
262 | #if !defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_SYS_NEWFSTATAT) | 259 | #if !defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_SYS_NEWFSTATAT) |
@@ -264,21 +261,12 @@ SYSCALL_DEFINE4(newfstatat, int, dfd, char __user *, filename, | |||
264 | struct stat __user *, statbuf, int, flag) | 261 | struct stat __user *, statbuf, int, flag) |
265 | { | 262 | { |
266 | struct kstat stat; | 263 | struct kstat stat; |
267 | int error = -EINVAL; | 264 | int error; |
268 | |||
269 | if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) | ||
270 | goto out; | ||
271 | |||
272 | if (flag & AT_SYMLINK_NOFOLLOW) | ||
273 | error = vfs_lstat_fd(dfd, filename, &stat); | ||
274 | else | ||
275 | error = vfs_stat_fd(dfd, filename, &stat); | ||
276 | |||
277 | if (!error) | ||
278 | error = cp_new_stat(&stat, statbuf); | ||
279 | 265 | ||
280 | out: | 266 | error = vfs_fstatat(dfd, filename, &stat, flag); |
281 | return error; | 267 | if (error) |
268 | return error; | ||
269 | return cp_new_stat(&stat, statbuf); | ||
282 | } | 270 | } |
283 | #endif | 271 | #endif |
284 | 272 | ||
@@ -404,21 +392,12 @@ SYSCALL_DEFINE4(fstatat64, int, dfd, char __user *, filename, | |||
404 | struct stat64 __user *, statbuf, int, flag) | 392 | struct stat64 __user *, statbuf, int, flag) |
405 | { | 393 | { |
406 | struct kstat stat; | 394 | struct kstat stat; |
407 | int error = -EINVAL; | 395 | int error; |
408 | |||
409 | if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) | ||
410 | goto out; | ||
411 | |||
412 | if (flag & AT_SYMLINK_NOFOLLOW) | ||
413 | error = vfs_lstat_fd(dfd, filename, &stat); | ||
414 | else | ||
415 | error = vfs_stat_fd(dfd, filename, &stat); | ||
416 | |||
417 | if (!error) | ||
418 | error = cp_new_stat64(&stat, statbuf); | ||
419 | 396 | ||
420 | out: | 397 | error = vfs_fstatat(dfd, filename, &stat, flag); |
421 | return error; | 398 | if (error) |
399 | return error; | ||
400 | return cp_new_stat64(&stat, statbuf); | ||
422 | } | 401 | } |
423 | #endif /* __ARCH_WANT_STAT64 */ | 402 | #endif /* __ARCH_WANT_STAT64 */ |
424 | 403 | ||
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c index 93e0c0281d45..9345806c8853 100644 --- a/fs/sysfs/bin.c +++ b/fs/sysfs/bin.c | |||
@@ -157,14 +157,9 @@ static ssize_t write(struct file *file, const char __user *userbuf, | |||
157 | count = size - offs; | 157 | count = size - offs; |
158 | } | 158 | } |
159 | 159 | ||
160 | temp = kmalloc(count, GFP_KERNEL); | 160 | temp = memdup_user(userbuf, count); |
161 | if (!temp) | 161 | if (IS_ERR(temp)) |
162 | return -ENOMEM; | 162 | return PTR_ERR(temp); |
163 | |||
164 | if (copy_from_user(temp, userbuf, count)) { | ||
165 | count = -EFAULT; | ||
166 | goto out_free; | ||
167 | } | ||
168 | 163 | ||
169 | mutex_lock(&bb->mutex); | 164 | mutex_lock(&bb->mutex); |
170 | 165 | ||
@@ -176,8 +171,6 @@ static ssize_t write(struct file *file, const char __user *userbuf, | |||
176 | if (count > 0) | 171 | if (count > 0) |
177 | *off = offs + count; | 172 | *off = offs + count; |
178 | 173 | ||
179 | out_free: | ||
180 | kfree(temp); | ||
181 | return count; | 174 | return count; |
182 | } | 175 | } |
183 | 176 | ||
diff --git a/fs/xattr.c b/fs/xattr.c index 197c4fcac032..d51b8f9db921 100644 --- a/fs/xattr.c +++ b/fs/xattr.c | |||
@@ -237,13 +237,9 @@ setxattr(struct dentry *d, const char __user *name, const void __user *value, | |||
237 | if (size) { | 237 | if (size) { |
238 | if (size > XATTR_SIZE_MAX) | 238 | if (size > XATTR_SIZE_MAX) |
239 | return -E2BIG; | 239 | return -E2BIG; |
240 | kvalue = kmalloc(size, GFP_KERNEL); | 240 | kvalue = memdup_user(value, size); |
241 | if (!kvalue) | 241 | if (IS_ERR(kvalue)) |
242 | return -ENOMEM; | 242 | return PTR_ERR(kvalue); |
243 | if (copy_from_user(kvalue, value, size)) { | ||
244 | kfree(kvalue); | ||
245 | return -EFAULT; | ||
246 | } | ||
247 | } | 243 | } |
248 | 244 | ||
249 | error = vfs_setxattr(d, kname, kvalue, size, flags); | 245 | error = vfs_setxattr(d, kname, kvalue, size, flags); |
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index d0b499418a7d..34eaab608e6e 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c | |||
@@ -489,17 +489,12 @@ xfs_attrmulti_attr_set( | |||
489 | if (len > XATTR_SIZE_MAX) | 489 | if (len > XATTR_SIZE_MAX) |
490 | return EINVAL; | 490 | return EINVAL; |
491 | 491 | ||
492 | kbuf = kmalloc(len, GFP_KERNEL); | 492 | kbuf = memdup_user(ubuf, len); |
493 | if (!kbuf) | 493 | if (IS_ERR(kbuf)) |
494 | return ENOMEM; | 494 | return PTR_ERR(kbuf); |
495 | |||
496 | if (copy_from_user(kbuf, ubuf, len)) | ||
497 | goto out_kfree; | ||
498 | 495 | ||
499 | error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags); | 496 | error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags); |
500 | 497 | ||
501 | out_kfree: | ||
502 | kfree(kbuf); | ||
503 | return error; | 498 | return error; |
504 | } | 499 | } |
505 | 500 | ||
@@ -540,20 +535,16 @@ xfs_attrmulti_by_handle( | |||
540 | if (!size || size > 16 * PAGE_SIZE) | 535 | if (!size || size > 16 * PAGE_SIZE) |
541 | goto out_dput; | 536 | goto out_dput; |
542 | 537 | ||
543 | error = ENOMEM; | 538 | ops = memdup_user(am_hreq.ops, size); |
544 | ops = kmalloc(size, GFP_KERNEL); | 539 | if (IS_ERR(ops)) { |
545 | if (!ops) | 540 | error = PTR_ERR(ops); |
546 | goto out_dput; | 541 | goto out_dput; |
547 | 542 | } | |
548 | error = EFAULT; | ||
549 | if (copy_from_user(ops, am_hreq.ops, size)) | ||
550 | goto out_kfree_ops; | ||
551 | 543 | ||
552 | attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL); | 544 | attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL); |
553 | if (!attr_name) | 545 | if (!attr_name) |
554 | goto out_kfree_ops; | 546 | goto out_kfree_ops; |
555 | 547 | ||
556 | |||
557 | error = 0; | 548 | error = 0; |
558 | for (i = 0; i < am_hreq.opcount; i++) { | 549 | for (i = 0; i < am_hreq.opcount; i++) { |
559 | ops[i].am_error = strncpy_from_user(attr_name, | 550 | ops[i].am_error = strncpy_from_user(attr_name, |
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index c70c4e3db790..0882d166239a 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c | |||
@@ -427,20 +427,16 @@ xfs_compat_attrmulti_by_handle( | |||
427 | if (!size || size > 16 * PAGE_SIZE) | 427 | if (!size || size > 16 * PAGE_SIZE) |
428 | goto out_dput; | 428 | goto out_dput; |
429 | 429 | ||
430 | error = ENOMEM; | 430 | ops = memdup_user(compat_ptr(am_hreq.ops), size); |
431 | ops = kmalloc(size, GFP_KERNEL); | 431 | if (IS_ERR(ops)) { |
432 | if (!ops) | 432 | error = PTR_ERR(ops); |
433 | goto out_dput; | 433 | goto out_dput; |
434 | 434 | } | |
435 | error = EFAULT; | ||
436 | if (copy_from_user(ops, compat_ptr(am_hreq.ops), size)) | ||
437 | goto out_kfree_ops; | ||
438 | 435 | ||
439 | attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL); | 436 | attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL); |
440 | if (!attr_name) | 437 | if (!attr_name) |
441 | goto out_kfree_ops; | 438 | goto out_kfree_ops; |
442 | 439 | ||
443 | |||
444 | error = 0; | 440 | error = 0; |
445 | for (i = 0; i < am_hreq.opcount; i++) { | 441 | for (i = 0; i < am_hreq.opcount; i++) { |
446 | ops[i].am_error = strncpy_from_user(attr_name, | 442 | ops[i].am_error = strncpy_from_user(attr_name, |