diff options
author | Jiri Kosina <jkosina@suse.cz> | 2011-02-15 04:24:31 -0500 |
---|---|---|
committer | Jiri Kosina <jkosina@suse.cz> | 2011-02-15 04:24:31 -0500 |
commit | 0a9d59a2461477bd9ed143c01af9df3f8f00fa81 (patch) | |
tree | df997d1cfb0786427a0df1fbd6f0640fa4248cf4 /fs | |
parent | a23ce6da9677d245aa0aadc99f4197030350ab54 (diff) | |
parent | 795abaf1e4e188c4171e3cd3dbb11a9fcacaf505 (diff) |
Merge branch 'master' into for-next
Diffstat (limited to 'fs')
244 files changed, 8470 insertions, 5175 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index 771f457402d4..3db9caa57edc 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
@@ -30,15 +30,6 @@ config FS_MBCACHE | |||
30 | source "fs/reiserfs/Kconfig" | 30 | source "fs/reiserfs/Kconfig" |
31 | source "fs/jfs/Kconfig" | 31 | source "fs/jfs/Kconfig" |
32 | 32 | ||
33 | config FS_POSIX_ACL | ||
34 | # Posix ACL utility routines (for now, only ext2/ext3/jfs/reiserfs/nfs4) | ||
35 | # | ||
36 | # NOTE: you can implement Posix ACLs without these helpers (XFS does). | ||
37 | # Never use this symbol for ifdefs. | ||
38 | # | ||
39 | bool | ||
40 | default n | ||
41 | |||
42 | source "fs/xfs/Kconfig" | 33 | source "fs/xfs/Kconfig" |
43 | source "fs/gfs2/Kconfig" | 34 | source "fs/gfs2/Kconfig" |
44 | source "fs/ocfs2/Kconfig" | 35 | source "fs/ocfs2/Kconfig" |
@@ -47,11 +38,19 @@ source "fs/nilfs2/Kconfig" | |||
47 | 38 | ||
48 | endif # BLOCK | 39 | endif # BLOCK |
49 | 40 | ||
41 | # Posix ACL utility routines | ||
42 | # | ||
43 | # Note: Posix ACLs can be implemented without these helpers. Never use | ||
44 | # this symbol for ifdefs in core code. | ||
45 | # | ||
46 | config FS_POSIX_ACL | ||
47 | def_bool n | ||
48 | |||
50 | config EXPORTFS | 49 | config EXPORTFS |
51 | tristate | 50 | tristate |
52 | 51 | ||
53 | config FILE_LOCKING | 52 | config FILE_LOCKING |
54 | bool "Enable POSIX file locking API" if EMBEDDED | 53 | bool "Enable POSIX file locking API" if EXPERT |
55 | default y | 54 | default y |
56 | help | 55 | help |
57 | This option enables standard file locking support, required | 56 | This option enables standard file locking support, required |
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index a3bcec75c54a..1c8c6cc6de30 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c | |||
@@ -289,7 +289,7 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, | |||
289 | call->server = server; | 289 | call->server = server; |
290 | 290 | ||
291 | INIT_WORK(&call->work, SRXAFSCB_CallBack); | 291 | INIT_WORK(&call->work, SRXAFSCB_CallBack); |
292 | schedule_work(&call->work); | 292 | queue_work(afs_wq, &call->work); |
293 | return 0; | 293 | return 0; |
294 | } | 294 | } |
295 | 295 | ||
@@ -336,7 +336,7 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call, | |||
336 | call->server = server; | 336 | call->server = server; |
337 | 337 | ||
338 | INIT_WORK(&call->work, SRXAFSCB_InitCallBackState); | 338 | INIT_WORK(&call->work, SRXAFSCB_InitCallBackState); |
339 | schedule_work(&call->work); | 339 | queue_work(afs_wq, &call->work); |
340 | return 0; | 340 | return 0; |
341 | } | 341 | } |
342 | 342 | ||
@@ -367,7 +367,7 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call, | |||
367 | call->server = server; | 367 | call->server = server; |
368 | 368 | ||
369 | INIT_WORK(&call->work, SRXAFSCB_InitCallBackState); | 369 | INIT_WORK(&call->work, SRXAFSCB_InitCallBackState); |
370 | schedule_work(&call->work); | 370 | queue_work(afs_wq, &call->work); |
371 | return 0; | 371 | return 0; |
372 | } | 372 | } |
373 | 373 | ||
@@ -400,7 +400,7 @@ static int afs_deliver_cb_probe(struct afs_call *call, struct sk_buff *skb, | |||
400 | call->state = AFS_CALL_REPLYING; | 400 | call->state = AFS_CALL_REPLYING; |
401 | 401 | ||
402 | INIT_WORK(&call->work, SRXAFSCB_Probe); | 402 | INIT_WORK(&call->work, SRXAFSCB_Probe); |
403 | schedule_work(&call->work); | 403 | queue_work(afs_wq, &call->work); |
404 | return 0; | 404 | return 0; |
405 | } | 405 | } |
406 | 406 | ||
@@ -496,7 +496,7 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb, | |||
496 | call->state = AFS_CALL_REPLYING; | 496 | call->state = AFS_CALL_REPLYING; |
497 | 497 | ||
498 | INIT_WORK(&call->work, SRXAFSCB_ProbeUuid); | 498 | INIT_WORK(&call->work, SRXAFSCB_ProbeUuid); |
499 | schedule_work(&call->work); | 499 | queue_work(afs_wq, &call->work); |
500 | return 0; | 500 | return 0; |
501 | } | 501 | } |
502 | 502 | ||
@@ -580,6 +580,6 @@ static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call, | |||
580 | call->state = AFS_CALL_REPLYING; | 580 | call->state = AFS_CALL_REPLYING; |
581 | 581 | ||
582 | INIT_WORK(&call->work, SRXAFSCB_TellMeAboutYourself); | 582 | INIT_WORK(&call->work, SRXAFSCB_TellMeAboutYourself); |
583 | schedule_work(&call->work); | 583 | queue_work(afs_wq, &call->work); |
584 | return 0; | 584 | return 0; |
585 | } | 585 | } |
diff --git a/fs/afs/dir.c b/fs/afs/dir.c index e6a4ab980e31..20c106f24927 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c | |||
@@ -66,6 +66,7 @@ const struct dentry_operations afs_fs_dentry_operations = { | |||
66 | .d_revalidate = afs_d_revalidate, | 66 | .d_revalidate = afs_d_revalidate, |
67 | .d_delete = afs_d_delete, | 67 | .d_delete = afs_d_delete, |
68 | .d_release = afs_d_release, | 68 | .d_release = afs_d_release, |
69 | .d_automount = afs_d_automount, | ||
69 | }; | 70 | }; |
70 | 71 | ||
71 | #define AFS_DIR_HASHTBL_SIZE 128 | 72 | #define AFS_DIR_HASHTBL_SIZE 128 |
diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 0747339011c3..db66c5201474 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c | |||
@@ -184,7 +184,8 @@ struct inode *afs_iget_autocell(struct inode *dir, const char *dev_name, | |||
184 | inode->i_generation = 0; | 184 | inode->i_generation = 0; |
185 | 185 | ||
186 | set_bit(AFS_VNODE_PSEUDODIR, &vnode->flags); | 186 | set_bit(AFS_VNODE_PSEUDODIR, &vnode->flags); |
187 | inode->i_flags |= S_NOATIME; | 187 | set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags); |
188 | inode->i_flags |= S_AUTOMOUNT | S_NOATIME; | ||
188 | unlock_new_inode(inode); | 189 | unlock_new_inode(inode); |
189 | _leave(" = %p", inode); | 190 | _leave(" = %p", inode); |
190 | return inode; | 191 | return inode; |
diff --git a/fs/afs/internal.h b/fs/afs/internal.h index ab6db5abaf53..5a9b6843bac1 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h | |||
@@ -577,6 +577,7 @@ extern int afs_drop_inode(struct inode *); | |||
577 | /* | 577 | /* |
578 | * main.c | 578 | * main.c |
579 | */ | 579 | */ |
580 | extern struct workqueue_struct *afs_wq; | ||
580 | extern struct afs_uuid afs_uuid; | 581 | extern struct afs_uuid afs_uuid; |
581 | 582 | ||
582 | /* | 583 | /* |
@@ -591,6 +592,7 @@ extern const struct inode_operations afs_mntpt_inode_operations; | |||
591 | extern const struct inode_operations afs_autocell_inode_operations; | 592 | extern const struct inode_operations afs_autocell_inode_operations; |
592 | extern const struct file_operations afs_mntpt_file_operations; | 593 | extern const struct file_operations afs_mntpt_file_operations; |
593 | 594 | ||
595 | extern struct vfsmount *afs_d_automount(struct path *); | ||
594 | extern int afs_mntpt_check_symlink(struct afs_vnode *, struct key *); | 596 | extern int afs_mntpt_check_symlink(struct afs_vnode *, struct key *); |
595 | extern void afs_mntpt_kill_timer(void); | 597 | extern void afs_mntpt_kill_timer(void); |
596 | 598 | ||
diff --git a/fs/afs/main.c b/fs/afs/main.c index cfd1cbe25b22..42dd2e499ed8 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c | |||
@@ -30,6 +30,7 @@ module_param(rootcell, charp, 0); | |||
30 | MODULE_PARM_DESC(rootcell, "root AFS cell name and VL server IP addr list"); | 30 | MODULE_PARM_DESC(rootcell, "root AFS cell name and VL server IP addr list"); |
31 | 31 | ||
32 | struct afs_uuid afs_uuid; | 32 | struct afs_uuid afs_uuid; |
33 | struct workqueue_struct *afs_wq; | ||
33 | 34 | ||
34 | /* | 35 | /* |
35 | * get a client UUID | 36 | * get a client UUID |
@@ -87,10 +88,16 @@ static int __init afs_init(void) | |||
87 | if (ret < 0) | 88 | if (ret < 0) |
88 | return ret; | 89 | return ret; |
89 | 90 | ||
91 | /* create workqueue */ | ||
92 | ret = -ENOMEM; | ||
93 | afs_wq = alloc_workqueue("afs", 0, 0); | ||
94 | if (!afs_wq) | ||
95 | return ret; | ||
96 | |||
90 | /* register the /proc stuff */ | 97 | /* register the /proc stuff */ |
91 | ret = afs_proc_init(); | 98 | ret = afs_proc_init(); |
92 | if (ret < 0) | 99 | if (ret < 0) |
93 | return ret; | 100 | goto error_proc; |
94 | 101 | ||
95 | #ifdef CONFIG_AFS_FSCACHE | 102 | #ifdef CONFIG_AFS_FSCACHE |
96 | /* we want to be able to cache */ | 103 | /* we want to be able to cache */ |
@@ -140,6 +147,8 @@ error_cell_init: | |||
140 | error_cache: | 147 | error_cache: |
141 | #endif | 148 | #endif |
142 | afs_proc_cleanup(); | 149 | afs_proc_cleanup(); |
150 | error_proc: | ||
151 | destroy_workqueue(afs_wq); | ||
143 | rcu_barrier(); | 152 | rcu_barrier(); |
144 | printk(KERN_ERR "kAFS: failed to register: %d\n", ret); | 153 | printk(KERN_ERR "kAFS: failed to register: %d\n", ret); |
145 | return ret; | 154 | return ret; |
@@ -163,7 +172,7 @@ static void __exit afs_exit(void) | |||
163 | afs_purge_servers(); | 172 | afs_purge_servers(); |
164 | afs_callback_update_kill(); | 173 | afs_callback_update_kill(); |
165 | afs_vlocation_purge(); | 174 | afs_vlocation_purge(); |
166 | flush_scheduled_work(); | 175 | destroy_workqueue(afs_wq); |
167 | afs_cell_purge(); | 176 | afs_cell_purge(); |
168 | #ifdef CONFIG_AFS_FSCACHE | 177 | #ifdef CONFIG_AFS_FSCACHE |
169 | fscache_unregister_netfs(&afs_cache_netfs); | 178 | fscache_unregister_netfs(&afs_cache_netfs); |
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 6153417caf57..aa59184151d0 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c | |||
@@ -24,7 +24,6 @@ static struct dentry *afs_mntpt_lookup(struct inode *dir, | |||
24 | struct dentry *dentry, | 24 | struct dentry *dentry, |
25 | struct nameidata *nd); | 25 | struct nameidata *nd); |
26 | static int afs_mntpt_open(struct inode *inode, struct file *file); | 26 | static int afs_mntpt_open(struct inode *inode, struct file *file); |
27 | static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd); | ||
28 | static void afs_mntpt_expiry_timed_out(struct work_struct *work); | 27 | static void afs_mntpt_expiry_timed_out(struct work_struct *work); |
29 | 28 | ||
30 | const struct file_operations afs_mntpt_file_operations = { | 29 | const struct file_operations afs_mntpt_file_operations = { |
@@ -34,13 +33,11 @@ const struct file_operations afs_mntpt_file_operations = { | |||
34 | 33 | ||
35 | const struct inode_operations afs_mntpt_inode_operations = { | 34 | const struct inode_operations afs_mntpt_inode_operations = { |
36 | .lookup = afs_mntpt_lookup, | 35 | .lookup = afs_mntpt_lookup, |
37 | .follow_link = afs_mntpt_follow_link, | ||
38 | .readlink = page_readlink, | 36 | .readlink = page_readlink, |
39 | .getattr = afs_getattr, | 37 | .getattr = afs_getattr, |
40 | }; | 38 | }; |
41 | 39 | ||
42 | const struct inode_operations afs_autocell_inode_operations = { | 40 | const struct inode_operations afs_autocell_inode_operations = { |
43 | .follow_link = afs_mntpt_follow_link, | ||
44 | .getattr = afs_getattr, | 41 | .getattr = afs_getattr, |
45 | }; | 42 | }; |
46 | 43 | ||
@@ -88,6 +85,7 @@ int afs_mntpt_check_symlink(struct afs_vnode *vnode, struct key *key) | |||
88 | _debug("symlink is a mountpoint"); | 85 | _debug("symlink is a mountpoint"); |
89 | spin_lock(&vnode->lock); | 86 | spin_lock(&vnode->lock); |
90 | set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags); | 87 | set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags); |
88 | vnode->vfs_inode.i_flags |= S_AUTOMOUNT; | ||
91 | spin_unlock(&vnode->lock); | 89 | spin_unlock(&vnode->lock); |
92 | } | 90 | } |
93 | 91 | ||
@@ -238,52 +236,24 @@ error_no_devname: | |||
238 | } | 236 | } |
239 | 237 | ||
240 | /* | 238 | /* |
241 | * follow a link from a mountpoint directory, thus causing it to be mounted | 239 | * handle an automount point |
242 | */ | 240 | */ |
243 | static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd) | 241 | struct vfsmount *afs_d_automount(struct path *path) |
244 | { | 242 | { |
245 | struct vfsmount *newmnt; | 243 | struct vfsmount *newmnt; |
246 | int err; | ||
247 | 244 | ||
248 | _enter("%p{%s},{%s:%p{%s},}", | 245 | _enter("{%s,%s}", path->mnt->mnt_devname, path->dentry->d_name.name); |
249 | dentry, | ||
250 | dentry->d_name.name, | ||
251 | nd->path.mnt->mnt_devname, | ||
252 | dentry, | ||
253 | nd->path.dentry->d_name.name); | ||
254 | |||
255 | dput(nd->path.dentry); | ||
256 | nd->path.dentry = dget(dentry); | ||
257 | 246 | ||
258 | newmnt = afs_mntpt_do_automount(nd->path.dentry); | 247 | newmnt = afs_mntpt_do_automount(path->dentry); |
259 | if (IS_ERR(newmnt)) { | 248 | if (IS_ERR(newmnt)) |
260 | path_put(&nd->path); | 249 | return newmnt; |
261 | return (void *)newmnt; | ||
262 | } | ||
263 | |||
264 | mntget(newmnt); | ||
265 | err = do_add_mount(newmnt, &nd->path, MNT_SHRINKABLE, &afs_vfsmounts); | ||
266 | switch (err) { | ||
267 | case 0: | ||
268 | path_put(&nd->path); | ||
269 | nd->path.mnt = newmnt; | ||
270 | nd->path.dentry = dget(newmnt->mnt_root); | ||
271 | schedule_delayed_work(&afs_mntpt_expiry_timer, | ||
272 | afs_mntpt_expiry_timeout * HZ); | ||
273 | break; | ||
274 | case -EBUSY: | ||
275 | /* someone else made a mount here whilst we were busy */ | ||
276 | while (d_mountpoint(nd->path.dentry) && | ||
277 | follow_down(&nd->path)) | ||
278 | ; | ||
279 | err = 0; | ||
280 | default: | ||
281 | mntput(newmnt); | ||
282 | break; | ||
283 | } | ||
284 | 250 | ||
285 | _leave(" = %d", err); | 251 | mntget(newmnt); /* prevent immediate expiration */ |
286 | return ERR_PTR(err); | 252 | mnt_set_expiry(newmnt, &afs_vfsmounts); |
253 | queue_delayed_work(afs_wq, &afs_mntpt_expiry_timer, | ||
254 | afs_mntpt_expiry_timeout * HZ); | ||
255 | _leave(" = %p {%s}", newmnt, newmnt->mnt_devname); | ||
256 | return newmnt; | ||
287 | } | 257 | } |
288 | 258 | ||
289 | /* | 259 | /* |
@@ -295,8 +265,8 @@ static void afs_mntpt_expiry_timed_out(struct work_struct *work) | |||
295 | 265 | ||
296 | if (!list_empty(&afs_vfsmounts)) { | 266 | if (!list_empty(&afs_vfsmounts)) { |
297 | mark_mounts_for_expiry(&afs_vfsmounts); | 267 | mark_mounts_for_expiry(&afs_vfsmounts); |
298 | schedule_delayed_work(&afs_mntpt_expiry_timer, | 268 | queue_delayed_work(afs_wq, &afs_mntpt_expiry_timer, |
299 | afs_mntpt_expiry_timeout * HZ); | 269 | afs_mntpt_expiry_timeout * HZ); |
300 | } | 270 | } |
301 | 271 | ||
302 | _leave(""); | 272 | _leave(""); |
@@ -310,6 +280,5 @@ void afs_mntpt_kill_timer(void) | |||
310 | _enter(""); | 280 | _enter(""); |
311 | 281 | ||
312 | ASSERT(list_empty(&afs_vfsmounts)); | 282 | ASSERT(list_empty(&afs_vfsmounts)); |
313 | cancel_delayed_work(&afs_mntpt_expiry_timer); | 283 | cancel_delayed_work_sync(&afs_mntpt_expiry_timer); |
314 | flush_scheduled_work(); | ||
315 | } | 284 | } |
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 654d8fdbf01f..e45a323aebb4 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c | |||
@@ -410,7 +410,7 @@ static void afs_rx_interceptor(struct sock *sk, unsigned long user_call_ID, | |||
410 | if (!call) { | 410 | if (!call) { |
411 | /* its an incoming call for our callback service */ | 411 | /* its an incoming call for our callback service */ |
412 | skb_queue_tail(&afs_incoming_calls, skb); | 412 | skb_queue_tail(&afs_incoming_calls, skb); |
413 | schedule_work(&afs_collect_incoming_call_work); | 413 | queue_work(afs_wq, &afs_collect_incoming_call_work); |
414 | } else { | 414 | } else { |
415 | /* route the messages directly to the appropriate call */ | 415 | /* route the messages directly to the appropriate call */ |
416 | skb_queue_tail(&call->rx_queue, skb); | 416 | skb_queue_tail(&call->rx_queue, skb); |
diff --git a/fs/afs/server.c b/fs/afs/server.c index 9fdc7fe3a7bc..d59b7516e943 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c | |||
@@ -238,8 +238,8 @@ void afs_put_server(struct afs_server *server) | |||
238 | if (atomic_read(&server->usage) == 0) { | 238 | if (atomic_read(&server->usage) == 0) { |
239 | list_move_tail(&server->grave, &afs_server_graveyard); | 239 | list_move_tail(&server->grave, &afs_server_graveyard); |
240 | server->time_of_death = get_seconds(); | 240 | server->time_of_death = get_seconds(); |
241 | schedule_delayed_work(&afs_server_reaper, | 241 | queue_delayed_work(afs_wq, &afs_server_reaper, |
242 | afs_server_timeout * HZ); | 242 | afs_server_timeout * HZ); |
243 | } | 243 | } |
244 | spin_unlock(&afs_server_graveyard_lock); | 244 | spin_unlock(&afs_server_graveyard_lock); |
245 | _leave(" [dead]"); | 245 | _leave(" [dead]"); |
@@ -285,10 +285,11 @@ static void afs_reap_server(struct work_struct *work) | |||
285 | expiry = server->time_of_death + afs_server_timeout; | 285 | expiry = server->time_of_death + afs_server_timeout; |
286 | if (expiry > now) { | 286 | if (expiry > now) { |
287 | delay = (expiry - now) * HZ; | 287 | delay = (expiry - now) * HZ; |
288 | if (!schedule_delayed_work(&afs_server_reaper, delay)) { | 288 | if (!queue_delayed_work(afs_wq, &afs_server_reaper, |
289 | delay)) { | ||
289 | cancel_delayed_work(&afs_server_reaper); | 290 | cancel_delayed_work(&afs_server_reaper); |
290 | schedule_delayed_work(&afs_server_reaper, | 291 | queue_delayed_work(afs_wq, &afs_server_reaper, |
291 | delay); | 292 | delay); |
292 | } | 293 | } |
293 | break; | 294 | break; |
294 | } | 295 | } |
@@ -323,5 +324,5 @@ void __exit afs_purge_servers(void) | |||
323 | { | 324 | { |
324 | afs_server_timeout = 0; | 325 | afs_server_timeout = 0; |
325 | cancel_delayed_work(&afs_server_reaper); | 326 | cancel_delayed_work(&afs_server_reaper); |
326 | schedule_delayed_work(&afs_server_reaper, 0); | 327 | queue_delayed_work(afs_wq, &afs_server_reaper, 0); |
327 | } | 328 | } |
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c index 9ac260d1361d..431984d2e372 100644 --- a/fs/afs/vlocation.c +++ b/fs/afs/vlocation.c | |||
@@ -507,8 +507,8 @@ void afs_put_vlocation(struct afs_vlocation *vl) | |||
507 | _debug("buried"); | 507 | _debug("buried"); |
508 | list_move_tail(&vl->grave, &afs_vlocation_graveyard); | 508 | list_move_tail(&vl->grave, &afs_vlocation_graveyard); |
509 | vl->time_of_death = get_seconds(); | 509 | vl->time_of_death = get_seconds(); |
510 | schedule_delayed_work(&afs_vlocation_reap, | 510 | queue_delayed_work(afs_wq, &afs_vlocation_reap, |
511 | afs_vlocation_timeout * HZ); | 511 | afs_vlocation_timeout * HZ); |
512 | 512 | ||
513 | /* suspend updates on this record */ | 513 | /* suspend updates on this record */ |
514 | if (!list_empty(&vl->update)) { | 514 | if (!list_empty(&vl->update)) { |
@@ -561,11 +561,11 @@ static void afs_vlocation_reaper(struct work_struct *work) | |||
561 | if (expiry > now) { | 561 | if (expiry > now) { |
562 | delay = (expiry - now) * HZ; | 562 | delay = (expiry - now) * HZ; |
563 | _debug("delay %lu", delay); | 563 | _debug("delay %lu", delay); |
564 | if (!schedule_delayed_work(&afs_vlocation_reap, | 564 | if (!queue_delayed_work(afs_wq, &afs_vlocation_reap, |
565 | delay)) { | 565 | delay)) { |
566 | cancel_delayed_work(&afs_vlocation_reap); | 566 | cancel_delayed_work(&afs_vlocation_reap); |
567 | schedule_delayed_work(&afs_vlocation_reap, | 567 | queue_delayed_work(afs_wq, &afs_vlocation_reap, |
568 | delay); | 568 | delay); |
569 | } | 569 | } |
570 | break; | 570 | break; |
571 | } | 571 | } |
@@ -620,7 +620,7 @@ void afs_vlocation_purge(void) | |||
620 | destroy_workqueue(afs_vlocation_update_worker); | 620 | destroy_workqueue(afs_vlocation_update_worker); |
621 | 621 | ||
622 | cancel_delayed_work(&afs_vlocation_reap); | 622 | cancel_delayed_work(&afs_vlocation_reap); |
623 | schedule_delayed_work(&afs_vlocation_reap, 0); | 623 | queue_delayed_work(afs_wq, &afs_vlocation_reap, 0); |
624 | } | 624 | } |
625 | 625 | ||
626 | /* | 626 | /* |
@@ -87,7 +87,7 @@ static int __init aio_setup(void) | |||
87 | 87 | ||
88 | aio_wq = create_workqueue("aio"); | 88 | aio_wq = create_workqueue("aio"); |
89 | abe_pool = mempool_create_kmalloc_pool(1, sizeof(struct aio_batch_entry)); | 89 | abe_pool = mempool_create_kmalloc_pool(1, sizeof(struct aio_batch_entry)); |
90 | BUG_ON(!abe_pool); | 90 | BUG_ON(!aio_wq || !abe_pool); |
91 | 91 | ||
92 | pr_debug("aio_setup: sizeof(struct page) = %d\n", (int)sizeof(struct page)); | 92 | pr_debug("aio_setup: sizeof(struct page) = %d\n", (int)sizeof(struct page)); |
93 | 93 | ||
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index cbe57f3c4d89..c5567cb78432 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c | |||
@@ -233,7 +233,7 @@ static int __init anon_inode_init(void) | |||
233 | return 0; | 233 | return 0; |
234 | 234 | ||
235 | err_mntput: | 235 | err_mntput: |
236 | mntput_long(anon_inode_mnt); | 236 | mntput(anon_inode_mnt); |
237 | err_unregister_filesystem: | 237 | err_unregister_filesystem: |
238 | unregister_filesystem(&anon_inode_fs_type); | 238 | unregister_filesystem(&anon_inode_fs_type); |
239 | err_exit: | 239 | err_exit: |
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 0fffe1c24cec..54f923792728 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h | |||
@@ -88,18 +88,9 @@ struct autofs_info { | |||
88 | 88 | ||
89 | uid_t uid; | 89 | uid_t uid; |
90 | gid_t gid; | 90 | gid_t gid; |
91 | |||
92 | mode_t mode; | ||
93 | size_t size; | ||
94 | |||
95 | void (*free)(struct autofs_info *); | ||
96 | union { | ||
97 | const char *symlink; | ||
98 | } u; | ||
99 | }; | 91 | }; |
100 | 92 | ||
101 | #define AUTOFS_INF_EXPIRING (1<<0) /* dentry is in the process of expiring */ | 93 | #define AUTOFS_INF_EXPIRING (1<<0) /* dentry is in the process of expiring */ |
102 | #define AUTOFS_INF_MOUNTPOINT (1<<1) /* mountpoint status for direct expire */ | ||
103 | #define AUTOFS_INF_PENDING (1<<2) /* dentry pending mount */ | 94 | #define AUTOFS_INF_PENDING (1<<2) /* dentry pending mount */ |
104 | 95 | ||
105 | struct autofs_wait_queue { | 96 | struct autofs_wait_queue { |
@@ -176,14 +167,7 @@ static inline int autofs4_ispending(struct dentry *dentry) | |||
176 | return 0; | 167 | return 0; |
177 | } | 168 | } |
178 | 169 | ||
179 | static inline void autofs4_copy_atime(struct file *src, struct file *dst) | 170 | struct inode *autofs4_get_inode(struct super_block *, mode_t); |
180 | { | ||
181 | dst->f_path.dentry->d_inode->i_atime = | ||
182 | src->f_path.dentry->d_inode->i_atime; | ||
183 | return; | ||
184 | } | ||
185 | |||
186 | struct inode *autofs4_get_inode(struct super_block *, struct autofs_info *); | ||
187 | void autofs4_free_ino(struct autofs_info *); | 171 | void autofs4_free_ino(struct autofs_info *); |
188 | 172 | ||
189 | /* Expiration */ | 173 | /* Expiration */ |
@@ -212,16 +196,89 @@ void autofs_dev_ioctl_exit(void); | |||
212 | 196 | ||
213 | extern const struct inode_operations autofs4_symlink_inode_operations; | 197 | extern const struct inode_operations autofs4_symlink_inode_operations; |
214 | extern const struct inode_operations autofs4_dir_inode_operations; | 198 | extern const struct inode_operations autofs4_dir_inode_operations; |
215 | extern const struct inode_operations autofs4_root_inode_operations; | ||
216 | extern const struct inode_operations autofs4_indirect_root_inode_operations; | ||
217 | extern const struct inode_operations autofs4_direct_root_inode_operations; | ||
218 | extern const struct file_operations autofs4_dir_operations; | 199 | extern const struct file_operations autofs4_dir_operations; |
219 | extern const struct file_operations autofs4_root_operations; | 200 | extern const struct file_operations autofs4_root_operations; |
201 | extern const struct dentry_operations autofs4_dentry_operations; | ||
202 | |||
203 | /* VFS automount flags management functions */ | ||
204 | |||
205 | static inline void __managed_dentry_set_automount(struct dentry *dentry) | ||
206 | { | ||
207 | dentry->d_flags |= DCACHE_NEED_AUTOMOUNT; | ||
208 | } | ||
209 | |||
210 | static inline void managed_dentry_set_automount(struct dentry *dentry) | ||
211 | { | ||
212 | spin_lock(&dentry->d_lock); | ||
213 | __managed_dentry_set_automount(dentry); | ||
214 | spin_unlock(&dentry->d_lock); | ||
215 | } | ||
216 | |||
217 | static inline void __managed_dentry_clear_automount(struct dentry *dentry) | ||
218 | { | ||
219 | dentry->d_flags &= ~DCACHE_NEED_AUTOMOUNT; | ||
220 | } | ||
221 | |||
222 | static inline void managed_dentry_clear_automount(struct dentry *dentry) | ||
223 | { | ||
224 | spin_lock(&dentry->d_lock); | ||
225 | __managed_dentry_clear_automount(dentry); | ||
226 | spin_unlock(&dentry->d_lock); | ||
227 | } | ||
228 | |||
229 | static inline void __managed_dentry_set_transit(struct dentry *dentry) | ||
230 | { | ||
231 | dentry->d_flags |= DCACHE_MANAGE_TRANSIT; | ||
232 | } | ||
233 | |||
234 | static inline void managed_dentry_set_transit(struct dentry *dentry) | ||
235 | { | ||
236 | spin_lock(&dentry->d_lock); | ||
237 | __managed_dentry_set_transit(dentry); | ||
238 | spin_unlock(&dentry->d_lock); | ||
239 | } | ||
240 | |||
241 | static inline void __managed_dentry_clear_transit(struct dentry *dentry) | ||
242 | { | ||
243 | dentry->d_flags &= ~DCACHE_MANAGE_TRANSIT; | ||
244 | } | ||
245 | |||
246 | static inline void managed_dentry_clear_transit(struct dentry *dentry) | ||
247 | { | ||
248 | spin_lock(&dentry->d_lock); | ||
249 | __managed_dentry_clear_transit(dentry); | ||
250 | spin_unlock(&dentry->d_lock); | ||
251 | } | ||
252 | |||
253 | static inline void __managed_dentry_set_managed(struct dentry *dentry) | ||
254 | { | ||
255 | dentry->d_flags |= (DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT); | ||
256 | } | ||
257 | |||
258 | static inline void managed_dentry_set_managed(struct dentry *dentry) | ||
259 | { | ||
260 | spin_lock(&dentry->d_lock); | ||
261 | __managed_dentry_set_managed(dentry); | ||
262 | spin_unlock(&dentry->d_lock); | ||
263 | } | ||
264 | |||
265 | static inline void __managed_dentry_clear_managed(struct dentry *dentry) | ||
266 | { | ||
267 | dentry->d_flags &= ~(DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT); | ||
268 | } | ||
269 | |||
270 | static inline void managed_dentry_clear_managed(struct dentry *dentry) | ||
271 | { | ||
272 | spin_lock(&dentry->d_lock); | ||
273 | __managed_dentry_clear_managed(dentry); | ||
274 | spin_unlock(&dentry->d_lock); | ||
275 | } | ||
220 | 276 | ||
221 | /* Initializing function */ | 277 | /* Initializing function */ |
222 | 278 | ||
223 | int autofs4_fill_super(struct super_block *, void *, int); | 279 | int autofs4_fill_super(struct super_block *, void *, int); |
224 | struct autofs_info *autofs4_init_ino(struct autofs_info *, struct autofs_sb_info *sbi, mode_t mode); | 280 | struct autofs_info *autofs4_new_ino(struct autofs_sb_info *); |
281 | void autofs4_clean_ino(struct autofs_info *); | ||
225 | 282 | ||
226 | /* Queue management functions */ | 283 | /* Queue management functions */ |
227 | 284 | ||
@@ -229,19 +286,6 @@ int autofs4_wait(struct autofs_sb_info *,struct dentry *, enum autofs_notify); | |||
229 | int autofs4_wait_release(struct autofs_sb_info *,autofs_wqt_t,int); | 286 | int autofs4_wait_release(struct autofs_sb_info *,autofs_wqt_t,int); |
230 | void autofs4_catatonic_mode(struct autofs_sb_info *); | 287 | void autofs4_catatonic_mode(struct autofs_sb_info *); |
231 | 288 | ||
232 | static inline int autofs4_follow_mount(struct path *path) | ||
233 | { | ||
234 | int res = 0; | ||
235 | |||
236 | while (d_mountpoint(path->dentry)) { | ||
237 | int followed = follow_down(path); | ||
238 | if (!followed) | ||
239 | break; | ||
240 | res = 1; | ||
241 | } | ||
242 | return res; | ||
243 | } | ||
244 | |||
245 | static inline u32 autofs4_get_dev(struct autofs_sb_info *sbi) | 289 | static inline u32 autofs4_get_dev(struct autofs_sb_info *sbi) |
246 | { | 290 | { |
247 | return new_encode_dev(sbi->sb->s_dev); | 291 | return new_encode_dev(sbi->sb->s_dev); |
@@ -294,5 +338,4 @@ static inline void autofs4_del_expiring(struct dentry *dentry) | |||
294 | return; | 338 | return; |
295 | } | 339 | } |
296 | 340 | ||
297 | void autofs4_dentry_release(struct dentry *); | ||
298 | extern void autofs4_kill_sb(struct super_block *); | 341 | extern void autofs4_kill_sb(struct super_block *); |
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index eff9a419469a..1442da4860e5 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c | |||
@@ -551,7 +551,7 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp, | |||
551 | 551 | ||
552 | err = have_submounts(path.dentry); | 552 | err = have_submounts(path.dentry); |
553 | 553 | ||
554 | if (follow_down(&path)) | 554 | if (follow_down_one(&path)) |
555 | magic = path.mnt->mnt_sb->s_magic; | 555 | magic = path.mnt->mnt_sb->s_magic; |
556 | } | 556 | } |
557 | 557 | ||
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index cc1d01365905..f43100b9662b 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c | |||
@@ -26,10 +26,6 @@ static inline int autofs4_can_expire(struct dentry *dentry, | |||
26 | if (ino == NULL) | 26 | if (ino == NULL) |
27 | return 0; | 27 | return 0; |
28 | 28 | ||
29 | /* No point expiring a pending mount */ | ||
30 | if (ino->flags & AUTOFS_INF_PENDING) | ||
31 | return 0; | ||
32 | |||
33 | if (!do_now) { | 29 | if (!do_now) { |
34 | /* Too young to die */ | 30 | /* Too young to die */ |
35 | if (!timeout || time_after(ino->last_used + timeout, now)) | 31 | if (!timeout || time_after(ino->last_used + timeout, now)) |
@@ -56,7 +52,7 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry) | |||
56 | 52 | ||
57 | path_get(&path); | 53 | path_get(&path); |
58 | 54 | ||
59 | if (!follow_down(&path)) | 55 | if (!follow_down_one(&path)) |
60 | goto done; | 56 | goto done; |
61 | 57 | ||
62 | if (is_autofs4_dentry(path.dentry)) { | 58 | if (is_autofs4_dentry(path.dentry)) { |
@@ -100,7 +96,7 @@ static struct dentry *get_next_positive_dentry(struct dentry *prev, | |||
100 | struct dentry *p, *ret; | 96 | struct dentry *p, *ret; |
101 | 97 | ||
102 | if (prev == NULL) | 98 | if (prev == NULL) |
103 | return dget(prev); | 99 | return dget(root); |
104 | 100 | ||
105 | spin_lock(&autofs4_lock); | 101 | spin_lock(&autofs4_lock); |
106 | relock: | 102 | relock: |
@@ -137,7 +133,7 @@ again: | |||
137 | spin_lock_nested(&ret->d_lock, DENTRY_D_LOCK_NESTED); | 133 | spin_lock_nested(&ret->d_lock, DENTRY_D_LOCK_NESTED); |
138 | /* Negative dentry - try next */ | 134 | /* Negative dentry - try next */ |
139 | if (!simple_positive(ret)) { | 135 | if (!simple_positive(ret)) { |
140 | spin_unlock(&ret->d_lock); | 136 | spin_unlock(&p->d_lock); |
141 | p = ret; | 137 | p = ret; |
142 | goto again; | 138 | goto again; |
143 | } | 139 | } |
@@ -283,6 +279,7 @@ struct dentry *autofs4_expire_direct(struct super_block *sb, | |||
283 | unsigned long timeout; | 279 | unsigned long timeout; |
284 | struct dentry *root = dget(sb->s_root); | 280 | struct dentry *root = dget(sb->s_root); |
285 | int do_now = how & AUTOFS_EXP_IMMEDIATE; | 281 | int do_now = how & AUTOFS_EXP_IMMEDIATE; |
282 | struct autofs_info *ino; | ||
286 | 283 | ||
287 | if (!root) | 284 | if (!root) |
288 | return NULL; | 285 | return NULL; |
@@ -291,19 +288,21 @@ struct dentry *autofs4_expire_direct(struct super_block *sb, | |||
291 | timeout = sbi->exp_timeout; | 288 | timeout = sbi->exp_timeout; |
292 | 289 | ||
293 | spin_lock(&sbi->fs_lock); | 290 | spin_lock(&sbi->fs_lock); |
291 | ino = autofs4_dentry_ino(root); | ||
292 | /* No point expiring a pending mount */ | ||
293 | if (ino->flags & AUTOFS_INF_PENDING) { | ||
294 | spin_unlock(&sbi->fs_lock); | ||
295 | return NULL; | ||
296 | } | ||
297 | managed_dentry_set_transit(root); | ||
294 | if (!autofs4_direct_busy(mnt, root, timeout, do_now)) { | 298 | if (!autofs4_direct_busy(mnt, root, timeout, do_now)) { |
295 | struct autofs_info *ino = autofs4_dentry_ino(root); | 299 | struct autofs_info *ino = autofs4_dentry_ino(root); |
296 | if (d_mountpoint(root)) { | ||
297 | ino->flags |= AUTOFS_INF_MOUNTPOINT; | ||
298 | spin_lock(&root->d_lock); | ||
299 | root->d_flags &= ~DCACHE_MOUNTED; | ||
300 | spin_unlock(&root->d_lock); | ||
301 | } | ||
302 | ino->flags |= AUTOFS_INF_EXPIRING; | 300 | ino->flags |= AUTOFS_INF_EXPIRING; |
303 | init_completion(&ino->expire_complete); | 301 | init_completion(&ino->expire_complete); |
304 | spin_unlock(&sbi->fs_lock); | 302 | spin_unlock(&sbi->fs_lock); |
305 | return root; | 303 | return root; |
306 | } | 304 | } |
305 | managed_dentry_clear_transit(root); | ||
307 | spin_unlock(&sbi->fs_lock); | 306 | spin_unlock(&sbi->fs_lock); |
308 | dput(root); | 307 | dput(root); |
309 | 308 | ||
@@ -340,6 +339,10 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, | |||
340 | while ((dentry = get_next_positive_dentry(dentry, root))) { | 339 | while ((dentry = get_next_positive_dentry(dentry, root))) { |
341 | spin_lock(&sbi->fs_lock); | 340 | spin_lock(&sbi->fs_lock); |
342 | ino = autofs4_dentry_ino(dentry); | 341 | ino = autofs4_dentry_ino(dentry); |
342 | /* No point expiring a pending mount */ | ||
343 | if (ino->flags & AUTOFS_INF_PENDING) | ||
344 | goto cont; | ||
345 | managed_dentry_set_transit(dentry); | ||
343 | 346 | ||
344 | /* | 347 | /* |
345 | * Case 1: (i) indirect mount or top level pseudo direct mount | 348 | * Case 1: (i) indirect mount or top level pseudo direct mount |
@@ -399,6 +402,8 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, | |||
399 | } | 402 | } |
400 | } | 403 | } |
401 | next: | 404 | next: |
405 | managed_dentry_clear_transit(dentry); | ||
406 | cont: | ||
402 | spin_unlock(&sbi->fs_lock); | 407 | spin_unlock(&sbi->fs_lock); |
403 | } | 408 | } |
404 | return NULL; | 409 | return NULL; |
@@ -479,6 +484,8 @@ int autofs4_expire_run(struct super_block *sb, | |||
479 | spin_lock(&sbi->fs_lock); | 484 | spin_lock(&sbi->fs_lock); |
480 | ino = autofs4_dentry_ino(dentry); | 485 | ino = autofs4_dentry_ino(dentry); |
481 | ino->flags &= ~AUTOFS_INF_EXPIRING; | 486 | ino->flags &= ~AUTOFS_INF_EXPIRING; |
487 | if (!d_unhashed(dentry)) | ||
488 | managed_dentry_clear_transit(dentry); | ||
482 | complete_all(&ino->expire_complete); | 489 | complete_all(&ino->expire_complete); |
483 | spin_unlock(&sbi->fs_lock); | 490 | spin_unlock(&sbi->fs_lock); |
484 | 491 | ||
@@ -504,18 +511,18 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt, | |||
504 | ret = autofs4_wait(sbi, dentry, NFY_EXPIRE); | 511 | ret = autofs4_wait(sbi, dentry, NFY_EXPIRE); |
505 | 512 | ||
506 | spin_lock(&sbi->fs_lock); | 513 | spin_lock(&sbi->fs_lock); |
507 | if (ino->flags & AUTOFS_INF_MOUNTPOINT) { | ||
508 | spin_lock(&sb->s_root->d_lock); | ||
509 | /* | ||
510 | * If we haven't been expired away, then reset | ||
511 | * mounted status. | ||
512 | */ | ||
513 | if (mnt->mnt_parent != mnt) | ||
514 | sb->s_root->d_flags |= DCACHE_MOUNTED; | ||
515 | spin_unlock(&sb->s_root->d_lock); | ||
516 | ino->flags &= ~AUTOFS_INF_MOUNTPOINT; | ||
517 | } | ||
518 | ino->flags &= ~AUTOFS_INF_EXPIRING; | 514 | ino->flags &= ~AUTOFS_INF_EXPIRING; |
515 | spin_lock(&dentry->d_lock); | ||
516 | if (ret) | ||
517 | __managed_dentry_clear_transit(dentry); | ||
518 | else { | ||
519 | if ((IS_ROOT(dentry) || | ||
520 | (autofs_type_indirect(sbi->type) && | ||
521 | IS_ROOT(dentry->d_parent))) && | ||
522 | !(dentry->d_flags & DCACHE_NEED_AUTOMOUNT)) | ||
523 | __managed_dentry_set_automount(dentry); | ||
524 | } | ||
525 | spin_unlock(&dentry->d_lock); | ||
519 | complete_all(&ino->expire_complete); | 526 | complete_all(&ino->expire_complete); |
520 | spin_unlock(&sbi->fs_lock); | 527 | spin_unlock(&sbi->fs_lock); |
521 | dput(dentry); | 528 | dput(dentry); |
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index a7bdb9dcac84..180fa2425e49 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c | |||
@@ -22,77 +22,27 @@ | |||
22 | #include "autofs_i.h" | 22 | #include "autofs_i.h" |
23 | #include <linux/module.h> | 23 | #include <linux/module.h> |
24 | 24 | ||
25 | static void ino_lnkfree(struct autofs_info *ino) | 25 | struct autofs_info *autofs4_new_ino(struct autofs_sb_info *sbi) |
26 | { | 26 | { |
27 | if (ino->u.symlink) { | 27 | struct autofs_info *ino = kzalloc(sizeof(*ino), GFP_KERNEL); |
28 | kfree(ino->u.symlink); | 28 | if (ino) { |
29 | ino->u.symlink = NULL; | ||
30 | } | ||
31 | } | ||
32 | |||
33 | struct autofs_info *autofs4_init_ino(struct autofs_info *ino, | ||
34 | struct autofs_sb_info *sbi, mode_t mode) | ||
35 | { | ||
36 | int reinit = 1; | ||
37 | |||
38 | if (ino == NULL) { | ||
39 | reinit = 0; | ||
40 | ino = kmalloc(sizeof(*ino), GFP_KERNEL); | ||
41 | } | ||
42 | |||
43 | if (ino == NULL) | ||
44 | return NULL; | ||
45 | |||
46 | if (!reinit) { | ||
47 | ino->flags = 0; | ||
48 | ino->inode = NULL; | ||
49 | ino->dentry = NULL; | ||
50 | ino->size = 0; | ||
51 | INIT_LIST_HEAD(&ino->active); | 29 | INIT_LIST_HEAD(&ino->active); |
52 | ino->active_count = 0; | ||
53 | INIT_LIST_HEAD(&ino->expiring); | 30 | INIT_LIST_HEAD(&ino->expiring); |
54 | atomic_set(&ino->count, 0); | 31 | ino->last_used = jiffies; |
32 | ino->sbi = sbi; | ||
55 | } | 33 | } |
34 | return ino; | ||
35 | } | ||
56 | 36 | ||
37 | void autofs4_clean_ino(struct autofs_info *ino) | ||
38 | { | ||
57 | ino->uid = 0; | 39 | ino->uid = 0; |
58 | ino->gid = 0; | 40 | ino->gid = 0; |
59 | ino->mode = mode; | ||
60 | ino->last_used = jiffies; | 41 | ino->last_used = jiffies; |
61 | |||
62 | ino->sbi = sbi; | ||
63 | |||
64 | if (reinit && ino->free) | ||
65 | (ino->free)(ino); | ||
66 | |||
67 | memset(&ino->u, 0, sizeof(ino->u)); | ||
68 | |||
69 | ino->free = NULL; | ||
70 | |||
71 | if (S_ISLNK(mode)) | ||
72 | ino->free = ino_lnkfree; | ||
73 | |||
74 | return ino; | ||
75 | } | 42 | } |
76 | 43 | ||
77 | void autofs4_free_ino(struct autofs_info *ino) | 44 | void autofs4_free_ino(struct autofs_info *ino) |
78 | { | 45 | { |
79 | struct autofs_info *p_ino; | ||
80 | |||
81 | if (ino->dentry) { | ||
82 | ino->dentry->d_fsdata = NULL; | ||
83 | if (ino->dentry->d_inode) { | ||
84 | struct dentry *parent = ino->dentry->d_parent; | ||
85 | if (atomic_dec_and_test(&ino->count)) { | ||
86 | p_ino = autofs4_dentry_ino(parent); | ||
87 | if (p_ino && parent != ino->dentry) | ||
88 | atomic_dec(&p_ino->count); | ||
89 | } | ||
90 | dput(ino->dentry); | ||
91 | } | ||
92 | ino->dentry = NULL; | ||
93 | } | ||
94 | if (ino->free) | ||
95 | (ino->free)(ino); | ||
96 | kfree(ino); | 46 | kfree(ino); |
97 | } | 47 | } |
98 | 48 | ||
@@ -148,9 +98,16 @@ static int autofs4_show_options(struct seq_file *m, struct vfsmount *mnt) | |||
148 | return 0; | 98 | return 0; |
149 | } | 99 | } |
150 | 100 | ||
101 | static void autofs4_evict_inode(struct inode *inode) | ||
102 | { | ||
103 | end_writeback(inode); | ||
104 | kfree(inode->i_private); | ||
105 | } | ||
106 | |||
151 | static const struct super_operations autofs4_sops = { | 107 | static const struct super_operations autofs4_sops = { |
152 | .statfs = simple_statfs, | 108 | .statfs = simple_statfs, |
153 | .show_options = autofs4_show_options, | 109 | .show_options = autofs4_show_options, |
110 | .evict_inode = autofs4_evict_inode, | ||
154 | }; | 111 | }; |
155 | 112 | ||
156 | enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto, | 113 | enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto, |
@@ -240,21 +197,6 @@ static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid, | |||
240 | return (*pipefd < 0); | 197 | return (*pipefd < 0); |
241 | } | 198 | } |
242 | 199 | ||
243 | static struct autofs_info *autofs4_mkroot(struct autofs_sb_info *sbi) | ||
244 | { | ||
245 | struct autofs_info *ino; | ||
246 | |||
247 | ino = autofs4_init_ino(NULL, sbi, S_IFDIR | 0755); | ||
248 | if (!ino) | ||
249 | return NULL; | ||
250 | |||
251 | return ino; | ||
252 | } | ||
253 | |||
254 | static const struct dentry_operations autofs4_sb_dentry_operations = { | ||
255 | .d_release = autofs4_dentry_release, | ||
256 | }; | ||
257 | |||
258 | int autofs4_fill_super(struct super_block *s, void *data, int silent) | 200 | int autofs4_fill_super(struct super_block *s, void *data, int silent) |
259 | { | 201 | { |
260 | struct inode * root_inode; | 202 | struct inode * root_inode; |
@@ -292,15 +234,16 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) | |||
292 | s->s_blocksize_bits = 10; | 234 | s->s_blocksize_bits = 10; |
293 | s->s_magic = AUTOFS_SUPER_MAGIC; | 235 | s->s_magic = AUTOFS_SUPER_MAGIC; |
294 | s->s_op = &autofs4_sops; | 236 | s->s_op = &autofs4_sops; |
237 | s->s_d_op = &autofs4_dentry_operations; | ||
295 | s->s_time_gran = 1; | 238 | s->s_time_gran = 1; |
296 | 239 | ||
297 | /* | 240 | /* |
298 | * Get the root inode and dentry, but defer checking for errors. | 241 | * Get the root inode and dentry, but defer checking for errors. |
299 | */ | 242 | */ |
300 | ino = autofs4_mkroot(sbi); | 243 | ino = autofs4_new_ino(sbi); |
301 | if (!ino) | 244 | if (!ino) |
302 | goto fail_free; | 245 | goto fail_free; |
303 | root_inode = autofs4_get_inode(s, ino); | 246 | root_inode = autofs4_get_inode(s, S_IFDIR | 0755); |
304 | if (!root_inode) | 247 | if (!root_inode) |
305 | goto fail_ino; | 248 | goto fail_ino; |
306 | 249 | ||
@@ -309,7 +252,6 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) | |||
309 | goto fail_iput; | 252 | goto fail_iput; |
310 | pipe = NULL; | 253 | pipe = NULL; |
311 | 254 | ||
312 | d_set_d_op(root, &autofs4_sb_dentry_operations); | ||
313 | root->d_fsdata = ino; | 255 | root->d_fsdata = ino; |
314 | 256 | ||
315 | /* Can this call block? */ | 257 | /* Can this call block? */ |
@@ -320,10 +262,11 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) | |||
320 | goto fail_dput; | 262 | goto fail_dput; |
321 | } | 263 | } |
322 | 264 | ||
265 | if (autofs_type_trigger(sbi->type)) | ||
266 | __managed_dentry_set_managed(root); | ||
267 | |||
323 | root_inode->i_fop = &autofs4_root_operations; | 268 | root_inode->i_fop = &autofs4_root_operations; |
324 | root_inode->i_op = autofs_type_trigger(sbi->type) ? | 269 | root_inode->i_op = &autofs4_dir_inode_operations; |
325 | &autofs4_direct_root_inode_operations : | ||
326 | &autofs4_indirect_root_inode_operations; | ||
327 | 270 | ||
328 | /* Couldn't this be tested earlier? */ | 271 | /* Couldn't this be tested earlier? */ |
329 | if (sbi->max_proto < AUTOFS_MIN_PROTO_VERSION || | 272 | if (sbi->max_proto < AUTOFS_MIN_PROTO_VERSION || |
@@ -383,16 +326,14 @@ fail_unlock: | |||
383 | return -EINVAL; | 326 | return -EINVAL; |
384 | } | 327 | } |
385 | 328 | ||
386 | struct inode *autofs4_get_inode(struct super_block *sb, | 329 | struct inode *autofs4_get_inode(struct super_block *sb, mode_t mode) |
387 | struct autofs_info *inf) | ||
388 | { | 330 | { |
389 | struct inode *inode = new_inode(sb); | 331 | struct inode *inode = new_inode(sb); |
390 | 332 | ||
391 | if (inode == NULL) | 333 | if (inode == NULL) |
392 | return NULL; | 334 | return NULL; |
393 | 335 | ||
394 | inf->inode = inode; | 336 | inode->i_mode = mode; |
395 | inode->i_mode = inf->mode; | ||
396 | if (sb->s_root) { | 337 | if (sb->s_root) { |
397 | inode->i_uid = sb->s_root->d_inode->i_uid; | 338 | inode->i_uid = sb->s_root->d_inode->i_uid; |
398 | inode->i_gid = sb->s_root->d_inode->i_gid; | 339 | inode->i_gid = sb->s_root->d_inode->i_gid; |
@@ -400,12 +341,11 @@ struct inode *autofs4_get_inode(struct super_block *sb, | |||
400 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 341 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
401 | inode->i_ino = get_next_ino(); | 342 | inode->i_ino = get_next_ino(); |
402 | 343 | ||
403 | if (S_ISDIR(inf->mode)) { | 344 | if (S_ISDIR(mode)) { |
404 | inode->i_nlink = 2; | 345 | inode->i_nlink = 2; |
405 | inode->i_op = &autofs4_dir_inode_operations; | 346 | inode->i_op = &autofs4_dir_inode_operations; |
406 | inode->i_fop = &autofs4_dir_operations; | 347 | inode->i_fop = &autofs4_dir_operations; |
407 | } else if (S_ISLNK(inf->mode)) { | 348 | } else if (S_ISLNK(mode)) { |
408 | inode->i_size = inf->size; | ||
409 | inode->i_op = &autofs4_symlink_inode_operations; | 349 | inode->i_op = &autofs4_symlink_inode_operations; |
410 | } | 350 | } |
411 | 351 | ||
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 651e4ef563b1..014e7aba3b08 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c | |||
@@ -35,10 +35,9 @@ static long autofs4_root_compat_ioctl(struct file *,unsigned int,unsigned long); | |||
35 | #endif | 35 | #endif |
36 | static int autofs4_dir_open(struct inode *inode, struct file *file); | 36 | static int autofs4_dir_open(struct inode *inode, struct file *file); |
37 | static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *); | 37 | static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *); |
38 | static void *autofs4_follow_link(struct dentry *, struct nameidata *); | 38 | static struct vfsmount *autofs4_d_automount(struct path *); |
39 | 39 | static int autofs4_d_manage(struct dentry *, bool, bool); | |
40 | #define TRIGGER_FLAGS (LOOKUP_CONTINUE | LOOKUP_DIRECTORY) | 40 | static void autofs4_dentry_release(struct dentry *); |
41 | #define TRIGGER_INTENTS (LOOKUP_OPEN | LOOKUP_CREATE) | ||
42 | 41 | ||
43 | const struct file_operations autofs4_root_operations = { | 42 | const struct file_operations autofs4_root_operations = { |
44 | .open = dcache_dir_open, | 43 | .open = dcache_dir_open, |
@@ -60,7 +59,7 @@ const struct file_operations autofs4_dir_operations = { | |||
60 | .llseek = dcache_dir_lseek, | 59 | .llseek = dcache_dir_lseek, |
61 | }; | 60 | }; |
62 | 61 | ||
63 | const struct inode_operations autofs4_indirect_root_inode_operations = { | 62 | const struct inode_operations autofs4_dir_inode_operations = { |
64 | .lookup = autofs4_lookup, | 63 | .lookup = autofs4_lookup, |
65 | .unlink = autofs4_dir_unlink, | 64 | .unlink = autofs4_dir_unlink, |
66 | .symlink = autofs4_dir_symlink, | 65 | .symlink = autofs4_dir_symlink, |
@@ -68,20 +67,10 @@ const struct inode_operations autofs4_indirect_root_inode_operations = { | |||
68 | .rmdir = autofs4_dir_rmdir, | 67 | .rmdir = autofs4_dir_rmdir, |
69 | }; | 68 | }; |
70 | 69 | ||
71 | const struct inode_operations autofs4_direct_root_inode_operations = { | 70 | const struct dentry_operations autofs4_dentry_operations = { |
72 | .lookup = autofs4_lookup, | 71 | .d_automount = autofs4_d_automount, |
73 | .unlink = autofs4_dir_unlink, | 72 | .d_manage = autofs4_d_manage, |
74 | .mkdir = autofs4_dir_mkdir, | 73 | .d_release = autofs4_dentry_release, |
75 | .rmdir = autofs4_dir_rmdir, | ||
76 | .follow_link = autofs4_follow_link, | ||
77 | }; | ||
78 | |||
79 | const struct inode_operations autofs4_dir_inode_operations = { | ||
80 | .lookup = autofs4_lookup, | ||
81 | .unlink = autofs4_dir_unlink, | ||
82 | .symlink = autofs4_dir_symlink, | ||
83 | .mkdir = autofs4_dir_mkdir, | ||
84 | .rmdir = autofs4_dir_rmdir, | ||
85 | }; | 74 | }; |
86 | 75 | ||
87 | static void autofs4_add_active(struct dentry *dentry) | 76 | static void autofs4_add_active(struct dentry *dentry) |
@@ -116,14 +105,6 @@ static void autofs4_del_active(struct dentry *dentry) | |||
116 | return; | 105 | return; |
117 | } | 106 | } |
118 | 107 | ||
119 | static unsigned int autofs4_need_mount(unsigned int flags) | ||
120 | { | ||
121 | unsigned int res = 0; | ||
122 | if (flags & (TRIGGER_FLAGS | TRIGGER_INTENTS)) | ||
123 | res = 1; | ||
124 | return res; | ||
125 | } | ||
126 | |||
127 | static int autofs4_dir_open(struct inode *inode, struct file *file) | 108 | static int autofs4_dir_open(struct inode *inode, struct file *file) |
128 | { | 109 | { |
129 | struct dentry *dentry = file->f_path.dentry; | 110 | struct dentry *dentry = file->f_path.dentry; |
@@ -158,278 +139,27 @@ out: | |||
158 | return dcache_dir_open(inode, file); | 139 | return dcache_dir_open(inode, file); |
159 | } | 140 | } |
160 | 141 | ||
161 | static int try_to_fill_dentry(struct dentry *dentry, int flags) | 142 | static void autofs4_dentry_release(struct dentry *de) |
162 | { | ||
163 | struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); | ||
164 | struct autofs_info *ino = autofs4_dentry_ino(dentry); | ||
165 | int status; | ||
166 | |||
167 | DPRINTK("dentry=%p %.*s ino=%p", | ||
168 | dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode); | ||
169 | |||
170 | /* | ||
171 | * Wait for a pending mount, triggering one if there | ||
172 | * isn't one already | ||
173 | */ | ||
174 | if (dentry->d_inode == NULL) { | ||
175 | DPRINTK("waiting for mount name=%.*s", | ||
176 | dentry->d_name.len, dentry->d_name.name); | ||
177 | |||
178 | status = autofs4_wait(sbi, dentry, NFY_MOUNT); | ||
179 | |||
180 | DPRINTK("mount done status=%d", status); | ||
181 | |||
182 | /* Turn this into a real negative dentry? */ | ||
183 | if (status == -ENOENT) { | ||
184 | spin_lock(&sbi->fs_lock); | ||
185 | ino->flags &= ~AUTOFS_INF_PENDING; | ||
186 | spin_unlock(&sbi->fs_lock); | ||
187 | return status; | ||
188 | } else if (status) { | ||
189 | /* Return a negative dentry, but leave it "pending" */ | ||
190 | return status; | ||
191 | } | ||
192 | /* Trigger mount for path component or follow link */ | ||
193 | } else if (ino->flags & AUTOFS_INF_PENDING || | ||
194 | autofs4_need_mount(flags)) { | ||
195 | DPRINTK("waiting for mount name=%.*s", | ||
196 | dentry->d_name.len, dentry->d_name.name); | ||
197 | |||
198 | spin_lock(&sbi->fs_lock); | ||
199 | ino->flags |= AUTOFS_INF_PENDING; | ||
200 | spin_unlock(&sbi->fs_lock); | ||
201 | status = autofs4_wait(sbi, dentry, NFY_MOUNT); | ||
202 | |||
203 | DPRINTK("mount done status=%d", status); | ||
204 | |||
205 | if (status) { | ||
206 | spin_lock(&sbi->fs_lock); | ||
207 | ino->flags &= ~AUTOFS_INF_PENDING; | ||
208 | spin_unlock(&sbi->fs_lock); | ||
209 | return status; | ||
210 | } | ||
211 | } | ||
212 | |||
213 | /* Initialize expiry counter after successful mount */ | ||
214 | ino->last_used = jiffies; | ||
215 | |||
216 | spin_lock(&sbi->fs_lock); | ||
217 | ino->flags &= ~AUTOFS_INF_PENDING; | ||
218 | spin_unlock(&sbi->fs_lock); | ||
219 | |||
220 | return 0; | ||
221 | } | ||
222 | |||
223 | /* For autofs direct mounts the follow link triggers the mount */ | ||
224 | static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) | ||
225 | { | ||
226 | struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); | ||
227 | struct autofs_info *ino = autofs4_dentry_ino(dentry); | ||
228 | int oz_mode = autofs4_oz_mode(sbi); | ||
229 | unsigned int lookup_type; | ||
230 | int status; | ||
231 | |||
232 | DPRINTK("dentry=%p %.*s oz_mode=%d nd->flags=%d", | ||
233 | dentry, dentry->d_name.len, dentry->d_name.name, oz_mode, | ||
234 | nd->flags); | ||
235 | /* | ||
236 | * For an expire of a covered direct or offset mount we need | ||
237 | * to break out of follow_down() at the autofs mount trigger | ||
238 | * (d_mounted--), so we can see the expiring flag, and manage | ||
239 | * the blocking and following here until the expire is completed. | ||
240 | */ | ||
241 | if (oz_mode) { | ||
242 | spin_lock(&sbi->fs_lock); | ||
243 | if (ino->flags & AUTOFS_INF_EXPIRING) { | ||
244 | spin_unlock(&sbi->fs_lock); | ||
245 | /* Follow down to our covering mount. */ | ||
246 | if (!follow_down(&nd->path)) | ||
247 | goto done; | ||
248 | goto follow; | ||
249 | } | ||
250 | spin_unlock(&sbi->fs_lock); | ||
251 | goto done; | ||
252 | } | ||
253 | |||
254 | /* If an expire request is pending everyone must wait. */ | ||
255 | autofs4_expire_wait(dentry); | ||
256 | |||
257 | /* We trigger a mount for almost all flags */ | ||
258 | lookup_type = autofs4_need_mount(nd->flags); | ||
259 | spin_lock(&sbi->fs_lock); | ||
260 | spin_lock(&autofs4_lock); | ||
261 | spin_lock(&dentry->d_lock); | ||
262 | if (!(lookup_type || ino->flags & AUTOFS_INF_PENDING)) { | ||
263 | spin_unlock(&dentry->d_lock); | ||
264 | spin_unlock(&autofs4_lock); | ||
265 | spin_unlock(&sbi->fs_lock); | ||
266 | goto follow; | ||
267 | } | ||
268 | |||
269 | /* | ||
270 | * If the dentry contains directories then it is an autofs | ||
271 | * multi-mount with no root mount offset. So don't try to | ||
272 | * mount it again. | ||
273 | */ | ||
274 | if (ino->flags & AUTOFS_INF_PENDING || | ||
275 | (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs))) { | ||
276 | spin_unlock(&dentry->d_lock); | ||
277 | spin_unlock(&autofs4_lock); | ||
278 | spin_unlock(&sbi->fs_lock); | ||
279 | |||
280 | status = try_to_fill_dentry(dentry, nd->flags); | ||
281 | if (status) | ||
282 | goto out_error; | ||
283 | |||
284 | goto follow; | ||
285 | } | ||
286 | spin_unlock(&dentry->d_lock); | ||
287 | spin_unlock(&autofs4_lock); | ||
288 | spin_unlock(&sbi->fs_lock); | ||
289 | follow: | ||
290 | /* | ||
291 | * If there is no root mount it must be an autofs | ||
292 | * multi-mount with no root offset so we don't need | ||
293 | * to follow it. | ||
294 | */ | ||
295 | if (d_mountpoint(dentry)) { | ||
296 | if (!autofs4_follow_mount(&nd->path)) { | ||
297 | status = -ENOENT; | ||
298 | goto out_error; | ||
299 | } | ||
300 | } | ||
301 | |||
302 | done: | ||
303 | return NULL; | ||
304 | |||
305 | out_error: | ||
306 | path_put(&nd->path); | ||
307 | return ERR_PTR(status); | ||
308 | } | ||
309 | |||
310 | /* | ||
311 | * Revalidate is called on every cache lookup. Some of those | ||
312 | * cache lookups may actually happen while the dentry is not | ||
313 | * yet completely filled in, and revalidate has to delay such | ||
314 | * lookups.. | ||
315 | */ | ||
316 | static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd) | ||
317 | { | 143 | { |
318 | struct inode *dir; | 144 | struct autofs_info *ino = autofs4_dentry_ino(de); |
319 | struct autofs_sb_info *sbi; | 145 | struct autofs_sb_info *sbi = autofs4_sbi(de->d_sb); |
320 | int oz_mode; | ||
321 | int flags = nd ? nd->flags : 0; | ||
322 | int status = 1; | ||
323 | |||
324 | if (flags & LOOKUP_RCU) | ||
325 | return -ECHILD; | ||
326 | |||
327 | dir = dentry->d_parent->d_inode; | ||
328 | sbi = autofs4_sbi(dir->i_sb); | ||
329 | oz_mode = autofs4_oz_mode(sbi); | ||
330 | |||
331 | /* Pending dentry */ | ||
332 | spin_lock(&sbi->fs_lock); | ||
333 | if (autofs4_ispending(dentry)) { | ||
334 | /* The daemon never causes a mount to trigger */ | ||
335 | spin_unlock(&sbi->fs_lock); | ||
336 | |||
337 | if (oz_mode) | ||
338 | return 1; | ||
339 | |||
340 | /* | ||
341 | * If the directory has gone away due to an expire | ||
342 | * we have been called as ->d_revalidate() and so | ||
343 | * we need to return false and proceed to ->lookup(). | ||
344 | */ | ||
345 | if (autofs4_expire_wait(dentry) == -EAGAIN) | ||
346 | return 0; | ||
347 | |||
348 | /* | ||
349 | * A zero status is success otherwise we have a | ||
350 | * negative error code. | ||
351 | */ | ||
352 | status = try_to_fill_dentry(dentry, flags); | ||
353 | if (status == 0) | ||
354 | return 1; | ||
355 | |||
356 | return status; | ||
357 | } | ||
358 | spin_unlock(&sbi->fs_lock); | ||
359 | |||
360 | /* Negative dentry.. invalidate if "old" */ | ||
361 | if (dentry->d_inode == NULL) | ||
362 | return 0; | ||
363 | |||
364 | /* Check for a non-mountpoint directory with no contents */ | ||
365 | spin_lock(&autofs4_lock); | ||
366 | spin_lock(&dentry->d_lock); | ||
367 | if (S_ISDIR(dentry->d_inode->i_mode) && | ||
368 | !d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) { | ||
369 | DPRINTK("dentry=%p %.*s, emptydir", | ||
370 | dentry, dentry->d_name.len, dentry->d_name.name); | ||
371 | spin_unlock(&dentry->d_lock); | ||
372 | spin_unlock(&autofs4_lock); | ||
373 | |||
374 | /* The daemon never causes a mount to trigger */ | ||
375 | if (oz_mode) | ||
376 | return 1; | ||
377 | |||
378 | /* | ||
379 | * A zero status is success otherwise we have a | ||
380 | * negative error code. | ||
381 | */ | ||
382 | status = try_to_fill_dentry(dentry, flags); | ||
383 | if (status == 0) | ||
384 | return 1; | ||
385 | |||
386 | return status; | ||
387 | } | ||
388 | spin_unlock(&dentry->d_lock); | ||
389 | spin_unlock(&autofs4_lock); | ||
390 | |||
391 | return 1; | ||
392 | } | ||
393 | |||
394 | void autofs4_dentry_release(struct dentry *de) | ||
395 | { | ||
396 | struct autofs_info *inf; | ||
397 | 146 | ||
398 | DPRINTK("releasing %p", de); | 147 | DPRINTK("releasing %p", de); |
399 | 148 | ||
400 | inf = autofs4_dentry_ino(de); | 149 | if (!ino) |
401 | de->d_fsdata = NULL; | 150 | return; |
402 | |||
403 | if (inf) { | ||
404 | struct autofs_sb_info *sbi = autofs4_sbi(de->d_sb); | ||
405 | |||
406 | if (sbi) { | ||
407 | spin_lock(&sbi->lookup_lock); | ||
408 | if (!list_empty(&inf->active)) | ||
409 | list_del(&inf->active); | ||
410 | if (!list_empty(&inf->expiring)) | ||
411 | list_del(&inf->expiring); | ||
412 | spin_unlock(&sbi->lookup_lock); | ||
413 | } | ||
414 | |||
415 | inf->dentry = NULL; | ||
416 | inf->inode = NULL; | ||
417 | 151 | ||
418 | autofs4_free_ino(inf); | 152 | if (sbi) { |
153 | spin_lock(&sbi->lookup_lock); | ||
154 | if (!list_empty(&ino->active)) | ||
155 | list_del(&ino->active); | ||
156 | if (!list_empty(&ino->expiring)) | ||
157 | list_del(&ino->expiring); | ||
158 | spin_unlock(&sbi->lookup_lock); | ||
419 | } | 159 | } |
420 | } | ||
421 | 160 | ||
422 | /* For dentries of directories in the root dir */ | 161 | autofs4_free_ino(ino); |
423 | static const struct dentry_operations autofs4_root_dentry_operations = { | 162 | } |
424 | .d_revalidate = autofs4_revalidate, | ||
425 | .d_release = autofs4_dentry_release, | ||
426 | }; | ||
427 | |||
428 | /* For other dentries */ | ||
429 | static const struct dentry_operations autofs4_dentry_operations = { | ||
430 | .d_revalidate = autofs4_revalidate, | ||
431 | .d_release = autofs4_dentry_release, | ||
432 | }; | ||
433 | 163 | ||
434 | static struct dentry *autofs4_lookup_active(struct dentry *dentry) | 164 | static struct dentry *autofs4_lookup_active(struct dentry *dentry) |
435 | { | 165 | { |
@@ -541,51 +271,246 @@ next: | |||
541 | return NULL; | 271 | return NULL; |
542 | } | 272 | } |
543 | 273 | ||
274 | static int autofs4_mount_wait(struct dentry *dentry) | ||
275 | { | ||
276 | struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); | ||
277 | struct autofs_info *ino = autofs4_dentry_ino(dentry); | ||
278 | int status; | ||
279 | |||
280 | if (ino->flags & AUTOFS_INF_PENDING) { | ||
281 | DPRINTK("waiting for mount name=%.*s", | ||
282 | dentry->d_name.len, dentry->d_name.name); | ||
283 | status = autofs4_wait(sbi, dentry, NFY_MOUNT); | ||
284 | DPRINTK("mount wait done status=%d", status); | ||
285 | ino->last_used = jiffies; | ||
286 | return status; | ||
287 | } | ||
288 | return 0; | ||
289 | } | ||
290 | |||
291 | static int do_expire_wait(struct dentry *dentry) | ||
292 | { | ||
293 | struct dentry *expiring; | ||
294 | |||
295 | expiring = autofs4_lookup_expiring(dentry); | ||
296 | if (!expiring) | ||
297 | return autofs4_expire_wait(dentry); | ||
298 | else { | ||
299 | /* | ||
300 | * If we are racing with expire the request might not | ||
301 | * be quite complete, but the directory has been removed | ||
302 | * so it must have been successful, just wait for it. | ||
303 | */ | ||
304 | autofs4_expire_wait(expiring); | ||
305 | autofs4_del_expiring(expiring); | ||
306 | dput(expiring); | ||
307 | } | ||
308 | return 0; | ||
309 | } | ||
310 | |||
311 | static struct dentry *autofs4_mountpoint_changed(struct path *path) | ||
312 | { | ||
313 | struct dentry *dentry = path->dentry; | ||
314 | struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); | ||
315 | |||
316 | /* | ||
317 | * If this is an indirect mount the dentry could have gone away | ||
318 | * as a result of an expire and a new one created. | ||
319 | */ | ||
320 | if (autofs_type_indirect(sbi->type) && d_unhashed(dentry)) { | ||
321 | struct dentry *parent = dentry->d_parent; | ||
322 | struct dentry *new = d_lookup(parent, &dentry->d_name); | ||
323 | if (!new) | ||
324 | return NULL; | ||
325 | dput(path->dentry); | ||
326 | path->dentry = new; | ||
327 | } | ||
328 | return path->dentry; | ||
329 | } | ||
330 | |||
331 | static struct vfsmount *autofs4_d_automount(struct path *path) | ||
332 | { | ||
333 | struct dentry *dentry = path->dentry; | ||
334 | struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); | ||
335 | struct autofs_info *ino = autofs4_dentry_ino(dentry); | ||
336 | int status; | ||
337 | |||
338 | DPRINTK("dentry=%p %.*s", | ||
339 | dentry, dentry->d_name.len, dentry->d_name.name); | ||
340 | |||
341 | /* | ||
342 | * Someone may have manually umounted this or it was a submount | ||
343 | * that has gone away. | ||
344 | */ | ||
345 | spin_lock(&dentry->d_lock); | ||
346 | if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) { | ||
347 | if (!(dentry->d_flags & DCACHE_MANAGE_TRANSIT) && | ||
348 | (dentry->d_flags & DCACHE_NEED_AUTOMOUNT)) | ||
349 | __managed_dentry_set_transit(path->dentry); | ||
350 | } | ||
351 | spin_unlock(&dentry->d_lock); | ||
352 | |||
353 | /* The daemon never triggers a mount. */ | ||
354 | if (autofs4_oz_mode(sbi)) | ||
355 | return NULL; | ||
356 | |||
357 | /* | ||
358 | * If an expire request is pending everyone must wait. | ||
359 | * If the expire fails we're still mounted so continue | ||
360 | * the follow and return. A return of -EAGAIN (which only | ||
361 | * happens with indirect mounts) means the expire completed | ||
362 | * and the directory was removed, so just go ahead and try | ||
363 | * the mount. | ||
364 | */ | ||
365 | status = do_expire_wait(dentry); | ||
366 | if (status && status != -EAGAIN) | ||
367 | return NULL; | ||
368 | |||
369 | /* Callback to the daemon to perform the mount or wait */ | ||
370 | spin_lock(&sbi->fs_lock); | ||
371 | if (ino->flags & AUTOFS_INF_PENDING) { | ||
372 | spin_unlock(&sbi->fs_lock); | ||
373 | status = autofs4_mount_wait(dentry); | ||
374 | if (status) | ||
375 | return ERR_PTR(status); | ||
376 | spin_lock(&sbi->fs_lock); | ||
377 | goto done; | ||
378 | } | ||
379 | |||
380 | /* | ||
381 | * If the dentry is a symlink it's equivalent to a directory | ||
382 | * having d_mountpoint() true, so there's no need to call back | ||
383 | * to the daemon. | ||
384 | */ | ||
385 | if (dentry->d_inode && S_ISLNK(dentry->d_inode->i_mode)) | ||
386 | goto done; | ||
387 | if (!d_mountpoint(dentry)) { | ||
388 | /* | ||
389 | * It's possible that user space hasn't removed directories | ||
390 | * after umounting a rootless multi-mount, although it | ||
391 | * should. For v5 have_submounts() is sufficient to handle | ||
392 | * this because the leaves of the directory tree under the | ||
393 | * mount never trigger mounts themselves (they have an autofs | ||
394 | * trigger mount mounted on them). But v4 pseudo direct mounts | ||
395 | * do need the leaves to to trigger mounts. In this case we | ||
396 | * have no choice but to use the list_empty() check and | ||
397 | * require user space behave. | ||
398 | */ | ||
399 | if (sbi->version > 4) { | ||
400 | if (have_submounts(dentry)) | ||
401 | goto done; | ||
402 | } else { | ||
403 | spin_lock(&dentry->d_lock); | ||
404 | if (!list_empty(&dentry->d_subdirs)) { | ||
405 | spin_unlock(&dentry->d_lock); | ||
406 | goto done; | ||
407 | } | ||
408 | spin_unlock(&dentry->d_lock); | ||
409 | } | ||
410 | ino->flags |= AUTOFS_INF_PENDING; | ||
411 | spin_unlock(&sbi->fs_lock); | ||
412 | status = autofs4_mount_wait(dentry); | ||
413 | if (status) | ||
414 | return ERR_PTR(status); | ||
415 | spin_lock(&sbi->fs_lock); | ||
416 | ino->flags &= ~AUTOFS_INF_PENDING; | ||
417 | } | ||
418 | done: | ||
419 | if (!(ino->flags & AUTOFS_INF_EXPIRING)) { | ||
420 | /* | ||
421 | * Any needed mounting has been completed and the path updated | ||
422 | * so turn this into a normal dentry so we don't continually | ||
423 | * call ->d_automount() and ->d_manage(). | ||
424 | */ | ||
425 | spin_lock(&dentry->d_lock); | ||
426 | __managed_dentry_clear_transit(dentry); | ||
427 | /* | ||
428 | * Only clear DMANAGED_AUTOMOUNT for rootless multi-mounts and | ||
429 | * symlinks as in all other cases the dentry will be covered by | ||
430 | * an actual mount so ->d_automount() won't be called during | ||
431 | * the follow. | ||
432 | */ | ||
433 | if ((!d_mountpoint(dentry) && | ||
434 | !list_empty(&dentry->d_subdirs)) || | ||
435 | (dentry->d_inode && S_ISLNK(dentry->d_inode->i_mode))) | ||
436 | __managed_dentry_clear_automount(dentry); | ||
437 | spin_unlock(&dentry->d_lock); | ||
438 | } | ||
439 | spin_unlock(&sbi->fs_lock); | ||
440 | |||
441 | /* Mount succeeded, check if we ended up with a new dentry */ | ||
442 | dentry = autofs4_mountpoint_changed(path); | ||
443 | if (!dentry) | ||
444 | return ERR_PTR(-ENOENT); | ||
445 | |||
446 | return NULL; | ||
447 | } | ||
448 | |||
449 | int autofs4_d_manage(struct dentry *dentry, bool mounting_here, bool rcu_walk) | ||
450 | { | ||
451 | struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); | ||
452 | |||
453 | DPRINTK("dentry=%p %.*s", | ||
454 | dentry, dentry->d_name.len, dentry->d_name.name); | ||
455 | |||
456 | /* The daemon never waits. */ | ||
457 | if (autofs4_oz_mode(sbi) || mounting_here) { | ||
458 | if (!d_mountpoint(dentry)) | ||
459 | return -EISDIR; | ||
460 | return 0; | ||
461 | } | ||
462 | |||
463 | /* We need to sleep, so we need pathwalk to be in ref-mode */ | ||
464 | if (rcu_walk) | ||
465 | return -ECHILD; | ||
466 | |||
467 | /* Wait for pending expires */ | ||
468 | do_expire_wait(dentry); | ||
469 | |||
470 | /* | ||
471 | * This dentry may be under construction so wait on mount | ||
472 | * completion. | ||
473 | */ | ||
474 | return autofs4_mount_wait(dentry); | ||
475 | } | ||
476 | |||
544 | /* Lookups in the root directory */ | 477 | /* Lookups in the root directory */ |
545 | static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) | 478 | static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) |
546 | { | 479 | { |
547 | struct autofs_sb_info *sbi; | 480 | struct autofs_sb_info *sbi; |
548 | struct autofs_info *ino; | 481 | struct autofs_info *ino; |
549 | struct dentry *expiring, *active; | 482 | struct dentry *active; |
550 | int oz_mode; | ||
551 | 483 | ||
552 | DPRINTK("name = %.*s", | 484 | DPRINTK("name = %.*s", dentry->d_name.len, dentry->d_name.name); |
553 | dentry->d_name.len, dentry->d_name.name); | ||
554 | 485 | ||
555 | /* File name too long to exist */ | 486 | /* File name too long to exist */ |
556 | if (dentry->d_name.len > NAME_MAX) | 487 | if (dentry->d_name.len > NAME_MAX) |
557 | return ERR_PTR(-ENAMETOOLONG); | 488 | return ERR_PTR(-ENAMETOOLONG); |
558 | 489 | ||
559 | sbi = autofs4_sbi(dir->i_sb); | 490 | sbi = autofs4_sbi(dir->i_sb); |
560 | oz_mode = autofs4_oz_mode(sbi); | ||
561 | 491 | ||
562 | DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d", | 492 | DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d", |
563 | current->pid, task_pgrp_nr(current), sbi->catatonic, oz_mode); | 493 | current->pid, task_pgrp_nr(current), sbi->catatonic, |
494 | autofs4_oz_mode(sbi)); | ||
564 | 495 | ||
565 | active = autofs4_lookup_active(dentry); | 496 | active = autofs4_lookup_active(dentry); |
566 | if (active) { | 497 | if (active) { |
567 | dentry = active; | 498 | return active; |
568 | ino = autofs4_dentry_ino(dentry); | ||
569 | } else { | 499 | } else { |
570 | /* | 500 | /* |
571 | * Mark the dentry incomplete but don't hash it. We do this | 501 | * A dentry that is not within the root can never trigger a |
572 | * to serialize our inode creation operations (symlink and | 502 | * mount operation, unless the directory already exists, so we |
573 | * mkdir) which prevents deadlock during the callback to | 503 | * can return fail immediately. The daemon however does need |
574 | * the daemon. Subsequent user space lookups for the same | 504 | * to create directories within the file system. |
575 | * dentry are placed on the wait queue while the daemon | ||
576 | * itself is allowed passage unresticted so the create | ||
577 | * operation itself can then hash the dentry. Finally, | ||
578 | * we check for the hashed dentry and return the newly | ||
579 | * hashed dentry. | ||
580 | */ | 505 | */ |
581 | d_set_d_op(dentry, &autofs4_root_dentry_operations); | 506 | if (!autofs4_oz_mode(sbi) && !IS_ROOT(dentry->d_parent)) |
507 | return ERR_PTR(-ENOENT); | ||
582 | 508 | ||
583 | /* | 509 | /* Mark entries in the root as mount triggers */ |
584 | * And we need to ensure that the same dentry is used for | 510 | if (autofs_type_indirect(sbi->type) && IS_ROOT(dentry->d_parent)) |
585 | * all following lookup calls until it is hashed so that | 511 | __managed_dentry_set_managed(dentry); |
586 | * the dentry flags are persistent throughout the request. | 512 | |
587 | */ | 513 | ino = autofs4_new_ino(sbi); |
588 | ino = autofs4_init_ino(NULL, sbi, 0555); | ||
589 | if (!ino) | 514 | if (!ino) |
590 | return ERR_PTR(-ENOMEM); | 515 | return ERR_PTR(-ENOMEM); |
591 | 516 | ||
@@ -596,82 +521,6 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s | |||
596 | 521 | ||
597 | d_instantiate(dentry, NULL); | 522 | d_instantiate(dentry, NULL); |
598 | } | 523 | } |
599 | |||
600 | if (!oz_mode) { | ||
601 | mutex_unlock(&dir->i_mutex); | ||
602 | expiring = autofs4_lookup_expiring(dentry); | ||
603 | if (expiring) { | ||
604 | /* | ||
605 | * If we are racing with expire the request might not | ||
606 | * be quite complete but the directory has been removed | ||
607 | * so it must have been successful, so just wait for it. | ||
608 | */ | ||
609 | autofs4_expire_wait(expiring); | ||
610 | autofs4_del_expiring(expiring); | ||
611 | dput(expiring); | ||
612 | } | ||
613 | |||
614 | spin_lock(&sbi->fs_lock); | ||
615 | ino->flags |= AUTOFS_INF_PENDING; | ||
616 | spin_unlock(&sbi->fs_lock); | ||
617 | if (dentry->d_op && dentry->d_op->d_revalidate) | ||
618 | (dentry->d_op->d_revalidate)(dentry, nd); | ||
619 | mutex_lock(&dir->i_mutex); | ||
620 | } | ||
621 | |||
622 | /* | ||
623 | * If we are still pending, check if we had to handle | ||
624 | * a signal. If so we can force a restart.. | ||
625 | */ | ||
626 | if (ino->flags & AUTOFS_INF_PENDING) { | ||
627 | /* See if we were interrupted */ | ||
628 | if (signal_pending(current)) { | ||
629 | sigset_t *sigset = ¤t->pending.signal; | ||
630 | if (sigismember (sigset, SIGKILL) || | ||
631 | sigismember (sigset, SIGQUIT) || | ||
632 | sigismember (sigset, SIGINT)) { | ||
633 | if (active) | ||
634 | dput(active); | ||
635 | return ERR_PTR(-ERESTARTNOINTR); | ||
636 | } | ||
637 | } | ||
638 | if (!oz_mode) { | ||
639 | spin_lock(&sbi->fs_lock); | ||
640 | ino->flags &= ~AUTOFS_INF_PENDING; | ||
641 | spin_unlock(&sbi->fs_lock); | ||
642 | } | ||
643 | } | ||
644 | |||
645 | /* | ||
646 | * If this dentry is unhashed, then we shouldn't honour this | ||
647 | * lookup. Returning ENOENT here doesn't do the right thing | ||
648 | * for all system calls, but it should be OK for the operations | ||
649 | * we permit from an autofs. | ||
650 | */ | ||
651 | if (!oz_mode && d_unhashed(dentry)) { | ||
652 | /* | ||
653 | * A user space application can (and has done in the past) | ||
654 | * remove and re-create this directory during the callback. | ||
655 | * This can leave us with an unhashed dentry, but a | ||
656 | * successful mount! So we need to perform another | ||
657 | * cached lookup in case the dentry now exists. | ||
658 | */ | ||
659 | struct dentry *parent = dentry->d_parent; | ||
660 | struct dentry *new = d_lookup(parent, &dentry->d_name); | ||
661 | if (new != NULL) | ||
662 | dentry = new; | ||
663 | else | ||
664 | dentry = ERR_PTR(-ENOENT); | ||
665 | |||
666 | if (active) | ||
667 | dput(active); | ||
668 | |||
669 | return dentry; | ||
670 | } | ||
671 | |||
672 | if (active) | ||
673 | return active; | ||
674 | |||
675 | return NULL; | 524 | return NULL; |
676 | } | 525 | } |
677 | 526 | ||
@@ -683,6 +532,7 @@ static int autofs4_dir_symlink(struct inode *dir, | |||
683 | struct autofs_info *ino = autofs4_dentry_ino(dentry); | 532 | struct autofs_info *ino = autofs4_dentry_ino(dentry); |
684 | struct autofs_info *p_ino; | 533 | struct autofs_info *p_ino; |
685 | struct inode *inode; | 534 | struct inode *inode; |
535 | size_t size = strlen(symname); | ||
686 | char *cp; | 536 | char *cp; |
687 | 537 | ||
688 | DPRINTK("%s <- %.*s", symname, | 538 | DPRINTK("%s <- %.*s", symname, |
@@ -691,45 +541,35 @@ static int autofs4_dir_symlink(struct inode *dir, | |||
691 | if (!autofs4_oz_mode(sbi)) | 541 | if (!autofs4_oz_mode(sbi)) |
692 | return -EACCES; | 542 | return -EACCES; |
693 | 543 | ||
694 | ino = autofs4_init_ino(ino, sbi, S_IFLNK | 0555); | 544 | BUG_ON(!ino); |
695 | if (!ino) | 545 | |
696 | return -ENOMEM; | 546 | autofs4_clean_ino(ino); |
697 | 547 | ||
698 | autofs4_del_active(dentry); | 548 | autofs4_del_active(dentry); |
699 | 549 | ||
700 | ino->size = strlen(symname); | 550 | cp = kmalloc(size + 1, GFP_KERNEL); |
701 | cp = kmalloc(ino->size + 1, GFP_KERNEL); | 551 | if (!cp) |
702 | if (!cp) { | ||
703 | if (!dentry->d_fsdata) | ||
704 | kfree(ino); | ||
705 | return -ENOMEM; | 552 | return -ENOMEM; |
706 | } | ||
707 | 553 | ||
708 | strcpy(cp, symname); | 554 | strcpy(cp, symname); |
709 | 555 | ||
710 | inode = autofs4_get_inode(dir->i_sb, ino); | 556 | inode = autofs4_get_inode(dir->i_sb, S_IFLNK | 0555); |
711 | if (!inode) { | 557 | if (!inode) { |
712 | kfree(cp); | 558 | kfree(cp); |
713 | if (!dentry->d_fsdata) | 559 | if (!dentry->d_fsdata) |
714 | kfree(ino); | 560 | kfree(ino); |
715 | return -ENOMEM; | 561 | return -ENOMEM; |
716 | } | 562 | } |
563 | inode->i_private = cp; | ||
564 | inode->i_size = size; | ||
717 | d_add(dentry, inode); | 565 | d_add(dentry, inode); |
718 | 566 | ||
719 | if (dir == dir->i_sb->s_root->d_inode) | 567 | dget(dentry); |
720 | d_set_d_op(dentry, &autofs4_root_dentry_operations); | ||
721 | else | ||
722 | d_set_d_op(dentry, &autofs4_dentry_operations); | ||
723 | |||
724 | dentry->d_fsdata = ino; | ||
725 | ino->dentry = dget(dentry); | ||
726 | atomic_inc(&ino->count); | 568 | atomic_inc(&ino->count); |
727 | p_ino = autofs4_dentry_ino(dentry->d_parent); | 569 | p_ino = autofs4_dentry_ino(dentry->d_parent); |
728 | if (p_ino && dentry->d_parent != dentry) | 570 | if (p_ino && dentry->d_parent != dentry) |
729 | atomic_inc(&p_ino->count); | 571 | atomic_inc(&p_ino->count); |
730 | ino->inode = inode; | ||
731 | 572 | ||
732 | ino->u.symlink = cp; | ||
733 | dir->i_mtime = CURRENT_TIME; | 573 | dir->i_mtime = CURRENT_TIME; |
734 | 574 | ||
735 | return 0; | 575 | return 0; |
@@ -782,6 +622,58 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry) | |||
782 | return 0; | 622 | return 0; |
783 | } | 623 | } |
784 | 624 | ||
625 | /* | ||
626 | * Version 4 of autofs provides a pseudo direct mount implementation | ||
627 | * that relies on directories at the leaves of a directory tree under | ||
628 | * an indirect mount to trigger mounts. To allow for this we need to | ||
629 | * set the DMANAGED_AUTOMOUNT and DMANAGED_TRANSIT flags on the leaves | ||
630 | * of the directory tree. There is no need to clear the automount flag | ||
631 | * following a mount or restore it after an expire because these mounts | ||
632 | * are always covered. However, it is neccessary to ensure that these | ||
633 | * flags are clear on non-empty directories to avoid unnecessary calls | ||
634 | * during path walks. | ||
635 | */ | ||
636 | static void autofs_set_leaf_automount_flags(struct dentry *dentry) | ||
637 | { | ||
638 | struct dentry *parent; | ||
639 | |||
640 | /* root and dentrys in the root are already handled */ | ||
641 | if (IS_ROOT(dentry->d_parent)) | ||
642 | return; | ||
643 | |||
644 | managed_dentry_set_managed(dentry); | ||
645 | |||
646 | parent = dentry->d_parent; | ||
647 | /* only consider parents below dentrys in the root */ | ||
648 | if (IS_ROOT(parent->d_parent)) | ||
649 | return; | ||
650 | managed_dentry_clear_managed(parent); | ||
651 | return; | ||
652 | } | ||
653 | |||
654 | static void autofs_clear_leaf_automount_flags(struct dentry *dentry) | ||
655 | { | ||
656 | struct list_head *d_child; | ||
657 | struct dentry *parent; | ||
658 | |||
659 | /* flags for dentrys in the root are handled elsewhere */ | ||
660 | if (IS_ROOT(dentry->d_parent)) | ||
661 | return; | ||
662 | |||
663 | managed_dentry_clear_managed(dentry); | ||
664 | |||
665 | parent = dentry->d_parent; | ||
666 | /* only consider parents below dentrys in the root */ | ||
667 | if (IS_ROOT(parent->d_parent)) | ||
668 | return; | ||
669 | d_child = &dentry->d_u.d_child; | ||
670 | /* Set parent managed if it's becoming empty */ | ||
671 | if (d_child->next == &parent->d_subdirs && | ||
672 | d_child->prev == &parent->d_subdirs) | ||
673 | managed_dentry_set_managed(parent); | ||
674 | return; | ||
675 | } | ||
676 | |||
785 | static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry) | 677 | static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry) |
786 | { | 678 | { |
787 | struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); | 679 | struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); |
@@ -809,6 +701,9 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry) | |||
809 | spin_unlock(&dentry->d_lock); | 701 | spin_unlock(&dentry->d_lock); |
810 | spin_unlock(&autofs4_lock); | 702 | spin_unlock(&autofs4_lock); |
811 | 703 | ||
704 | if (sbi->version < 5) | ||
705 | autofs_clear_leaf_automount_flags(dentry); | ||
706 | |||
812 | if (atomic_dec_and_test(&ino->count)) { | 707 | if (atomic_dec_and_test(&ino->count)) { |
813 | p_ino = autofs4_dentry_ino(dentry->d_parent); | 708 | p_ino = autofs4_dentry_ino(dentry->d_parent); |
814 | if (p_ino && dentry->d_parent != dentry) | 709 | if (p_ino && dentry->d_parent != dentry) |
@@ -837,32 +732,25 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
837 | DPRINTK("dentry %p, creating %.*s", | 732 | DPRINTK("dentry %p, creating %.*s", |
838 | dentry, dentry->d_name.len, dentry->d_name.name); | 733 | dentry, dentry->d_name.len, dentry->d_name.name); |
839 | 734 | ||
840 | ino = autofs4_init_ino(ino, sbi, S_IFDIR | 0555); | 735 | BUG_ON(!ino); |
841 | if (!ino) | 736 | |
842 | return -ENOMEM; | 737 | autofs4_clean_ino(ino); |
843 | 738 | ||
844 | autofs4_del_active(dentry); | 739 | autofs4_del_active(dentry); |
845 | 740 | ||
846 | inode = autofs4_get_inode(dir->i_sb, ino); | 741 | inode = autofs4_get_inode(dir->i_sb, S_IFDIR | 0555); |
847 | if (!inode) { | 742 | if (!inode) |
848 | if (!dentry->d_fsdata) | ||
849 | kfree(ino); | ||
850 | return -ENOMEM; | 743 | return -ENOMEM; |
851 | } | ||
852 | d_add(dentry, inode); | 744 | d_add(dentry, inode); |
853 | 745 | ||
854 | if (dir == dir->i_sb->s_root->d_inode) | 746 | if (sbi->version < 5) |
855 | d_set_d_op(dentry, &autofs4_root_dentry_operations); | 747 | autofs_set_leaf_automount_flags(dentry); |
856 | else | ||
857 | d_set_d_op(dentry, &autofs4_dentry_operations); | ||
858 | 748 | ||
859 | dentry->d_fsdata = ino; | 749 | dget(dentry); |
860 | ino->dentry = dget(dentry); | ||
861 | atomic_inc(&ino->count); | 750 | atomic_inc(&ino->count); |
862 | p_ino = autofs4_dentry_ino(dentry->d_parent); | 751 | p_ino = autofs4_dentry_ino(dentry->d_parent); |
863 | if (p_ino && dentry->d_parent != dentry) | 752 | if (p_ino && dentry->d_parent != dentry) |
864 | atomic_inc(&p_ino->count); | 753 | atomic_inc(&p_ino->count); |
865 | ino->inode = inode; | ||
866 | inc_nlink(dir); | 754 | inc_nlink(dir); |
867 | dir->i_mtime = CURRENT_TIME; | 755 | dir->i_mtime = CURRENT_TIME; |
868 | 756 | ||
@@ -944,8 +832,7 @@ static inline int autofs4_ask_umount(struct vfsmount *mnt, int __user *p) | |||
944 | int is_autofs4_dentry(struct dentry *dentry) | 832 | int is_autofs4_dentry(struct dentry *dentry) |
945 | { | 833 | { |
946 | return dentry && dentry->d_inode && | 834 | return dentry && dentry->d_inode && |
947 | (dentry->d_op == &autofs4_root_dentry_operations || | 835 | dentry->d_op == &autofs4_dentry_operations && |
948 | dentry->d_op == &autofs4_dentry_operations) && | ||
949 | dentry->d_fsdata != NULL; | 836 | dentry->d_fsdata != NULL; |
950 | } | 837 | } |
951 | 838 | ||
diff --git a/fs/autofs4/symlink.c b/fs/autofs4/symlink.c index b4ea82934d2e..f27c094a1919 100644 --- a/fs/autofs4/symlink.c +++ b/fs/autofs4/symlink.c | |||
@@ -14,8 +14,7 @@ | |||
14 | 14 | ||
15 | static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) | 15 | static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) |
16 | { | 16 | { |
17 | struct autofs_info *ino = autofs4_dentry_ino(dentry); | 17 | nd_set_link(nd, dentry->d_inode->i_private); |
18 | nd_set_link(nd, (char *)ino->u.symlink); | ||
19 | return NULL; | 18 | return NULL; |
20 | } | 19 | } |
21 | 20 | ||
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index c5f8459c905e..56010056b2e6 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c | |||
@@ -309,6 +309,9 @@ static int validate_request(struct autofs_wait_queue **wait, | |||
309 | * completed while we waited on the mutex ... | 309 | * completed while we waited on the mutex ... |
310 | */ | 310 | */ |
311 | if (notify == NFY_MOUNT) { | 311 | if (notify == NFY_MOUNT) { |
312 | struct dentry *new = NULL; | ||
313 | int valid = 1; | ||
314 | |||
312 | /* | 315 | /* |
313 | * If the dentry was successfully mounted while we slept | 316 | * If the dentry was successfully mounted while we slept |
314 | * on the wait queue mutex we can return success. If it | 317 | * on the wait queue mutex we can return success. If it |
@@ -316,8 +319,20 @@ static int validate_request(struct autofs_wait_queue **wait, | |||
316 | * a multi-mount with no mount at it's base) we can | 319 | * a multi-mount with no mount at it's base) we can |
317 | * continue on and create a new request. | 320 | * continue on and create a new request. |
318 | */ | 321 | */ |
322 | if (!IS_ROOT(dentry)) { | ||
323 | if (dentry->d_inode && d_unhashed(dentry)) { | ||
324 | struct dentry *parent = dentry->d_parent; | ||
325 | new = d_lookup(parent, &dentry->d_name); | ||
326 | if (new) | ||
327 | dentry = new; | ||
328 | } | ||
329 | } | ||
319 | if (have_submounts(dentry)) | 330 | if (have_submounts(dentry)) |
320 | return 0; | 331 | valid = 0; |
332 | |||
333 | if (new) | ||
334 | dput(new); | ||
335 | return valid; | ||
321 | } | 336 | } |
322 | 337 | ||
323 | return 1; | 338 | return 1; |
diff --git a/fs/block_dev.c b/fs/block_dev.c index fe3f59c14a02..333a7bb4cb9c 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -432,6 +432,9 @@ static void init_once(void *foo) | |||
432 | mutex_init(&bdev->bd_mutex); | 432 | mutex_init(&bdev->bd_mutex); |
433 | INIT_LIST_HEAD(&bdev->bd_inodes); | 433 | INIT_LIST_HEAD(&bdev->bd_inodes); |
434 | INIT_LIST_HEAD(&bdev->bd_list); | 434 | INIT_LIST_HEAD(&bdev->bd_list); |
435 | #ifdef CONFIG_SYSFS | ||
436 | INIT_LIST_HEAD(&bdev->bd_holder_disks); | ||
437 | #endif | ||
435 | inode_init_once(&ei->vfs_inode); | 438 | inode_init_once(&ei->vfs_inode); |
436 | /* Initialize mutex for freeze. */ | 439 | /* Initialize mutex for freeze. */ |
437 | mutex_init(&bdev->bd_fsfreeze_mutex); | 440 | mutex_init(&bdev->bd_fsfreeze_mutex); |
@@ -779,6 +782,23 @@ static struct block_device *bd_start_claiming(struct block_device *bdev, | |||
779 | } | 782 | } |
780 | 783 | ||
781 | #ifdef CONFIG_SYSFS | 784 | #ifdef CONFIG_SYSFS |
785 | struct bd_holder_disk { | ||
786 | struct list_head list; | ||
787 | struct gendisk *disk; | ||
788 | int refcnt; | ||
789 | }; | ||
790 | |||
791 | static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev, | ||
792 | struct gendisk *disk) | ||
793 | { | ||
794 | struct bd_holder_disk *holder; | ||
795 | |||
796 | list_for_each_entry(holder, &bdev->bd_holder_disks, list) | ||
797 | if (holder->disk == disk) | ||
798 | return holder; | ||
799 | return NULL; | ||
800 | } | ||
801 | |||
782 | static int add_symlink(struct kobject *from, struct kobject *to) | 802 | static int add_symlink(struct kobject *from, struct kobject *to) |
783 | { | 803 | { |
784 | return sysfs_create_link(from, to, kobject_name(to)); | 804 | return sysfs_create_link(from, to, kobject_name(to)); |
@@ -794,6 +814,8 @@ static void del_symlink(struct kobject *from, struct kobject *to) | |||
794 | * @bdev: the claimed slave bdev | 814 | * @bdev: the claimed slave bdev |
795 | * @disk: the holding disk | 815 | * @disk: the holding disk |
796 | * | 816 | * |
817 | * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT. | ||
818 | * | ||
797 | * This functions creates the following sysfs symlinks. | 819 | * This functions creates the following sysfs symlinks. |
798 | * | 820 | * |
799 | * - from "slaves" directory of the holder @disk to the claimed @bdev | 821 | * - from "slaves" directory of the holder @disk to the claimed @bdev |
@@ -817,47 +839,83 @@ static void del_symlink(struct kobject *from, struct kobject *to) | |||
817 | */ | 839 | */ |
818 | int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) | 840 | int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) |
819 | { | 841 | { |
842 | struct bd_holder_disk *holder; | ||
820 | int ret = 0; | 843 | int ret = 0; |
821 | 844 | ||
822 | mutex_lock(&bdev->bd_mutex); | 845 | mutex_lock(&bdev->bd_mutex); |
823 | 846 | ||
824 | WARN_ON_ONCE(!bdev->bd_holder || bdev->bd_holder_disk); | 847 | WARN_ON_ONCE(!bdev->bd_holder); |
825 | 848 | ||
826 | /* FIXME: remove the following once add_disk() handles errors */ | 849 | /* FIXME: remove the following once add_disk() handles errors */ |
827 | if (WARN_ON(!disk->slave_dir || !bdev->bd_part->holder_dir)) | 850 | if (WARN_ON(!disk->slave_dir || !bdev->bd_part->holder_dir)) |
828 | goto out_unlock; | 851 | goto out_unlock; |
829 | 852 | ||
830 | ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); | 853 | holder = bd_find_holder_disk(bdev, disk); |
831 | if (ret) | 854 | if (holder) { |
855 | holder->refcnt++; | ||
832 | goto out_unlock; | 856 | goto out_unlock; |
857 | } | ||
833 | 858 | ||
834 | ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj); | 859 | holder = kzalloc(sizeof(*holder), GFP_KERNEL); |
835 | if (ret) { | 860 | if (!holder) { |
836 | del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); | 861 | ret = -ENOMEM; |
837 | goto out_unlock; | 862 | goto out_unlock; |
838 | } | 863 | } |
839 | 864 | ||
840 | bdev->bd_holder_disk = disk; | 865 | INIT_LIST_HEAD(&holder->list); |
866 | holder->disk = disk; | ||
867 | holder->refcnt = 1; | ||
868 | |||
869 | ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); | ||
870 | if (ret) | ||
871 | goto out_free; | ||
872 | |||
873 | ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj); | ||
874 | if (ret) | ||
875 | goto out_del; | ||
876 | |||
877 | list_add(&holder->list, &bdev->bd_holder_disks); | ||
878 | goto out_unlock; | ||
879 | |||
880 | out_del: | ||
881 | del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); | ||
882 | out_free: | ||
883 | kfree(holder); | ||
841 | out_unlock: | 884 | out_unlock: |
842 | mutex_unlock(&bdev->bd_mutex); | 885 | mutex_unlock(&bdev->bd_mutex); |
843 | return ret; | 886 | return ret; |
844 | } | 887 | } |
845 | EXPORT_SYMBOL_GPL(bd_link_disk_holder); | 888 | EXPORT_SYMBOL_GPL(bd_link_disk_holder); |
846 | 889 | ||
847 | static void bd_unlink_disk_holder(struct block_device *bdev) | 890 | /** |
891 | * bd_unlink_disk_holder - destroy symlinks created by bd_link_disk_holder() | ||
892 | * @bdev: the calimed slave bdev | ||
893 | * @disk: the holding disk | ||
894 | * | ||
895 | * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT. | ||
896 | * | ||
897 | * CONTEXT: | ||
898 | * Might sleep. | ||
899 | */ | ||
900 | void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk) | ||
848 | { | 901 | { |
849 | struct gendisk *disk = bdev->bd_holder_disk; | 902 | struct bd_holder_disk *holder; |
850 | 903 | ||
851 | bdev->bd_holder_disk = NULL; | 904 | mutex_lock(&bdev->bd_mutex); |
852 | if (!disk) | ||
853 | return; | ||
854 | 905 | ||
855 | del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); | 906 | holder = bd_find_holder_disk(bdev, disk); |
856 | del_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj); | 907 | |
908 | if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) { | ||
909 | del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); | ||
910 | del_symlink(bdev->bd_part->holder_dir, | ||
911 | &disk_to_dev(disk)->kobj); | ||
912 | list_del_init(&holder->list); | ||
913 | kfree(holder); | ||
914 | } | ||
915 | |||
916 | mutex_unlock(&bdev->bd_mutex); | ||
857 | } | 917 | } |
858 | #else | 918 | EXPORT_SYMBOL_GPL(bd_unlink_disk_holder); |
859 | static inline void bd_unlink_disk_holder(struct block_device *bdev) | ||
860 | { } | ||
861 | #endif | 919 | #endif |
862 | 920 | ||
863 | /** | 921 | /** |
@@ -1380,7 +1438,6 @@ int blkdev_put(struct block_device *bdev, fmode_t mode) | |||
1380 | * unblock evpoll if it was a write holder. | 1438 | * unblock evpoll if it was a write holder. |
1381 | */ | 1439 | */ |
1382 | if (bdev_free) { | 1440 | if (bdev_free) { |
1383 | bd_unlink_disk_holder(bdev); | ||
1384 | if (bdev->bd_write_holder) { | 1441 | if (bdev->bd_write_holder) { |
1385 | disk_unblock_events(bdev->bd_disk); | 1442 | disk_unblock_events(bdev->bd_disk); |
1386 | bdev->bd_write_holder = false; | 1443 | bdev->bd_write_holder = false; |
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig index 7bb3c020e570..ecb9fd3be143 100644 --- a/fs/btrfs/Kconfig +++ b/fs/btrfs/Kconfig | |||
@@ -4,6 +4,8 @@ config BTRFS_FS | |||
4 | select LIBCRC32C | 4 | select LIBCRC32C |
5 | select ZLIB_INFLATE | 5 | select ZLIB_INFLATE |
6 | select ZLIB_DEFLATE | 6 | select ZLIB_DEFLATE |
7 | select LZO_COMPRESS | ||
8 | select LZO_DECOMPRESS | ||
7 | help | 9 | help |
8 | Btrfs is a new filesystem with extents, writable snapshotting, | 10 | Btrfs is a new filesystem with extents, writable snapshotting, |
9 | support for multiple devices and many more features. | 11 | support for multiple devices and many more features. |
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index a35eb36b32fd..31610ea73aec 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile | |||
@@ -6,5 +6,5 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ | |||
6 | transaction.o inode.o file.o tree-defrag.o \ | 6 | transaction.o inode.o file.o tree-defrag.o \ |
7 | extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ | 7 | extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ |
8 | extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ | 8 | extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ |
9 | export.o tree-log.o acl.o free-space-cache.o zlib.o \ | 9 | export.o tree-log.o acl.o free-space-cache.o zlib.o lzo.o \ |
10 | compression.o delayed-ref.o relocation.o | 10 | compression.o delayed-ref.o relocation.o |
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 6ae2c8cac9d5..9c949348510b 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c | |||
@@ -37,6 +37,9 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type) | |||
37 | char *value = NULL; | 37 | char *value = NULL; |
38 | struct posix_acl *acl; | 38 | struct posix_acl *acl; |
39 | 39 | ||
40 | if (!IS_POSIXACL(inode)) | ||
41 | return NULL; | ||
42 | |||
40 | acl = get_cached_acl(inode, type); | 43 | acl = get_cached_acl(inode, type); |
41 | if (acl != ACL_NOT_CACHED) | 44 | if (acl != ACL_NOT_CACHED) |
42 | return acl; | 45 | return acl; |
@@ -60,8 +63,10 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type) | |||
60 | size = __btrfs_getxattr(inode, name, value, size); | 63 | size = __btrfs_getxattr(inode, name, value, size); |
61 | if (size > 0) { | 64 | if (size > 0) { |
62 | acl = posix_acl_from_xattr(value, size); | 65 | acl = posix_acl_from_xattr(value, size); |
63 | if (IS_ERR(acl)) | 66 | if (IS_ERR(acl)) { |
67 | kfree(value); | ||
64 | return acl; | 68 | return acl; |
69 | } | ||
65 | set_cached_acl(inode, type, acl); | 70 | set_cached_acl(inode, type, acl); |
66 | } | 71 | } |
67 | kfree(value); | 72 | kfree(value); |
@@ -82,6 +87,9 @@ static int btrfs_xattr_acl_get(struct dentry *dentry, const char *name, | |||
82 | struct posix_acl *acl; | 87 | struct posix_acl *acl; |
83 | int ret = 0; | 88 | int ret = 0; |
84 | 89 | ||
90 | if (!IS_POSIXACL(dentry->d_inode)) | ||
91 | return -EOPNOTSUPP; | ||
92 | |||
85 | acl = btrfs_get_acl(dentry->d_inode, type); | 93 | acl = btrfs_get_acl(dentry->d_inode, type); |
86 | 94 | ||
87 | if (IS_ERR(acl)) | 95 | if (IS_ERR(acl)) |
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 6ad63f17eca0..ccc991c542df 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h | |||
@@ -157,7 +157,7 @@ struct btrfs_inode { | |||
157 | /* | 157 | /* |
158 | * always compress this one file | 158 | * always compress this one file |
159 | */ | 159 | */ |
160 | unsigned force_compress:1; | 160 | unsigned force_compress:4; |
161 | 161 | ||
162 | struct inode vfs_inode; | 162 | struct inode vfs_inode; |
163 | }; | 163 | }; |
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index b50bc4bd5c56..4d2110eafe29 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c | |||
@@ -62,6 +62,9 @@ struct compressed_bio { | |||
62 | /* number of bytes on disk */ | 62 | /* number of bytes on disk */ |
63 | unsigned long compressed_len; | 63 | unsigned long compressed_len; |
64 | 64 | ||
65 | /* the compression algorithm for this bio */ | ||
66 | int compress_type; | ||
67 | |||
65 | /* number of compressed pages in the array */ | 68 | /* number of compressed pages in the array */ |
66 | unsigned long nr_pages; | 69 | unsigned long nr_pages; |
67 | 70 | ||
@@ -173,11 +176,12 @@ static void end_compressed_bio_read(struct bio *bio, int err) | |||
173 | /* ok, we're the last bio for this extent, lets start | 176 | /* ok, we're the last bio for this extent, lets start |
174 | * the decompression. | 177 | * the decompression. |
175 | */ | 178 | */ |
176 | ret = btrfs_zlib_decompress_biovec(cb->compressed_pages, | 179 | ret = btrfs_decompress_biovec(cb->compress_type, |
177 | cb->start, | 180 | cb->compressed_pages, |
178 | cb->orig_bio->bi_io_vec, | 181 | cb->start, |
179 | cb->orig_bio->bi_vcnt, | 182 | cb->orig_bio->bi_io_vec, |
180 | cb->compressed_len); | 183 | cb->orig_bio->bi_vcnt, |
184 | cb->compressed_len); | ||
181 | csum_failed: | 185 | csum_failed: |
182 | if (ret) | 186 | if (ret) |
183 | cb->errors = 1; | 187 | cb->errors = 1; |
@@ -558,7 +562,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | |||
558 | u64 em_len; | 562 | u64 em_len; |
559 | u64 em_start; | 563 | u64 em_start; |
560 | struct extent_map *em; | 564 | struct extent_map *em; |
561 | int ret; | 565 | int ret = -ENOMEM; |
562 | u32 *sums; | 566 | u32 *sums; |
563 | 567 | ||
564 | tree = &BTRFS_I(inode)->io_tree; | 568 | tree = &BTRFS_I(inode)->io_tree; |
@@ -573,6 +577,9 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | |||
573 | 577 | ||
574 | compressed_len = em->block_len; | 578 | compressed_len = em->block_len; |
575 | cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS); | 579 | cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS); |
580 | if (!cb) | ||
581 | goto out; | ||
582 | |||
576 | atomic_set(&cb->pending_bios, 0); | 583 | atomic_set(&cb->pending_bios, 0); |
577 | cb->errors = 0; | 584 | cb->errors = 0; |
578 | cb->inode = inode; | 585 | cb->inode = inode; |
@@ -588,17 +595,23 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | |||
588 | 595 | ||
589 | cb->len = uncompressed_len; | 596 | cb->len = uncompressed_len; |
590 | cb->compressed_len = compressed_len; | 597 | cb->compressed_len = compressed_len; |
598 | cb->compress_type = extent_compress_type(bio_flags); | ||
591 | cb->orig_bio = bio; | 599 | cb->orig_bio = bio; |
592 | 600 | ||
593 | nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) / | 601 | nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) / |
594 | PAGE_CACHE_SIZE; | 602 | PAGE_CACHE_SIZE; |
595 | cb->compressed_pages = kmalloc(sizeof(struct page *) * nr_pages, | 603 | cb->compressed_pages = kzalloc(sizeof(struct page *) * nr_pages, |
596 | GFP_NOFS); | 604 | GFP_NOFS); |
605 | if (!cb->compressed_pages) | ||
606 | goto fail1; | ||
607 | |||
597 | bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; | 608 | bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; |
598 | 609 | ||
599 | for (page_index = 0; page_index < nr_pages; page_index++) { | 610 | for (page_index = 0; page_index < nr_pages; page_index++) { |
600 | cb->compressed_pages[page_index] = alloc_page(GFP_NOFS | | 611 | cb->compressed_pages[page_index] = alloc_page(GFP_NOFS | |
601 | __GFP_HIGHMEM); | 612 | __GFP_HIGHMEM); |
613 | if (!cb->compressed_pages[page_index]) | ||
614 | goto fail2; | ||
602 | } | 615 | } |
603 | cb->nr_pages = nr_pages; | 616 | cb->nr_pages = nr_pages; |
604 | 617 | ||
@@ -609,6 +622,8 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | |||
609 | cb->len = uncompressed_len; | 622 | cb->len = uncompressed_len; |
610 | 623 | ||
611 | comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, GFP_NOFS); | 624 | comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, GFP_NOFS); |
625 | if (!comp_bio) | ||
626 | goto fail2; | ||
612 | comp_bio->bi_private = cb; | 627 | comp_bio->bi_private = cb; |
613 | comp_bio->bi_end_io = end_compressed_bio_read; | 628 | comp_bio->bi_end_io = end_compressed_bio_read; |
614 | atomic_inc(&cb->pending_bios); | 629 | atomic_inc(&cb->pending_bios); |
@@ -676,4 +691,329 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | |||
676 | 691 | ||
677 | bio_put(comp_bio); | 692 | bio_put(comp_bio); |
678 | return 0; | 693 | return 0; |
694 | |||
695 | fail2: | ||
696 | for (page_index = 0; page_index < nr_pages; page_index++) | ||
697 | free_page((unsigned long)cb->compressed_pages[page_index]); | ||
698 | |||
699 | kfree(cb->compressed_pages); | ||
700 | fail1: | ||
701 | kfree(cb); | ||
702 | out: | ||
703 | free_extent_map(em); | ||
704 | return ret; | ||
705 | } | ||
706 | |||
707 | static struct list_head comp_idle_workspace[BTRFS_COMPRESS_TYPES]; | ||
708 | static spinlock_t comp_workspace_lock[BTRFS_COMPRESS_TYPES]; | ||
709 | static int comp_num_workspace[BTRFS_COMPRESS_TYPES]; | ||
710 | static atomic_t comp_alloc_workspace[BTRFS_COMPRESS_TYPES]; | ||
711 | static wait_queue_head_t comp_workspace_wait[BTRFS_COMPRESS_TYPES]; | ||
712 | |||
713 | struct btrfs_compress_op *btrfs_compress_op[] = { | ||
714 | &btrfs_zlib_compress, | ||
715 | &btrfs_lzo_compress, | ||
716 | }; | ||
717 | |||
718 | int __init btrfs_init_compress(void) | ||
719 | { | ||
720 | int i; | ||
721 | |||
722 | for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) { | ||
723 | INIT_LIST_HEAD(&comp_idle_workspace[i]); | ||
724 | spin_lock_init(&comp_workspace_lock[i]); | ||
725 | atomic_set(&comp_alloc_workspace[i], 0); | ||
726 | init_waitqueue_head(&comp_workspace_wait[i]); | ||
727 | } | ||
728 | return 0; | ||
729 | } | ||
730 | |||
731 | /* | ||
732 | * this finds an available workspace or allocates a new one | ||
733 | * ERR_PTR is returned if things go bad. | ||
734 | */ | ||
735 | static struct list_head *find_workspace(int type) | ||
736 | { | ||
737 | struct list_head *workspace; | ||
738 | int cpus = num_online_cpus(); | ||
739 | int idx = type - 1; | ||
740 | |||
741 | struct list_head *idle_workspace = &comp_idle_workspace[idx]; | ||
742 | spinlock_t *workspace_lock = &comp_workspace_lock[idx]; | ||
743 | atomic_t *alloc_workspace = &comp_alloc_workspace[idx]; | ||
744 | wait_queue_head_t *workspace_wait = &comp_workspace_wait[idx]; | ||
745 | int *num_workspace = &comp_num_workspace[idx]; | ||
746 | again: | ||
747 | spin_lock(workspace_lock); | ||
748 | if (!list_empty(idle_workspace)) { | ||
749 | workspace = idle_workspace->next; | ||
750 | list_del(workspace); | ||
751 | (*num_workspace)--; | ||
752 | spin_unlock(workspace_lock); | ||
753 | return workspace; | ||
754 | |||
755 | } | ||
756 | if (atomic_read(alloc_workspace) > cpus) { | ||
757 | DEFINE_WAIT(wait); | ||
758 | |||
759 | spin_unlock(workspace_lock); | ||
760 | prepare_to_wait(workspace_wait, &wait, TASK_UNINTERRUPTIBLE); | ||
761 | if (atomic_read(alloc_workspace) > cpus && !*num_workspace) | ||
762 | schedule(); | ||
763 | finish_wait(workspace_wait, &wait); | ||
764 | goto again; | ||
765 | } | ||
766 | atomic_inc(alloc_workspace); | ||
767 | spin_unlock(workspace_lock); | ||
768 | |||
769 | workspace = btrfs_compress_op[idx]->alloc_workspace(); | ||
770 | if (IS_ERR(workspace)) { | ||
771 | atomic_dec(alloc_workspace); | ||
772 | wake_up(workspace_wait); | ||
773 | } | ||
774 | return workspace; | ||
775 | } | ||
776 | |||
777 | /* | ||
778 | * put a workspace struct back on the list or free it if we have enough | ||
779 | * idle ones sitting around | ||
780 | */ | ||
781 | static void free_workspace(int type, struct list_head *workspace) | ||
782 | { | ||
783 | int idx = type - 1; | ||
784 | struct list_head *idle_workspace = &comp_idle_workspace[idx]; | ||
785 | spinlock_t *workspace_lock = &comp_workspace_lock[idx]; | ||
786 | atomic_t *alloc_workspace = &comp_alloc_workspace[idx]; | ||
787 | wait_queue_head_t *workspace_wait = &comp_workspace_wait[idx]; | ||
788 | int *num_workspace = &comp_num_workspace[idx]; | ||
789 | |||
790 | spin_lock(workspace_lock); | ||
791 | if (*num_workspace < num_online_cpus()) { | ||
792 | list_add_tail(workspace, idle_workspace); | ||
793 | (*num_workspace)++; | ||
794 | spin_unlock(workspace_lock); | ||
795 | goto wake; | ||
796 | } | ||
797 | spin_unlock(workspace_lock); | ||
798 | |||
799 | btrfs_compress_op[idx]->free_workspace(workspace); | ||
800 | atomic_dec(alloc_workspace); | ||
801 | wake: | ||
802 | if (waitqueue_active(workspace_wait)) | ||
803 | wake_up(workspace_wait); | ||
804 | } | ||
805 | |||
806 | /* | ||
807 | * cleanup function for module exit | ||
808 | */ | ||
809 | static void free_workspaces(void) | ||
810 | { | ||
811 | struct list_head *workspace; | ||
812 | int i; | ||
813 | |||
814 | for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) { | ||
815 | while (!list_empty(&comp_idle_workspace[i])) { | ||
816 | workspace = comp_idle_workspace[i].next; | ||
817 | list_del(workspace); | ||
818 | btrfs_compress_op[i]->free_workspace(workspace); | ||
819 | atomic_dec(&comp_alloc_workspace[i]); | ||
820 | } | ||
821 | } | ||
822 | } | ||
823 | |||
824 | /* | ||
825 | * given an address space and start/len, compress the bytes. | ||
826 | * | ||
827 | * pages are allocated to hold the compressed result and stored | ||
828 | * in 'pages' | ||
829 | * | ||
830 | * out_pages is used to return the number of pages allocated. There | ||
831 | * may be pages allocated even if we return an error | ||
832 | * | ||
833 | * total_in is used to return the number of bytes actually read. It | ||
834 | * may be smaller then len if we had to exit early because we | ||
835 | * ran out of room in the pages array or because we cross the | ||
836 | * max_out threshold. | ||
837 | * | ||
838 | * total_out is used to return the total number of compressed bytes | ||
839 | * | ||
840 | * max_out tells us the max number of bytes that we're allowed to | ||
841 | * stuff into pages | ||
842 | */ | ||
843 | int btrfs_compress_pages(int type, struct address_space *mapping, | ||
844 | u64 start, unsigned long len, | ||
845 | struct page **pages, | ||
846 | unsigned long nr_dest_pages, | ||
847 | unsigned long *out_pages, | ||
848 | unsigned long *total_in, | ||
849 | unsigned long *total_out, | ||
850 | unsigned long max_out) | ||
851 | { | ||
852 | struct list_head *workspace; | ||
853 | int ret; | ||
854 | |||
855 | workspace = find_workspace(type); | ||
856 | if (IS_ERR(workspace)) | ||
857 | return -1; | ||
858 | |||
859 | ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping, | ||
860 | start, len, pages, | ||
861 | nr_dest_pages, out_pages, | ||
862 | total_in, total_out, | ||
863 | max_out); | ||
864 | free_workspace(type, workspace); | ||
865 | return ret; | ||
866 | } | ||
867 | |||
868 | /* | ||
869 | * pages_in is an array of pages with compressed data. | ||
870 | * | ||
871 | * disk_start is the starting logical offset of this array in the file | ||
872 | * | ||
873 | * bvec is a bio_vec of pages from the file that we want to decompress into | ||
874 | * | ||
875 | * vcnt is the count of pages in the biovec | ||
876 | * | ||
877 | * srclen is the number of bytes in pages_in | ||
878 | * | ||
879 | * The basic idea is that we have a bio that was created by readpages. | ||
880 | * The pages in the bio are for the uncompressed data, and they may not | ||
881 | * be contiguous. They all correspond to the range of bytes covered by | ||
882 | * the compressed extent. | ||
883 | */ | ||
884 | int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start, | ||
885 | struct bio_vec *bvec, int vcnt, size_t srclen) | ||
886 | { | ||
887 | struct list_head *workspace; | ||
888 | int ret; | ||
889 | |||
890 | workspace = find_workspace(type); | ||
891 | if (IS_ERR(workspace)) | ||
892 | return -ENOMEM; | ||
893 | |||
894 | ret = btrfs_compress_op[type-1]->decompress_biovec(workspace, pages_in, | ||
895 | disk_start, | ||
896 | bvec, vcnt, srclen); | ||
897 | free_workspace(type, workspace); | ||
898 | return ret; | ||
899 | } | ||
900 | |||
901 | /* | ||
902 | * a less complex decompression routine. Our compressed data fits in a | ||
903 | * single page, and we want to read a single page out of it. | ||
904 | * start_byte tells us the offset into the compressed data we're interested in | ||
905 | */ | ||
906 | int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page, | ||
907 | unsigned long start_byte, size_t srclen, size_t destlen) | ||
908 | { | ||
909 | struct list_head *workspace; | ||
910 | int ret; | ||
911 | |||
912 | workspace = find_workspace(type); | ||
913 | if (IS_ERR(workspace)) | ||
914 | return -ENOMEM; | ||
915 | |||
916 | ret = btrfs_compress_op[type-1]->decompress(workspace, data_in, | ||
917 | dest_page, start_byte, | ||
918 | srclen, destlen); | ||
919 | |||
920 | free_workspace(type, workspace); | ||
921 | return ret; | ||
922 | } | ||
923 | |||
924 | void btrfs_exit_compress(void) | ||
925 | { | ||
926 | free_workspaces(); | ||
927 | } | ||
928 | |||
929 | /* | ||
930 | * Copy uncompressed data from working buffer to pages. | ||
931 | * | ||
932 | * buf_start is the byte offset we're of the start of our workspace buffer. | ||
933 | * | ||
934 | * total_out is the last byte of the buffer | ||
935 | */ | ||
936 | int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, | ||
937 | unsigned long total_out, u64 disk_start, | ||
938 | struct bio_vec *bvec, int vcnt, | ||
939 | unsigned long *page_index, | ||
940 | unsigned long *pg_offset) | ||
941 | { | ||
942 | unsigned long buf_offset; | ||
943 | unsigned long current_buf_start; | ||
944 | unsigned long start_byte; | ||
945 | unsigned long working_bytes = total_out - buf_start; | ||
946 | unsigned long bytes; | ||
947 | char *kaddr; | ||
948 | struct page *page_out = bvec[*page_index].bv_page; | ||
949 | |||
950 | /* | ||
951 | * start byte is the first byte of the page we're currently | ||
952 | * copying into relative to the start of the compressed data. | ||
953 | */ | ||
954 | start_byte = page_offset(page_out) - disk_start; | ||
955 | |||
956 | /* we haven't yet hit data corresponding to this page */ | ||
957 | if (total_out <= start_byte) | ||
958 | return 1; | ||
959 | |||
960 | /* | ||
961 | * the start of the data we care about is offset into | ||
962 | * the middle of our working buffer | ||
963 | */ | ||
964 | if (total_out > start_byte && buf_start < start_byte) { | ||
965 | buf_offset = start_byte - buf_start; | ||
966 | working_bytes -= buf_offset; | ||
967 | } else { | ||
968 | buf_offset = 0; | ||
969 | } | ||
970 | current_buf_start = buf_start; | ||
971 | |||
972 | /* copy bytes from the working buffer into the pages */ | ||
973 | while (working_bytes > 0) { | ||
974 | bytes = min(PAGE_CACHE_SIZE - *pg_offset, | ||
975 | PAGE_CACHE_SIZE - buf_offset); | ||
976 | bytes = min(bytes, working_bytes); | ||
977 | kaddr = kmap_atomic(page_out, KM_USER0); | ||
978 | memcpy(kaddr + *pg_offset, buf + buf_offset, bytes); | ||
979 | kunmap_atomic(kaddr, KM_USER0); | ||
980 | flush_dcache_page(page_out); | ||
981 | |||
982 | *pg_offset += bytes; | ||
983 | buf_offset += bytes; | ||
984 | working_bytes -= bytes; | ||
985 | current_buf_start += bytes; | ||
986 | |||
987 | /* check if we need to pick another page */ | ||
988 | if (*pg_offset == PAGE_CACHE_SIZE) { | ||
989 | (*page_index)++; | ||
990 | if (*page_index >= vcnt) | ||
991 | return 0; | ||
992 | |||
993 | page_out = bvec[*page_index].bv_page; | ||
994 | *pg_offset = 0; | ||
995 | start_byte = page_offset(page_out) - disk_start; | ||
996 | |||
997 | /* | ||
998 | * make sure our new page is covered by this | ||
999 | * working buffer | ||
1000 | */ | ||
1001 | if (total_out <= start_byte) | ||
1002 | return 1; | ||
1003 | |||
1004 | /* | ||
1005 | * the next page in the biovec might not be adjacent | ||
1006 | * to the last page, but it might still be found | ||
1007 | * inside this working buffer. bump our offset pointer | ||
1008 | */ | ||
1009 | if (total_out > start_byte && | ||
1010 | current_buf_start < start_byte) { | ||
1011 | buf_offset = start_byte - buf_start; | ||
1012 | working_bytes = total_out - start_byte; | ||
1013 | current_buf_start = buf_start + buf_offset; | ||
1014 | } | ||
1015 | } | ||
1016 | } | ||
1017 | |||
1018 | return 1; | ||
679 | } | 1019 | } |
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index 421f5b4aa715..51000174b9d7 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h | |||
@@ -19,24 +19,27 @@ | |||
19 | #ifndef __BTRFS_COMPRESSION_ | 19 | #ifndef __BTRFS_COMPRESSION_ |
20 | #define __BTRFS_COMPRESSION_ | 20 | #define __BTRFS_COMPRESSION_ |
21 | 21 | ||
22 | int btrfs_zlib_decompress(unsigned char *data_in, | 22 | int btrfs_init_compress(void); |
23 | struct page *dest_page, | 23 | void btrfs_exit_compress(void); |
24 | unsigned long start_byte, | 24 | |
25 | size_t srclen, size_t destlen); | 25 | int btrfs_compress_pages(int type, struct address_space *mapping, |
26 | int btrfs_zlib_compress_pages(struct address_space *mapping, | 26 | u64 start, unsigned long len, |
27 | u64 start, unsigned long len, | 27 | struct page **pages, |
28 | struct page **pages, | 28 | unsigned long nr_dest_pages, |
29 | unsigned long nr_dest_pages, | 29 | unsigned long *out_pages, |
30 | unsigned long *out_pages, | 30 | unsigned long *total_in, |
31 | unsigned long *total_in, | 31 | unsigned long *total_out, |
32 | unsigned long *total_out, | 32 | unsigned long max_out); |
33 | unsigned long max_out); | 33 | int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start, |
34 | int btrfs_zlib_decompress_biovec(struct page **pages_in, | 34 | struct bio_vec *bvec, int vcnt, size_t srclen); |
35 | u64 disk_start, | 35 | int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page, |
36 | struct bio_vec *bvec, | 36 | unsigned long start_byte, size_t srclen, size_t destlen); |
37 | int vcnt, | 37 | int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, |
38 | size_t srclen); | 38 | unsigned long total_out, u64 disk_start, |
39 | void btrfs_zlib_exit(void); | 39 | struct bio_vec *bvec, int vcnt, |
40 | unsigned long *page_index, | ||
41 | unsigned long *pg_offset); | ||
42 | |||
40 | int btrfs_submit_compressed_write(struct inode *inode, u64 start, | 43 | int btrfs_submit_compressed_write(struct inode *inode, u64 start, |
41 | unsigned long len, u64 disk_start, | 44 | unsigned long len, u64 disk_start, |
42 | unsigned long compressed_len, | 45 | unsigned long compressed_len, |
@@ -44,4 +47,37 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, | |||
44 | unsigned long nr_pages); | 47 | unsigned long nr_pages); |
45 | int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | 48 | int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, |
46 | int mirror_num, unsigned long bio_flags); | 49 | int mirror_num, unsigned long bio_flags); |
50 | |||
51 | struct btrfs_compress_op { | ||
52 | struct list_head *(*alloc_workspace)(void); | ||
53 | |||
54 | void (*free_workspace)(struct list_head *workspace); | ||
55 | |||
56 | int (*compress_pages)(struct list_head *workspace, | ||
57 | struct address_space *mapping, | ||
58 | u64 start, unsigned long len, | ||
59 | struct page **pages, | ||
60 | unsigned long nr_dest_pages, | ||
61 | unsigned long *out_pages, | ||
62 | unsigned long *total_in, | ||
63 | unsigned long *total_out, | ||
64 | unsigned long max_out); | ||
65 | |||
66 | int (*decompress_biovec)(struct list_head *workspace, | ||
67 | struct page **pages_in, | ||
68 | u64 disk_start, | ||
69 | struct bio_vec *bvec, | ||
70 | int vcnt, | ||
71 | size_t srclen); | ||
72 | |||
73 | int (*decompress)(struct list_head *workspace, | ||
74 | unsigned char *data_in, | ||
75 | struct page *dest_page, | ||
76 | unsigned long start_byte, | ||
77 | size_t srclen, size_t destlen); | ||
78 | }; | ||
79 | |||
80 | extern struct btrfs_compress_op btrfs_zlib_compress; | ||
81 | extern struct btrfs_compress_op btrfs_lzo_compress; | ||
82 | |||
47 | #endif | 83 | #endif |
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 9ac171599258..b5baff0dccfe 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -105,6 +105,8 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p, | |||
105 | /* this also releases the path */ | 105 | /* this also releases the path */ |
106 | void btrfs_free_path(struct btrfs_path *p) | 106 | void btrfs_free_path(struct btrfs_path *p) |
107 | { | 107 | { |
108 | if (!p) | ||
109 | return; | ||
108 | btrfs_release_path(NULL, p); | 110 | btrfs_release_path(NULL, p); |
109 | kmem_cache_free(btrfs_path_cachep, p); | 111 | kmem_cache_free(btrfs_path_cachep, p); |
110 | } | 112 | } |
@@ -2514,6 +2516,9 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root | |||
2514 | btrfs_assert_tree_locked(path->nodes[1]); | 2516 | btrfs_assert_tree_locked(path->nodes[1]); |
2515 | 2517 | ||
2516 | right = read_node_slot(root, upper, slot + 1); | 2518 | right = read_node_slot(root, upper, slot + 1); |
2519 | if (right == NULL) | ||
2520 | return 1; | ||
2521 | |||
2517 | btrfs_tree_lock(right); | 2522 | btrfs_tree_lock(right); |
2518 | btrfs_set_lock_blocking(right); | 2523 | btrfs_set_lock_blocking(right); |
2519 | 2524 | ||
@@ -2764,6 +2769,9 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root | |||
2764 | btrfs_assert_tree_locked(path->nodes[1]); | 2769 | btrfs_assert_tree_locked(path->nodes[1]); |
2765 | 2770 | ||
2766 | left = read_node_slot(root, path->nodes[1], slot - 1); | 2771 | left = read_node_slot(root, path->nodes[1], slot - 1); |
2772 | if (left == NULL) | ||
2773 | return 1; | ||
2774 | |||
2767 | btrfs_tree_lock(left); | 2775 | btrfs_tree_lock(left); |
2768 | btrfs_set_lock_blocking(left); | 2776 | btrfs_set_lock_blocking(left); |
2769 | 2777 | ||
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index a142d204b526..2c98b3af6052 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <linux/backing-dev.h> | 27 | #include <linux/backing-dev.h> |
28 | #include <linux/wait.h> | 28 | #include <linux/wait.h> |
29 | #include <linux/slab.h> | 29 | #include <linux/slab.h> |
30 | #include <linux/kobject.h> | ||
30 | #include <asm/kmap_types.h> | 31 | #include <asm/kmap_types.h> |
31 | #include "extent_io.h" | 32 | #include "extent_io.h" |
32 | #include "extent_map.h" | 33 | #include "extent_map.h" |
@@ -294,6 +295,14 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes) | |||
294 | #define BTRFS_FSID_SIZE 16 | 295 | #define BTRFS_FSID_SIZE 16 |
295 | #define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0) | 296 | #define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0) |
296 | #define BTRFS_HEADER_FLAG_RELOC (1ULL << 1) | 297 | #define BTRFS_HEADER_FLAG_RELOC (1ULL << 1) |
298 | |||
299 | /* | ||
300 | * File system states | ||
301 | */ | ||
302 | |||
303 | /* Errors detected */ | ||
304 | #define BTRFS_SUPER_FLAG_ERROR (1ULL << 2) | ||
305 | |||
297 | #define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32) | 306 | #define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32) |
298 | #define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33) | 307 | #define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33) |
299 | 308 | ||
@@ -398,13 +407,15 @@ struct btrfs_super_block { | |||
398 | #define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0) | 407 | #define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0) |
399 | #define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1) | 408 | #define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1) |
400 | #define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2) | 409 | #define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2) |
410 | #define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO (1ULL << 3) | ||
401 | 411 | ||
402 | #define BTRFS_FEATURE_COMPAT_SUPP 0ULL | 412 | #define BTRFS_FEATURE_COMPAT_SUPP 0ULL |
403 | #define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL | 413 | #define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL |
404 | #define BTRFS_FEATURE_INCOMPAT_SUPP \ | 414 | #define BTRFS_FEATURE_INCOMPAT_SUPP \ |
405 | (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \ | 415 | (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \ |
406 | BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \ | 416 | BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \ |
407 | BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) | 417 | BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \ |
418 | BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO) | ||
408 | 419 | ||
409 | /* | 420 | /* |
410 | * A leaf is full of items. offset and size tell us where to find | 421 | * A leaf is full of items. offset and size tell us where to find |
@@ -551,9 +562,11 @@ struct btrfs_timespec { | |||
551 | } __attribute__ ((__packed__)); | 562 | } __attribute__ ((__packed__)); |
552 | 563 | ||
553 | enum btrfs_compression_type { | 564 | enum btrfs_compression_type { |
554 | BTRFS_COMPRESS_NONE = 0, | 565 | BTRFS_COMPRESS_NONE = 0, |
555 | BTRFS_COMPRESS_ZLIB = 1, | 566 | BTRFS_COMPRESS_ZLIB = 1, |
556 | BTRFS_COMPRESS_LAST = 2, | 567 | BTRFS_COMPRESS_LZO = 2, |
568 | BTRFS_COMPRESS_TYPES = 2, | ||
569 | BTRFS_COMPRESS_LAST = 3, | ||
557 | }; | 570 | }; |
558 | 571 | ||
559 | struct btrfs_inode_item { | 572 | struct btrfs_inode_item { |
@@ -597,6 +610,8 @@ struct btrfs_dir_item { | |||
597 | u8 type; | 610 | u8 type; |
598 | } __attribute__ ((__packed__)); | 611 | } __attribute__ ((__packed__)); |
599 | 612 | ||
613 | #define BTRFS_ROOT_SUBVOL_RDONLY (1ULL << 0) | ||
614 | |||
600 | struct btrfs_root_item { | 615 | struct btrfs_root_item { |
601 | struct btrfs_inode_item inode; | 616 | struct btrfs_inode_item inode; |
602 | __le64 generation; | 617 | __le64 generation; |
@@ -895,7 +910,8 @@ struct btrfs_fs_info { | |||
895 | */ | 910 | */ |
896 | u64 last_trans_log_full_commit; | 911 | u64 last_trans_log_full_commit; |
897 | u64 open_ioctl_trans; | 912 | u64 open_ioctl_trans; |
898 | unsigned long mount_opt; | 913 | unsigned long mount_opt:20; |
914 | unsigned long compress_type:4; | ||
899 | u64 max_inline; | 915 | u64 max_inline; |
900 | u64 alloc_start; | 916 | u64 alloc_start; |
901 | struct btrfs_transaction *running_transaction; | 917 | struct btrfs_transaction *running_transaction; |
@@ -1050,6 +1066,9 @@ struct btrfs_fs_info { | |||
1050 | unsigned metadata_ratio; | 1066 | unsigned metadata_ratio; |
1051 | 1067 | ||
1052 | void *bdev_holder; | 1068 | void *bdev_holder; |
1069 | |||
1070 | /* filesystem state */ | ||
1071 | u64 fs_state; | ||
1053 | }; | 1072 | }; |
1054 | 1073 | ||
1055 | /* | 1074 | /* |
@@ -1893,6 +1912,11 @@ BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, byte_limit, 64); | |||
1893 | BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item, | 1912 | BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item, |
1894 | last_snapshot, 64); | 1913 | last_snapshot, 64); |
1895 | 1914 | ||
1915 | static inline bool btrfs_root_readonly(struct btrfs_root *root) | ||
1916 | { | ||
1917 | return root->root_item.flags & BTRFS_ROOT_SUBVOL_RDONLY; | ||
1918 | } | ||
1919 | |||
1896 | /* struct btrfs_super_block */ | 1920 | /* struct btrfs_super_block */ |
1897 | 1921 | ||
1898 | BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64); | 1922 | BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64); |
@@ -2145,6 +2169,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
2145 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | 2169 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, |
2146 | struct btrfs_root *root, u64 group_start); | 2170 | struct btrfs_root *root, u64 group_start); |
2147 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); | 2171 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); |
2172 | u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data); | ||
2148 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); | 2173 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); |
2149 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); | 2174 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); |
2150 | int btrfs_check_data_free_space(struct inode *inode, u64 bytes); | 2175 | int btrfs_check_data_free_space(struct inode *inode, u64 bytes); |
@@ -2188,6 +2213,12 @@ int btrfs_set_block_group_ro(struct btrfs_root *root, | |||
2188 | int btrfs_set_block_group_rw(struct btrfs_root *root, | 2213 | int btrfs_set_block_group_rw(struct btrfs_root *root, |
2189 | struct btrfs_block_group_cache *cache); | 2214 | struct btrfs_block_group_cache *cache); |
2190 | void btrfs_put_block_group_cache(struct btrfs_fs_info *info); | 2215 | void btrfs_put_block_group_cache(struct btrfs_fs_info *info); |
2216 | u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo); | ||
2217 | int btrfs_error_unpin_extent_range(struct btrfs_root *root, | ||
2218 | u64 start, u64 end); | ||
2219 | int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, | ||
2220 | u64 num_bytes); | ||
2221 | |||
2191 | /* ctree.c */ | 2222 | /* ctree.c */ |
2192 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, | 2223 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, |
2193 | int level, int *slot); | 2224 | int level, int *slot); |
@@ -2541,6 +2572,14 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size); | |||
2541 | /* super.c */ | 2572 | /* super.c */ |
2542 | int btrfs_parse_options(struct btrfs_root *root, char *options); | 2573 | int btrfs_parse_options(struct btrfs_root *root, char *options); |
2543 | int btrfs_sync_fs(struct super_block *sb, int wait); | 2574 | int btrfs_sync_fs(struct super_block *sb, int wait); |
2575 | void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, | ||
2576 | unsigned int line, int errno); | ||
2577 | |||
2578 | #define btrfs_std_error(fs_info, errno) \ | ||
2579 | do { \ | ||
2580 | if ((errno)) \ | ||
2581 | __btrfs_std_error((fs_info), __func__, __LINE__, (errno));\ | ||
2582 | } while (0) | ||
2544 | 2583 | ||
2545 | /* acl.c */ | 2584 | /* acl.c */ |
2546 | #ifdef CONFIG_BTRFS_FS_POSIX_ACL | 2585 | #ifdef CONFIG_BTRFS_FS_POSIX_ACL |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 51d2e4de34eb..fdce8799b98d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -44,6 +44,20 @@ | |||
44 | static struct extent_io_ops btree_extent_io_ops; | 44 | static struct extent_io_ops btree_extent_io_ops; |
45 | static void end_workqueue_fn(struct btrfs_work *work); | 45 | static void end_workqueue_fn(struct btrfs_work *work); |
46 | static void free_fs_root(struct btrfs_root *root); | 46 | static void free_fs_root(struct btrfs_root *root); |
47 | static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info, | ||
48 | int read_only); | ||
49 | static int btrfs_destroy_ordered_operations(struct btrfs_root *root); | ||
50 | static int btrfs_destroy_ordered_extents(struct btrfs_root *root); | ||
51 | static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, | ||
52 | struct btrfs_root *root); | ||
53 | static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t); | ||
54 | static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root); | ||
55 | static int btrfs_destroy_marked_extents(struct btrfs_root *root, | ||
56 | struct extent_io_tree *dirty_pages, | ||
57 | int mark); | ||
58 | static int btrfs_destroy_pinned_extent(struct btrfs_root *root, | ||
59 | struct extent_io_tree *pinned_extents); | ||
60 | static int btrfs_cleanup_transaction(struct btrfs_root *root); | ||
47 | 61 | ||
48 | /* | 62 | /* |
49 | * end_io_wq structs are used to do processing in task context when an IO is | 63 | * end_io_wq structs are used to do processing in task context when an IO is |
@@ -353,6 +367,10 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) | |||
353 | WARN_ON(len == 0); | 367 | WARN_ON(len == 0); |
354 | 368 | ||
355 | eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); | 369 | eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); |
370 | if (eb == NULL) { | ||
371 | WARN_ON(1); | ||
372 | goto out; | ||
373 | } | ||
356 | ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE, | 374 | ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE, |
357 | btrfs_header_generation(eb)); | 375 | btrfs_header_generation(eb)); |
358 | BUG_ON(ret); | 376 | BUG_ON(ret); |
@@ -427,6 +445,10 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, | |||
427 | WARN_ON(len == 0); | 445 | WARN_ON(len == 0); |
428 | 446 | ||
429 | eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); | 447 | eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); |
448 | if (eb == NULL) { | ||
449 | ret = -EIO; | ||
450 | goto out; | ||
451 | } | ||
430 | 452 | ||
431 | found_start = btrfs_header_bytenr(eb); | 453 | found_start = btrfs_header_bytenr(eb); |
432 | if (found_start != start) { | 454 | if (found_start != start) { |
@@ -1145,6 +1167,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, | |||
1145 | } | 1167 | } |
1146 | btrfs_free_path(path); | 1168 | btrfs_free_path(path); |
1147 | if (ret) { | 1169 | if (ret) { |
1170 | kfree(root); | ||
1148 | if (ret > 0) | 1171 | if (ret > 0) |
1149 | ret = -ENOENT; | 1172 | ret = -ENOENT; |
1150 | return ERR_PTR(ret); | 1173 | return ERR_PTR(ret); |
@@ -1527,6 +1550,7 @@ static int transaction_kthread(void *arg) | |||
1527 | spin_unlock(&root->fs_info->new_trans_lock); | 1550 | spin_unlock(&root->fs_info->new_trans_lock); |
1528 | 1551 | ||
1529 | trans = btrfs_join_transaction(root, 1); | 1552 | trans = btrfs_join_transaction(root, 1); |
1553 | BUG_ON(IS_ERR(trans)); | ||
1530 | if (transid == trans->transid) { | 1554 | if (transid == trans->transid) { |
1531 | ret = btrfs_commit_transaction(trans, root); | 1555 | ret = btrfs_commit_transaction(trans, root); |
1532 | BUG_ON(ret); | 1556 | BUG_ON(ret); |
@@ -1713,8 +1737,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1713 | fs_info, BTRFS_ROOT_TREE_OBJECTID); | 1737 | fs_info, BTRFS_ROOT_TREE_OBJECTID); |
1714 | 1738 | ||
1715 | bh = btrfs_read_dev_super(fs_devices->latest_bdev); | 1739 | bh = btrfs_read_dev_super(fs_devices->latest_bdev); |
1716 | if (!bh) | 1740 | if (!bh) { |
1741 | err = -EINVAL; | ||
1717 | goto fail_iput; | 1742 | goto fail_iput; |
1743 | } | ||
1718 | 1744 | ||
1719 | memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy)); | 1745 | memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy)); |
1720 | memcpy(&fs_info->super_for_commit, &fs_info->super_copy, | 1746 | memcpy(&fs_info->super_for_commit, &fs_info->super_copy, |
@@ -1727,6 +1753,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1727 | if (!btrfs_super_root(disk_super)) | 1753 | if (!btrfs_super_root(disk_super)) |
1728 | goto fail_iput; | 1754 | goto fail_iput; |
1729 | 1755 | ||
1756 | /* check FS state, whether FS is broken. */ | ||
1757 | fs_info->fs_state |= btrfs_super_flags(disk_super); | ||
1758 | |||
1759 | btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); | ||
1760 | |||
1730 | ret = btrfs_parse_options(tree_root, options); | 1761 | ret = btrfs_parse_options(tree_root, options); |
1731 | if (ret) { | 1762 | if (ret) { |
1732 | err = ret; | 1763 | err = ret; |
@@ -1744,10 +1775,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1744 | } | 1775 | } |
1745 | 1776 | ||
1746 | features = btrfs_super_incompat_flags(disk_super); | 1777 | features = btrfs_super_incompat_flags(disk_super); |
1747 | if (!(features & BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF)) { | 1778 | features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; |
1748 | features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; | 1779 | if (tree_root->fs_info->compress_type & BTRFS_COMPRESS_LZO) |
1749 | btrfs_set_super_incompat_flags(disk_super, features); | 1780 | features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; |
1750 | } | 1781 | btrfs_set_super_incompat_flags(disk_super, features); |
1751 | 1782 | ||
1752 | features = btrfs_super_compat_ro_flags(disk_super) & | 1783 | features = btrfs_super_compat_ro_flags(disk_super) & |
1753 | ~BTRFS_FEATURE_COMPAT_RO_SUPP; | 1784 | ~BTRFS_FEATURE_COMPAT_RO_SUPP; |
@@ -1957,7 +1988,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1957 | btrfs_set_opt(fs_info->mount_opt, SSD); | 1988 | btrfs_set_opt(fs_info->mount_opt, SSD); |
1958 | } | 1989 | } |
1959 | 1990 | ||
1960 | if (btrfs_super_log_root(disk_super) != 0) { | 1991 | /* do not make disk changes in broken FS */ |
1992 | if (btrfs_super_log_root(disk_super) != 0 && | ||
1993 | !(fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)) { | ||
1961 | u64 bytenr = btrfs_super_log_root(disk_super); | 1994 | u64 bytenr = btrfs_super_log_root(disk_super); |
1962 | 1995 | ||
1963 | if (fs_devices->rw_devices == 0) { | 1996 | if (fs_devices->rw_devices == 0) { |
@@ -2421,10 +2454,14 @@ int btrfs_commit_super(struct btrfs_root *root) | |||
2421 | up_write(&root->fs_info->cleanup_work_sem); | 2454 | up_write(&root->fs_info->cleanup_work_sem); |
2422 | 2455 | ||
2423 | trans = btrfs_join_transaction(root, 1); | 2456 | trans = btrfs_join_transaction(root, 1); |
2457 | if (IS_ERR(trans)) | ||
2458 | return PTR_ERR(trans); | ||
2424 | ret = btrfs_commit_transaction(trans, root); | 2459 | ret = btrfs_commit_transaction(trans, root); |
2425 | BUG_ON(ret); | 2460 | BUG_ON(ret); |
2426 | /* run commit again to drop the original snapshot */ | 2461 | /* run commit again to drop the original snapshot */ |
2427 | trans = btrfs_join_transaction(root, 1); | 2462 | trans = btrfs_join_transaction(root, 1); |
2463 | if (IS_ERR(trans)) | ||
2464 | return PTR_ERR(trans); | ||
2428 | btrfs_commit_transaction(trans, root); | 2465 | btrfs_commit_transaction(trans, root); |
2429 | ret = btrfs_write_and_wait_transaction(NULL, root); | 2466 | ret = btrfs_write_and_wait_transaction(NULL, root); |
2430 | BUG_ON(ret); | 2467 | BUG_ON(ret); |
@@ -2442,8 +2479,28 @@ int close_ctree(struct btrfs_root *root) | |||
2442 | smp_mb(); | 2479 | smp_mb(); |
2443 | 2480 | ||
2444 | btrfs_put_block_group_cache(fs_info); | 2481 | btrfs_put_block_group_cache(fs_info); |
2482 | |||
2483 | /* | ||
2484 | * Here come 2 situations when btrfs is broken to flip readonly: | ||
2485 | * | ||
2486 | * 1. when btrfs flips readonly somewhere else before | ||
2487 | * btrfs_commit_super, sb->s_flags has MS_RDONLY flag, | ||
2488 | * and btrfs will skip to write sb directly to keep | ||
2489 | * ERROR state on disk. | ||
2490 | * | ||
2491 | * 2. when btrfs flips readonly just in btrfs_commit_super, | ||
2492 | * and in such case, btrfs cannnot write sb via btrfs_commit_super, | ||
2493 | * and since fs_state has been set BTRFS_SUPER_FLAG_ERROR flag, | ||
2494 | * btrfs will cleanup all FS resources first and write sb then. | ||
2495 | */ | ||
2445 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { | 2496 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { |
2446 | ret = btrfs_commit_super(root); | 2497 | ret = btrfs_commit_super(root); |
2498 | if (ret) | ||
2499 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); | ||
2500 | } | ||
2501 | |||
2502 | if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { | ||
2503 | ret = btrfs_error_commit_super(root); | ||
2447 | if (ret) | 2504 | if (ret) |
2448 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); | 2505 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); |
2449 | } | 2506 | } |
@@ -2502,6 +2559,8 @@ int close_ctree(struct btrfs_root *root) | |||
2502 | kfree(fs_info->chunk_root); | 2559 | kfree(fs_info->chunk_root); |
2503 | kfree(fs_info->dev_root); | 2560 | kfree(fs_info->dev_root); |
2504 | kfree(fs_info->csum_root); | 2561 | kfree(fs_info->csum_root); |
2562 | kfree(fs_info); | ||
2563 | |||
2505 | return 0; | 2564 | return 0; |
2506 | } | 2565 | } |
2507 | 2566 | ||
@@ -2619,6 +2678,352 @@ out: | |||
2619 | return 0; | 2678 | return 0; |
2620 | } | 2679 | } |
2621 | 2680 | ||
2681 | static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info, | ||
2682 | int read_only) | ||
2683 | { | ||
2684 | if (read_only) | ||
2685 | return; | ||
2686 | |||
2687 | if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) | ||
2688 | printk(KERN_WARNING "warning: mount fs with errors, " | ||
2689 | "running btrfsck is recommended\n"); | ||
2690 | } | ||
2691 | |||
2692 | int btrfs_error_commit_super(struct btrfs_root *root) | ||
2693 | { | ||
2694 | int ret; | ||
2695 | |||
2696 | mutex_lock(&root->fs_info->cleaner_mutex); | ||
2697 | btrfs_run_delayed_iputs(root); | ||
2698 | mutex_unlock(&root->fs_info->cleaner_mutex); | ||
2699 | |||
2700 | down_write(&root->fs_info->cleanup_work_sem); | ||
2701 | up_write(&root->fs_info->cleanup_work_sem); | ||
2702 | |||
2703 | /* cleanup FS via transaction */ | ||
2704 | btrfs_cleanup_transaction(root); | ||
2705 | |||
2706 | ret = write_ctree_super(NULL, root, 0); | ||
2707 | |||
2708 | return ret; | ||
2709 | } | ||
2710 | |||
2711 | static int btrfs_destroy_ordered_operations(struct btrfs_root *root) | ||
2712 | { | ||
2713 | struct btrfs_inode *btrfs_inode; | ||
2714 | struct list_head splice; | ||
2715 | |||
2716 | INIT_LIST_HEAD(&splice); | ||
2717 | |||
2718 | mutex_lock(&root->fs_info->ordered_operations_mutex); | ||
2719 | spin_lock(&root->fs_info->ordered_extent_lock); | ||
2720 | |||
2721 | list_splice_init(&root->fs_info->ordered_operations, &splice); | ||
2722 | while (!list_empty(&splice)) { | ||
2723 | btrfs_inode = list_entry(splice.next, struct btrfs_inode, | ||
2724 | ordered_operations); | ||
2725 | |||
2726 | list_del_init(&btrfs_inode->ordered_operations); | ||
2727 | |||
2728 | btrfs_invalidate_inodes(btrfs_inode->root); | ||
2729 | } | ||
2730 | |||
2731 | spin_unlock(&root->fs_info->ordered_extent_lock); | ||
2732 | mutex_unlock(&root->fs_info->ordered_operations_mutex); | ||
2733 | |||
2734 | return 0; | ||
2735 | } | ||
2736 | |||
2737 | static int btrfs_destroy_ordered_extents(struct btrfs_root *root) | ||
2738 | { | ||
2739 | struct list_head splice; | ||
2740 | struct btrfs_ordered_extent *ordered; | ||
2741 | struct inode *inode; | ||
2742 | |||
2743 | INIT_LIST_HEAD(&splice); | ||
2744 | |||
2745 | spin_lock(&root->fs_info->ordered_extent_lock); | ||
2746 | |||
2747 | list_splice_init(&root->fs_info->ordered_extents, &splice); | ||
2748 | while (!list_empty(&splice)) { | ||
2749 | ordered = list_entry(splice.next, struct btrfs_ordered_extent, | ||
2750 | root_extent_list); | ||
2751 | |||
2752 | list_del_init(&ordered->root_extent_list); | ||
2753 | atomic_inc(&ordered->refs); | ||
2754 | |||
2755 | /* the inode may be getting freed (in sys_unlink path). */ | ||
2756 | inode = igrab(ordered->inode); | ||
2757 | |||
2758 | spin_unlock(&root->fs_info->ordered_extent_lock); | ||
2759 | if (inode) | ||
2760 | iput(inode); | ||
2761 | |||
2762 | atomic_set(&ordered->refs, 1); | ||
2763 | btrfs_put_ordered_extent(ordered); | ||
2764 | |||
2765 | spin_lock(&root->fs_info->ordered_extent_lock); | ||
2766 | } | ||
2767 | |||
2768 | spin_unlock(&root->fs_info->ordered_extent_lock); | ||
2769 | |||
2770 | return 0; | ||
2771 | } | ||
2772 | |||
2773 | static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, | ||
2774 | struct btrfs_root *root) | ||
2775 | { | ||
2776 | struct rb_node *node; | ||
2777 | struct btrfs_delayed_ref_root *delayed_refs; | ||
2778 | struct btrfs_delayed_ref_node *ref; | ||
2779 | int ret = 0; | ||
2780 | |||
2781 | delayed_refs = &trans->delayed_refs; | ||
2782 | |||
2783 | spin_lock(&delayed_refs->lock); | ||
2784 | if (delayed_refs->num_entries == 0) { | ||
2785 | printk(KERN_INFO "delayed_refs has NO entry\n"); | ||
2786 | return ret; | ||
2787 | } | ||
2788 | |||
2789 | node = rb_first(&delayed_refs->root); | ||
2790 | while (node) { | ||
2791 | ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); | ||
2792 | node = rb_next(node); | ||
2793 | |||
2794 | ref->in_tree = 0; | ||
2795 | rb_erase(&ref->rb_node, &delayed_refs->root); | ||
2796 | delayed_refs->num_entries--; | ||
2797 | |||
2798 | atomic_set(&ref->refs, 1); | ||
2799 | if (btrfs_delayed_ref_is_head(ref)) { | ||
2800 | struct btrfs_delayed_ref_head *head; | ||
2801 | |||
2802 | head = btrfs_delayed_node_to_head(ref); | ||
2803 | mutex_lock(&head->mutex); | ||
2804 | kfree(head->extent_op); | ||
2805 | delayed_refs->num_heads--; | ||
2806 | if (list_empty(&head->cluster)) | ||
2807 | delayed_refs->num_heads_ready--; | ||
2808 | list_del_init(&head->cluster); | ||
2809 | mutex_unlock(&head->mutex); | ||
2810 | } | ||
2811 | |||
2812 | spin_unlock(&delayed_refs->lock); | ||
2813 | btrfs_put_delayed_ref(ref); | ||
2814 | |||
2815 | cond_resched(); | ||
2816 | spin_lock(&delayed_refs->lock); | ||
2817 | } | ||
2818 | |||
2819 | spin_unlock(&delayed_refs->lock); | ||
2820 | |||
2821 | return ret; | ||
2822 | } | ||
2823 | |||
2824 | static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t) | ||
2825 | { | ||
2826 | struct btrfs_pending_snapshot *snapshot; | ||
2827 | struct list_head splice; | ||
2828 | |||
2829 | INIT_LIST_HEAD(&splice); | ||
2830 | |||
2831 | list_splice_init(&t->pending_snapshots, &splice); | ||
2832 | |||
2833 | while (!list_empty(&splice)) { | ||
2834 | snapshot = list_entry(splice.next, | ||
2835 | struct btrfs_pending_snapshot, | ||
2836 | list); | ||
2837 | |||
2838 | list_del_init(&snapshot->list); | ||
2839 | |||
2840 | kfree(snapshot); | ||
2841 | } | ||
2842 | |||
2843 | return 0; | ||
2844 | } | ||
2845 | |||
2846 | static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root) | ||
2847 | { | ||
2848 | struct btrfs_inode *btrfs_inode; | ||
2849 | struct list_head splice; | ||
2850 | |||
2851 | INIT_LIST_HEAD(&splice); | ||
2852 | |||
2853 | list_splice_init(&root->fs_info->delalloc_inodes, &splice); | ||
2854 | |||
2855 | spin_lock(&root->fs_info->delalloc_lock); | ||
2856 | |||
2857 | while (!list_empty(&splice)) { | ||
2858 | btrfs_inode = list_entry(splice.next, struct btrfs_inode, | ||
2859 | delalloc_inodes); | ||
2860 | |||
2861 | list_del_init(&btrfs_inode->delalloc_inodes); | ||
2862 | |||
2863 | btrfs_invalidate_inodes(btrfs_inode->root); | ||
2864 | } | ||
2865 | |||
2866 | spin_unlock(&root->fs_info->delalloc_lock); | ||
2867 | |||
2868 | return 0; | ||
2869 | } | ||
2870 | |||
2871 | static int btrfs_destroy_marked_extents(struct btrfs_root *root, | ||
2872 | struct extent_io_tree *dirty_pages, | ||
2873 | int mark) | ||
2874 | { | ||
2875 | int ret; | ||
2876 | struct page *page; | ||
2877 | struct inode *btree_inode = root->fs_info->btree_inode; | ||
2878 | struct extent_buffer *eb; | ||
2879 | u64 start = 0; | ||
2880 | u64 end; | ||
2881 | u64 offset; | ||
2882 | unsigned long index; | ||
2883 | |||
2884 | while (1) { | ||
2885 | ret = find_first_extent_bit(dirty_pages, start, &start, &end, | ||
2886 | mark); | ||
2887 | if (ret) | ||
2888 | break; | ||
2889 | |||
2890 | clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS); | ||
2891 | while (start <= end) { | ||
2892 | index = start >> PAGE_CACHE_SHIFT; | ||
2893 | start = (u64)(index + 1) << PAGE_CACHE_SHIFT; | ||
2894 | page = find_get_page(btree_inode->i_mapping, index); | ||
2895 | if (!page) | ||
2896 | continue; | ||
2897 | offset = page_offset(page); | ||
2898 | |||
2899 | spin_lock(&dirty_pages->buffer_lock); | ||
2900 | eb = radix_tree_lookup( | ||
2901 | &(&BTRFS_I(page->mapping->host)->io_tree)->buffer, | ||
2902 | offset >> PAGE_CACHE_SHIFT); | ||
2903 | spin_unlock(&dirty_pages->buffer_lock); | ||
2904 | if (eb) { | ||
2905 | ret = test_and_clear_bit(EXTENT_BUFFER_DIRTY, | ||
2906 | &eb->bflags); | ||
2907 | atomic_set(&eb->refs, 1); | ||
2908 | } | ||
2909 | if (PageWriteback(page)) | ||
2910 | end_page_writeback(page); | ||
2911 | |||
2912 | lock_page(page); | ||
2913 | if (PageDirty(page)) { | ||
2914 | clear_page_dirty_for_io(page); | ||
2915 | spin_lock_irq(&page->mapping->tree_lock); | ||
2916 | radix_tree_tag_clear(&page->mapping->page_tree, | ||
2917 | page_index(page), | ||
2918 | PAGECACHE_TAG_DIRTY); | ||
2919 | spin_unlock_irq(&page->mapping->tree_lock); | ||
2920 | } | ||
2921 | |||
2922 | page->mapping->a_ops->invalidatepage(page, 0); | ||
2923 | unlock_page(page); | ||
2924 | } | ||
2925 | } | ||
2926 | |||
2927 | return ret; | ||
2928 | } | ||
2929 | |||
2930 | static int btrfs_destroy_pinned_extent(struct btrfs_root *root, | ||
2931 | struct extent_io_tree *pinned_extents) | ||
2932 | { | ||
2933 | struct extent_io_tree *unpin; | ||
2934 | u64 start; | ||
2935 | u64 end; | ||
2936 | int ret; | ||
2937 | |||
2938 | unpin = pinned_extents; | ||
2939 | while (1) { | ||
2940 | ret = find_first_extent_bit(unpin, 0, &start, &end, | ||
2941 | EXTENT_DIRTY); | ||
2942 | if (ret) | ||
2943 | break; | ||
2944 | |||
2945 | /* opt_discard */ | ||
2946 | ret = btrfs_error_discard_extent(root, start, end + 1 - start); | ||
2947 | |||
2948 | clear_extent_dirty(unpin, start, end, GFP_NOFS); | ||
2949 | btrfs_error_unpin_extent_range(root, start, end); | ||
2950 | cond_resched(); | ||
2951 | } | ||
2952 | |||
2953 | return 0; | ||
2954 | } | ||
2955 | |||
2956 | static int btrfs_cleanup_transaction(struct btrfs_root *root) | ||
2957 | { | ||
2958 | struct btrfs_transaction *t; | ||
2959 | LIST_HEAD(list); | ||
2960 | |||
2961 | WARN_ON(1); | ||
2962 | |||
2963 | mutex_lock(&root->fs_info->trans_mutex); | ||
2964 | mutex_lock(&root->fs_info->transaction_kthread_mutex); | ||
2965 | |||
2966 | list_splice_init(&root->fs_info->trans_list, &list); | ||
2967 | while (!list_empty(&list)) { | ||
2968 | t = list_entry(list.next, struct btrfs_transaction, list); | ||
2969 | if (!t) | ||
2970 | break; | ||
2971 | |||
2972 | btrfs_destroy_ordered_operations(root); | ||
2973 | |||
2974 | btrfs_destroy_ordered_extents(root); | ||
2975 | |||
2976 | btrfs_destroy_delayed_refs(t, root); | ||
2977 | |||
2978 | btrfs_block_rsv_release(root, | ||
2979 | &root->fs_info->trans_block_rsv, | ||
2980 | t->dirty_pages.dirty_bytes); | ||
2981 | |||
2982 | /* FIXME: cleanup wait for commit */ | ||
2983 | t->in_commit = 1; | ||
2984 | t->blocked = 1; | ||
2985 | if (waitqueue_active(&root->fs_info->transaction_blocked_wait)) | ||
2986 | wake_up(&root->fs_info->transaction_blocked_wait); | ||
2987 | |||
2988 | t->blocked = 0; | ||
2989 | if (waitqueue_active(&root->fs_info->transaction_wait)) | ||
2990 | wake_up(&root->fs_info->transaction_wait); | ||
2991 | mutex_unlock(&root->fs_info->trans_mutex); | ||
2992 | |||
2993 | mutex_lock(&root->fs_info->trans_mutex); | ||
2994 | t->commit_done = 1; | ||
2995 | if (waitqueue_active(&t->commit_wait)) | ||
2996 | wake_up(&t->commit_wait); | ||
2997 | mutex_unlock(&root->fs_info->trans_mutex); | ||
2998 | |||
2999 | mutex_lock(&root->fs_info->trans_mutex); | ||
3000 | |||
3001 | btrfs_destroy_pending_snapshots(t); | ||
3002 | |||
3003 | btrfs_destroy_delalloc_inodes(root); | ||
3004 | |||
3005 | spin_lock(&root->fs_info->new_trans_lock); | ||
3006 | root->fs_info->running_transaction = NULL; | ||
3007 | spin_unlock(&root->fs_info->new_trans_lock); | ||
3008 | |||
3009 | btrfs_destroy_marked_extents(root, &t->dirty_pages, | ||
3010 | EXTENT_DIRTY); | ||
3011 | |||
3012 | btrfs_destroy_pinned_extent(root, | ||
3013 | root->fs_info->pinned_extents); | ||
3014 | |||
3015 | t->use_count = 0; | ||
3016 | list_del_init(&t->list); | ||
3017 | memset(t, 0, sizeof(*t)); | ||
3018 | kmem_cache_free(btrfs_transaction_cachep, t); | ||
3019 | } | ||
3020 | |||
3021 | mutex_unlock(&root->fs_info->transaction_kthread_mutex); | ||
3022 | mutex_unlock(&root->fs_info->trans_mutex); | ||
3023 | |||
3024 | return 0; | ||
3025 | } | ||
3026 | |||
2622 | static struct extent_io_ops btree_extent_io_ops = { | 3027 | static struct extent_io_ops btree_extent_io_ops = { |
2623 | .write_cache_pages_lock_hook = btree_lock_page_hook, | 3028 | .write_cache_pages_lock_hook = btree_lock_page_hook, |
2624 | .readpage_end_io_hook = btree_readpage_end_io_hook, | 3029 | .readpage_end_io_hook = btree_readpage_end_io_hook, |
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 88e825a0bf21..07b20dc2fd95 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h | |||
@@ -52,6 +52,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans, | |||
52 | struct btrfs_root *root, int max_mirrors); | 52 | struct btrfs_root *root, int max_mirrors); |
53 | struct buffer_head *btrfs_read_dev_super(struct block_device *bdev); | 53 | struct buffer_head *btrfs_read_dev_super(struct block_device *bdev); |
54 | int btrfs_commit_super(struct btrfs_root *root); | 54 | int btrfs_commit_super(struct btrfs_root *root); |
55 | int btrfs_error_commit_super(struct btrfs_root *root); | ||
55 | struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, | 56 | struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, |
56 | u64 bytenr, u32 blocksize); | 57 | u64 bytenr, u32 blocksize); |
57 | struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, | 58 | struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, |
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index 9786963b07e5..ff27d7a477b2 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c | |||
@@ -171,6 +171,8 @@ static struct dentry *btrfs_get_parent(struct dentry *child) | |||
171 | int ret; | 171 | int ret; |
172 | 172 | ||
173 | path = btrfs_alloc_path(); | 173 | path = btrfs_alloc_path(); |
174 | if (!path) | ||
175 | return ERR_PTR(-ENOMEM); | ||
174 | 176 | ||
175 | if (dir->i_ino == BTRFS_FIRST_FREE_OBJECTID) { | 177 | if (dir->i_ino == BTRFS_FIRST_FREE_OBJECTID) { |
176 | key.objectid = root->root_key.objectid; | 178 | key.objectid = root->root_key.objectid; |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 227e5815d838..4e7e012ad667 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -320,11 +320,6 @@ static int caching_kthread(void *data) | |||
320 | if (!path) | 320 | if (!path) |
321 | return -ENOMEM; | 321 | return -ENOMEM; |
322 | 322 | ||
323 | exclude_super_stripes(extent_root, block_group); | ||
324 | spin_lock(&block_group->space_info->lock); | ||
325 | block_group->space_info->bytes_readonly += block_group->bytes_super; | ||
326 | spin_unlock(&block_group->space_info->lock); | ||
327 | |||
328 | last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); | 323 | last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); |
329 | 324 | ||
330 | /* | 325 | /* |
@@ -467,8 +462,10 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, | |||
467 | cache->cached = BTRFS_CACHE_NO; | 462 | cache->cached = BTRFS_CACHE_NO; |
468 | } | 463 | } |
469 | spin_unlock(&cache->lock); | 464 | spin_unlock(&cache->lock); |
470 | if (ret == 1) | 465 | if (ret == 1) { |
466 | free_excluded_extents(fs_info->extent_root, cache); | ||
471 | return 0; | 467 | return 0; |
468 | } | ||
472 | } | 469 | } |
473 | 470 | ||
474 | if (load_cache_only) | 471 | if (load_cache_only) |
@@ -3089,7 +3086,7 @@ static u64 get_alloc_profile(struct btrfs_root *root, u64 flags) | |||
3089 | return btrfs_reduce_alloc_profile(root, flags); | 3086 | return btrfs_reduce_alloc_profile(root, flags); |
3090 | } | 3087 | } |
3091 | 3088 | ||
3092 | static u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) | 3089 | u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) |
3093 | { | 3090 | { |
3094 | u64 flags; | 3091 | u64 flags; |
3095 | 3092 | ||
@@ -3161,8 +3158,12 @@ alloc: | |||
3161 | bytes + 2 * 1024 * 1024, | 3158 | bytes + 2 * 1024 * 1024, |
3162 | alloc_target, 0); | 3159 | alloc_target, 0); |
3163 | btrfs_end_transaction(trans, root); | 3160 | btrfs_end_transaction(trans, root); |
3164 | if (ret < 0) | 3161 | if (ret < 0) { |
3165 | return ret; | 3162 | if (ret != -ENOSPC) |
3163 | return ret; | ||
3164 | else | ||
3165 | goto commit_trans; | ||
3166 | } | ||
3166 | 3167 | ||
3167 | if (!data_sinfo) { | 3168 | if (!data_sinfo) { |
3168 | btrfs_set_inode_space_info(root, inode); | 3169 | btrfs_set_inode_space_info(root, inode); |
@@ -3173,6 +3174,7 @@ alloc: | |||
3173 | spin_unlock(&data_sinfo->lock); | 3174 | spin_unlock(&data_sinfo->lock); |
3174 | 3175 | ||
3175 | /* commit the current transaction and try again */ | 3176 | /* commit the current transaction and try again */ |
3177 | commit_trans: | ||
3176 | if (!committed && !root->fs_info->open_ioctl_trans) { | 3178 | if (!committed && !root->fs_info->open_ioctl_trans) { |
3177 | committed = 1; | 3179 | committed = 1; |
3178 | trans = btrfs_join_transaction(root, 1); | 3180 | trans = btrfs_join_transaction(root, 1); |
@@ -3339,8 +3341,10 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, | |||
3339 | u64 reserved; | 3341 | u64 reserved; |
3340 | u64 max_reclaim; | 3342 | u64 max_reclaim; |
3341 | u64 reclaimed = 0; | 3343 | u64 reclaimed = 0; |
3344 | long time_left; | ||
3342 | int pause = 1; | 3345 | int pause = 1; |
3343 | int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; | 3346 | int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; |
3347 | int loops = 0; | ||
3344 | 3348 | ||
3345 | block_rsv = &root->fs_info->delalloc_block_rsv; | 3349 | block_rsv = &root->fs_info->delalloc_block_rsv; |
3346 | space_info = block_rsv->space_info; | 3350 | space_info = block_rsv->space_info; |
@@ -3353,7 +3357,7 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, | |||
3353 | 3357 | ||
3354 | max_reclaim = min(reserved, to_reclaim); | 3358 | max_reclaim = min(reserved, to_reclaim); |
3355 | 3359 | ||
3356 | while (1) { | 3360 | while (loops < 1024) { |
3357 | /* have the flusher threads jump in and do some IO */ | 3361 | /* have the flusher threads jump in and do some IO */ |
3358 | smp_mb(); | 3362 | smp_mb(); |
3359 | nr_pages = min_t(unsigned long, nr_pages, | 3363 | nr_pages = min_t(unsigned long, nr_pages, |
@@ -3361,8 +3365,12 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, | |||
3361 | writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages); | 3365 | writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages); |
3362 | 3366 | ||
3363 | spin_lock(&space_info->lock); | 3367 | spin_lock(&space_info->lock); |
3364 | if (reserved > space_info->bytes_reserved) | 3368 | if (reserved > space_info->bytes_reserved) { |
3369 | loops = 0; | ||
3365 | reclaimed += reserved - space_info->bytes_reserved; | 3370 | reclaimed += reserved - space_info->bytes_reserved; |
3371 | } else { | ||
3372 | loops++; | ||
3373 | } | ||
3366 | reserved = space_info->bytes_reserved; | 3374 | reserved = space_info->bytes_reserved; |
3367 | spin_unlock(&space_info->lock); | 3375 | spin_unlock(&space_info->lock); |
3368 | 3376 | ||
@@ -3373,7 +3381,12 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, | |||
3373 | return -EAGAIN; | 3381 | return -EAGAIN; |
3374 | 3382 | ||
3375 | __set_current_state(TASK_INTERRUPTIBLE); | 3383 | __set_current_state(TASK_INTERRUPTIBLE); |
3376 | schedule_timeout(pause); | 3384 | time_left = schedule_timeout(pause); |
3385 | |||
3386 | /* We were interrupted, exit */ | ||
3387 | if (time_left) | ||
3388 | break; | ||
3389 | |||
3377 | pause <<= 1; | 3390 | pause <<= 1; |
3378 | if (pause > HZ / 10) | 3391 | if (pause > HZ / 10) |
3379 | pause = HZ / 10; | 3392 | pause = HZ / 10; |
@@ -3583,8 +3596,20 @@ void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv, | |||
3583 | 3596 | ||
3584 | if (num_bytes > 0) { | 3597 | if (num_bytes > 0) { |
3585 | if (dest) { | 3598 | if (dest) { |
3586 | block_rsv_add_bytes(dest, num_bytes, 0); | 3599 | spin_lock(&dest->lock); |
3587 | } else { | 3600 | if (!dest->full) { |
3601 | u64 bytes_to_add; | ||
3602 | |||
3603 | bytes_to_add = dest->size - dest->reserved; | ||
3604 | bytes_to_add = min(num_bytes, bytes_to_add); | ||
3605 | dest->reserved += bytes_to_add; | ||
3606 | if (dest->reserved >= dest->size) | ||
3607 | dest->full = 1; | ||
3608 | num_bytes -= bytes_to_add; | ||
3609 | } | ||
3610 | spin_unlock(&dest->lock); | ||
3611 | } | ||
3612 | if (num_bytes) { | ||
3588 | spin_lock(&space_info->lock); | 3613 | spin_lock(&space_info->lock); |
3589 | space_info->bytes_reserved -= num_bytes; | 3614 | space_info->bytes_reserved -= num_bytes; |
3590 | spin_unlock(&space_info->lock); | 3615 | spin_unlock(&space_info->lock); |
@@ -3721,11 +3746,6 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, | |||
3721 | return 0; | 3746 | return 0; |
3722 | } | 3747 | } |
3723 | 3748 | ||
3724 | WARN_ON(1); | ||
3725 | printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n", | ||
3726 | block_rsv->size, block_rsv->reserved, | ||
3727 | block_rsv->freed[0], block_rsv->freed[1]); | ||
3728 | |||
3729 | return -ENOSPC; | 3749 | return -ENOSPC; |
3730 | } | 3750 | } |
3731 | 3751 | ||
@@ -4012,6 +4032,7 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) | |||
4012 | 4032 | ||
4013 | num_bytes = ALIGN(num_bytes, root->sectorsize); | 4033 | num_bytes = ALIGN(num_bytes, root->sectorsize); |
4014 | atomic_dec(&BTRFS_I(inode)->outstanding_extents); | 4034 | atomic_dec(&BTRFS_I(inode)->outstanding_extents); |
4035 | WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents) < 0); | ||
4015 | 4036 | ||
4016 | spin_lock(&BTRFS_I(inode)->accounting_lock); | 4037 | spin_lock(&BTRFS_I(inode)->accounting_lock); |
4017 | nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents); | 4038 | nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents); |
@@ -5633,6 +5654,7 @@ use_block_rsv(struct btrfs_trans_handle *trans, | |||
5633 | struct btrfs_root *root, u32 blocksize) | 5654 | struct btrfs_root *root, u32 blocksize) |
5634 | { | 5655 | { |
5635 | struct btrfs_block_rsv *block_rsv; | 5656 | struct btrfs_block_rsv *block_rsv; |
5657 | struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; | ||
5636 | int ret; | 5658 | int ret; |
5637 | 5659 | ||
5638 | block_rsv = get_block_rsv(trans, root); | 5660 | block_rsv = get_block_rsv(trans, root); |
@@ -5640,14 +5662,39 @@ use_block_rsv(struct btrfs_trans_handle *trans, | |||
5640 | if (block_rsv->size == 0) { | 5662 | if (block_rsv->size == 0) { |
5641 | ret = reserve_metadata_bytes(trans, root, block_rsv, | 5663 | ret = reserve_metadata_bytes(trans, root, block_rsv, |
5642 | blocksize, 0); | 5664 | blocksize, 0); |
5643 | if (ret) | 5665 | /* |
5666 | * If we couldn't reserve metadata bytes try and use some from | ||
5667 | * the global reserve. | ||
5668 | */ | ||
5669 | if (ret && block_rsv != global_rsv) { | ||
5670 | ret = block_rsv_use_bytes(global_rsv, blocksize); | ||
5671 | if (!ret) | ||
5672 | return global_rsv; | ||
5673 | return ERR_PTR(ret); | ||
5674 | } else if (ret) { | ||
5644 | return ERR_PTR(ret); | 5675 | return ERR_PTR(ret); |
5676 | } | ||
5645 | return block_rsv; | 5677 | return block_rsv; |
5646 | } | 5678 | } |
5647 | 5679 | ||
5648 | ret = block_rsv_use_bytes(block_rsv, blocksize); | 5680 | ret = block_rsv_use_bytes(block_rsv, blocksize); |
5649 | if (!ret) | 5681 | if (!ret) |
5650 | return block_rsv; | 5682 | return block_rsv; |
5683 | if (ret) { | ||
5684 | WARN_ON(1); | ||
5685 | ret = reserve_metadata_bytes(trans, root, block_rsv, blocksize, | ||
5686 | 0); | ||
5687 | if (!ret) { | ||
5688 | spin_lock(&block_rsv->lock); | ||
5689 | block_rsv->size += blocksize; | ||
5690 | spin_unlock(&block_rsv->lock); | ||
5691 | return block_rsv; | ||
5692 | } else if (ret && block_rsv != global_rsv) { | ||
5693 | ret = block_rsv_use_bytes(global_rsv, blocksize); | ||
5694 | if (!ret) | ||
5695 | return global_rsv; | ||
5696 | } | ||
5697 | } | ||
5651 | 5698 | ||
5652 | return ERR_PTR(-ENOSPC); | 5699 | return ERR_PTR(-ENOSPC); |
5653 | } | 5700 | } |
@@ -6221,6 +6268,8 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
6221 | BUG_ON(!wc); | 6268 | BUG_ON(!wc); |
6222 | 6269 | ||
6223 | trans = btrfs_start_transaction(tree_root, 0); | 6270 | trans = btrfs_start_transaction(tree_root, 0); |
6271 | BUG_ON(IS_ERR(trans)); | ||
6272 | |||
6224 | if (block_rsv) | 6273 | if (block_rsv) |
6225 | trans->block_rsv = block_rsv; | 6274 | trans->block_rsv = block_rsv; |
6226 | 6275 | ||
@@ -6318,6 +6367,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
6318 | 6367 | ||
6319 | btrfs_end_transaction_throttle(trans, tree_root); | 6368 | btrfs_end_transaction_throttle(trans, tree_root); |
6320 | trans = btrfs_start_transaction(tree_root, 0); | 6369 | trans = btrfs_start_transaction(tree_root, 0); |
6370 | BUG_ON(IS_ERR(trans)); | ||
6321 | if (block_rsv) | 6371 | if (block_rsv) |
6322 | trans->block_rsv = block_rsv; | 6372 | trans->block_rsv = block_rsv; |
6323 | } | 6373 | } |
@@ -6446,6 +6496,8 @@ static noinline int relocate_inode_pages(struct inode *inode, u64 start, | |||
6446 | int ret = 0; | 6496 | int ret = 0; |
6447 | 6497 | ||
6448 | ra = kzalloc(sizeof(*ra), GFP_NOFS); | 6498 | ra = kzalloc(sizeof(*ra), GFP_NOFS); |
6499 | if (!ra) | ||
6500 | return -ENOMEM; | ||
6449 | 6501 | ||
6450 | mutex_lock(&inode->i_mutex); | 6502 | mutex_lock(&inode->i_mutex); |
6451 | first_index = start >> PAGE_CACHE_SHIFT; | 6503 | first_index = start >> PAGE_CACHE_SHIFT; |
@@ -7477,7 +7529,7 @@ int btrfs_drop_dead_reloc_roots(struct btrfs_root *root) | |||
7477 | BUG_ON(reloc_root->commit_root != NULL); | 7529 | BUG_ON(reloc_root->commit_root != NULL); |
7478 | while (1) { | 7530 | while (1) { |
7479 | trans = btrfs_join_transaction(root, 1); | 7531 | trans = btrfs_join_transaction(root, 1); |
7480 | BUG_ON(!trans); | 7532 | BUG_ON(IS_ERR(trans)); |
7481 | 7533 | ||
7482 | mutex_lock(&root->fs_info->drop_mutex); | 7534 | mutex_lock(&root->fs_info->drop_mutex); |
7483 | ret = btrfs_drop_snapshot(trans, reloc_root); | 7535 | ret = btrfs_drop_snapshot(trans, reloc_root); |
@@ -7535,7 +7587,7 @@ int btrfs_cleanup_reloc_trees(struct btrfs_root *root) | |||
7535 | 7587 | ||
7536 | if (found) { | 7588 | if (found) { |
7537 | trans = btrfs_start_transaction(root, 1); | 7589 | trans = btrfs_start_transaction(root, 1); |
7538 | BUG_ON(!trans); | 7590 | BUG_ON(IS_ERR(trans)); |
7539 | ret = btrfs_commit_transaction(trans, root); | 7591 | ret = btrfs_commit_transaction(trans, root); |
7540 | BUG_ON(ret); | 7592 | BUG_ON(ret); |
7541 | } | 7593 | } |
@@ -7779,7 +7831,7 @@ static noinline int relocate_one_extent(struct btrfs_root *extent_root, | |||
7779 | 7831 | ||
7780 | 7832 | ||
7781 | trans = btrfs_start_transaction(extent_root, 1); | 7833 | trans = btrfs_start_transaction(extent_root, 1); |
7782 | BUG_ON(!trans); | 7834 | BUG_ON(IS_ERR(trans)); |
7783 | 7835 | ||
7784 | if (extent_key->objectid == 0) { | 7836 | if (extent_key->objectid == 0) { |
7785 | ret = del_extent_zero(trans, extent_root, path, extent_key); | 7837 | ret = del_extent_zero(trans, extent_root, path, extent_key); |
@@ -7970,13 +8022,14 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache) | |||
7970 | 8022 | ||
7971 | if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + | 8023 | if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + |
7972 | sinfo->bytes_may_use + sinfo->bytes_readonly + | 8024 | sinfo->bytes_may_use + sinfo->bytes_readonly + |
7973 | cache->reserved_pinned + num_bytes < sinfo->total_bytes) { | 8025 | cache->reserved_pinned + num_bytes <= sinfo->total_bytes) { |
7974 | sinfo->bytes_readonly += num_bytes; | 8026 | sinfo->bytes_readonly += num_bytes; |
7975 | sinfo->bytes_reserved += cache->reserved_pinned; | 8027 | sinfo->bytes_reserved += cache->reserved_pinned; |
7976 | cache->reserved_pinned = 0; | 8028 | cache->reserved_pinned = 0; |
7977 | cache->ro = 1; | 8029 | cache->ro = 1; |
7978 | ret = 0; | 8030 | ret = 0; |
7979 | } | 8031 | } |
8032 | |||
7980 | spin_unlock(&cache->lock); | 8033 | spin_unlock(&cache->lock); |
7981 | spin_unlock(&sinfo->lock); | 8034 | spin_unlock(&sinfo->lock); |
7982 | return ret; | 8035 | return ret; |
@@ -8012,6 +8065,62 @@ out: | |||
8012 | return ret; | 8065 | return ret; |
8013 | } | 8066 | } |
8014 | 8067 | ||
8068 | /* | ||
8069 | * helper to account the unused space of all the readonly block group in the | ||
8070 | * list. takes mirrors into account. | ||
8071 | */ | ||
8072 | static u64 __btrfs_get_ro_block_group_free_space(struct list_head *groups_list) | ||
8073 | { | ||
8074 | struct btrfs_block_group_cache *block_group; | ||
8075 | u64 free_bytes = 0; | ||
8076 | int factor; | ||
8077 | |||
8078 | list_for_each_entry(block_group, groups_list, list) { | ||
8079 | spin_lock(&block_group->lock); | ||
8080 | |||
8081 | if (!block_group->ro) { | ||
8082 | spin_unlock(&block_group->lock); | ||
8083 | continue; | ||
8084 | } | ||
8085 | |||
8086 | if (block_group->flags & (BTRFS_BLOCK_GROUP_RAID1 | | ||
8087 | BTRFS_BLOCK_GROUP_RAID10 | | ||
8088 | BTRFS_BLOCK_GROUP_DUP)) | ||
8089 | factor = 2; | ||
8090 | else | ||
8091 | factor = 1; | ||
8092 | |||
8093 | free_bytes += (block_group->key.offset - | ||
8094 | btrfs_block_group_used(&block_group->item)) * | ||
8095 | factor; | ||
8096 | |||
8097 | spin_unlock(&block_group->lock); | ||
8098 | } | ||
8099 | |||
8100 | return free_bytes; | ||
8101 | } | ||
8102 | |||
8103 | /* | ||
8104 | * helper to account the unused space of all the readonly block group in the | ||
8105 | * space_info. takes mirrors into account. | ||
8106 | */ | ||
8107 | u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo) | ||
8108 | { | ||
8109 | int i; | ||
8110 | u64 free_bytes = 0; | ||
8111 | |||
8112 | spin_lock(&sinfo->lock); | ||
8113 | |||
8114 | for(i = 0; i < BTRFS_NR_RAID_TYPES; i++) | ||
8115 | if (!list_empty(&sinfo->block_groups[i])) | ||
8116 | free_bytes += __btrfs_get_ro_block_group_free_space( | ||
8117 | &sinfo->block_groups[i]); | ||
8118 | |||
8119 | spin_unlock(&sinfo->lock); | ||
8120 | |||
8121 | return free_bytes; | ||
8122 | } | ||
8123 | |||
8015 | int btrfs_set_block_group_rw(struct btrfs_root *root, | 8124 | int btrfs_set_block_group_rw(struct btrfs_root *root, |
8016 | struct btrfs_block_group_cache *cache) | 8125 | struct btrfs_block_group_cache *cache) |
8017 | { | 8126 | { |
@@ -8092,7 +8201,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
8092 | mutex_lock(&root->fs_info->chunk_mutex); | 8201 | mutex_lock(&root->fs_info->chunk_mutex); |
8093 | list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { | 8202 | list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { |
8094 | u64 min_free = btrfs_block_group_used(&block_group->item); | 8203 | u64 min_free = btrfs_block_group_used(&block_group->item); |
8095 | u64 dev_offset, max_avail; | 8204 | u64 dev_offset; |
8096 | 8205 | ||
8097 | /* | 8206 | /* |
8098 | * check to make sure we can actually find a chunk with enough | 8207 | * check to make sure we can actually find a chunk with enough |
@@ -8100,7 +8209,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
8100 | */ | 8209 | */ |
8101 | if (device->total_bytes > device->bytes_used + min_free) { | 8210 | if (device->total_bytes > device->bytes_used + min_free) { |
8102 | ret = find_free_dev_extent(NULL, device, min_free, | 8211 | ret = find_free_dev_extent(NULL, device, min_free, |
8103 | &dev_offset, &max_avail); | 8212 | &dev_offset, NULL); |
8104 | if (!ret) | 8213 | if (!ret) |
8105 | break; | 8214 | break; |
8106 | ret = -1; | 8215 | ret = -1; |
@@ -8213,6 +8322,13 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
8213 | if (block_group->cached == BTRFS_CACHE_STARTED) | 8322 | if (block_group->cached == BTRFS_CACHE_STARTED) |
8214 | wait_block_group_cache_done(block_group); | 8323 | wait_block_group_cache_done(block_group); |
8215 | 8324 | ||
8325 | /* | ||
8326 | * We haven't cached this block group, which means we could | ||
8327 | * possibly have excluded extents on this block group. | ||
8328 | */ | ||
8329 | if (block_group->cached == BTRFS_CACHE_NO) | ||
8330 | free_excluded_extents(info->extent_root, block_group); | ||
8331 | |||
8216 | btrfs_remove_free_space_cache(block_group); | 8332 | btrfs_remove_free_space_cache(block_group); |
8217 | btrfs_put_block_group(block_group); | 8333 | btrfs_put_block_group(block_group); |
8218 | 8334 | ||
@@ -8328,6 +8444,13 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
8328 | cache->sectorsize = root->sectorsize; | 8444 | cache->sectorsize = root->sectorsize; |
8329 | 8445 | ||
8330 | /* | 8446 | /* |
8447 | * We need to exclude the super stripes now so that the space | ||
8448 | * info has super bytes accounted for, otherwise we'll think | ||
8449 | * we have more space than we actually do. | ||
8450 | */ | ||
8451 | exclude_super_stripes(root, cache); | ||
8452 | |||
8453 | /* | ||
8331 | * check for two cases, either we are full, and therefore | 8454 | * check for two cases, either we are full, and therefore |
8332 | * don't need to bother with the caching work since we won't | 8455 | * don't need to bother with the caching work since we won't |
8333 | * find any space, or we are empty, and we can just add all | 8456 | * find any space, or we are empty, and we can just add all |
@@ -8335,12 +8458,10 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
8335 | * time, particularly in the full case. | 8458 | * time, particularly in the full case. |
8336 | */ | 8459 | */ |
8337 | if (found_key.offset == btrfs_block_group_used(&cache->item)) { | 8460 | if (found_key.offset == btrfs_block_group_used(&cache->item)) { |
8338 | exclude_super_stripes(root, cache); | ||
8339 | cache->last_byte_to_unpin = (u64)-1; | 8461 | cache->last_byte_to_unpin = (u64)-1; |
8340 | cache->cached = BTRFS_CACHE_FINISHED; | 8462 | cache->cached = BTRFS_CACHE_FINISHED; |
8341 | free_excluded_extents(root, cache); | 8463 | free_excluded_extents(root, cache); |
8342 | } else if (btrfs_block_group_used(&cache->item) == 0) { | 8464 | } else if (btrfs_block_group_used(&cache->item) == 0) { |
8343 | exclude_super_stripes(root, cache); | ||
8344 | cache->last_byte_to_unpin = (u64)-1; | 8465 | cache->last_byte_to_unpin = (u64)-1; |
8345 | cache->cached = BTRFS_CACHE_FINISHED; | 8466 | cache->cached = BTRFS_CACHE_FINISHED; |
8346 | add_new_free_space(cache, root->fs_info, | 8467 | add_new_free_space(cache, root->fs_info, |
@@ -8584,3 +8705,14 @@ out: | |||
8584 | btrfs_free_path(path); | 8705 | btrfs_free_path(path); |
8585 | return ret; | 8706 | return ret; |
8586 | } | 8707 | } |
8708 | |||
8709 | int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) | ||
8710 | { | ||
8711 | return unpin_extent_range(root, start, end); | ||
8712 | } | ||
8713 | |||
8714 | int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, | ||
8715 | u64 num_bytes) | ||
8716 | { | ||
8717 | return btrfs_discard_extent(root, bytenr, num_bytes); | ||
8718 | } | ||
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 3e86b9f36507..5e76a474cb7e 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -1865,7 +1865,7 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num, | |||
1865 | bio_get(bio); | 1865 | bio_get(bio); |
1866 | 1866 | ||
1867 | if (tree->ops && tree->ops->submit_bio_hook) | 1867 | if (tree->ops && tree->ops->submit_bio_hook) |
1868 | tree->ops->submit_bio_hook(page->mapping->host, rw, bio, | 1868 | ret = tree->ops->submit_bio_hook(page->mapping->host, rw, bio, |
1869 | mirror_num, bio_flags, start); | 1869 | mirror_num, bio_flags, start); |
1870 | else | 1870 | else |
1871 | submit_bio(rw, bio); | 1871 | submit_bio(rw, bio); |
@@ -1920,6 +1920,8 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, | |||
1920 | nr = bio_get_nr_vecs(bdev); | 1920 | nr = bio_get_nr_vecs(bdev); |
1921 | 1921 | ||
1922 | bio = btrfs_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); | 1922 | bio = btrfs_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); |
1923 | if (!bio) | ||
1924 | return -ENOMEM; | ||
1923 | 1925 | ||
1924 | bio_add_page(bio, page, page_size, offset); | 1926 | bio_add_page(bio, page, page_size, offset); |
1925 | bio->bi_end_io = end_io_func; | 1927 | bio->bi_end_io = end_io_func; |
@@ -2028,8 +2030,11 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
2028 | BUG_ON(extent_map_end(em) <= cur); | 2030 | BUG_ON(extent_map_end(em) <= cur); |
2029 | BUG_ON(end < cur); | 2031 | BUG_ON(end < cur); |
2030 | 2032 | ||
2031 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) | 2033 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { |
2032 | this_bio_flag = EXTENT_BIO_COMPRESSED; | 2034 | this_bio_flag = EXTENT_BIO_COMPRESSED; |
2035 | extent_set_compress_type(&this_bio_flag, | ||
2036 | em->compress_type); | ||
2037 | } | ||
2033 | 2038 | ||
2034 | iosize = min(extent_map_end(em) - cur, end - cur + 1); | 2039 | iosize = min(extent_map_end(em) - cur, end - cur + 1); |
2035 | cur_end = min(extent_map_end(em) - 1, end); | 2040 | cur_end = min(extent_map_end(em) - 1, end); |
@@ -2123,7 +2128,7 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page, | |||
2123 | ret = __extent_read_full_page(tree, page, get_extent, &bio, 0, | 2128 | ret = __extent_read_full_page(tree, page, get_extent, &bio, 0, |
2124 | &bio_flags); | 2129 | &bio_flags); |
2125 | if (bio) | 2130 | if (bio) |
2126 | submit_one_bio(READ, bio, 0, bio_flags); | 2131 | ret = submit_one_bio(READ, bio, 0, bio_flags); |
2127 | return ret; | 2132 | return ret; |
2128 | } | 2133 | } |
2129 | 2134 | ||
@@ -3072,6 +3077,8 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, | |||
3072 | #endif | 3077 | #endif |
3073 | 3078 | ||
3074 | eb = kmem_cache_zalloc(extent_buffer_cache, mask); | 3079 | eb = kmem_cache_zalloc(extent_buffer_cache, mask); |
3080 | if (eb == NULL) | ||
3081 | return NULL; | ||
3075 | eb->start = start; | 3082 | eb->start = start; |
3076 | eb->len = len; | 3083 | eb->len = len; |
3077 | spin_lock_init(&eb->lock); | 3084 | spin_lock_init(&eb->lock); |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 4183c8178f01..7083cfafd061 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
@@ -20,8 +20,12 @@ | |||
20 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) | 20 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) |
21 | #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) | 21 | #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) |
22 | 22 | ||
23 | /* flags for bio submission */ | 23 | /* |
24 | * flags for bio submission. The high bits indicate the compression | ||
25 | * type for this bio | ||
26 | */ | ||
24 | #define EXTENT_BIO_COMPRESSED 1 | 27 | #define EXTENT_BIO_COMPRESSED 1 |
28 | #define EXTENT_BIO_FLAG_SHIFT 16 | ||
25 | 29 | ||
26 | /* these are bit numbers for test/set bit */ | 30 | /* these are bit numbers for test/set bit */ |
27 | #define EXTENT_BUFFER_UPTODATE 0 | 31 | #define EXTENT_BUFFER_UPTODATE 0 |
@@ -135,6 +139,17 @@ struct extent_buffer { | |||
135 | wait_queue_head_t lock_wq; | 139 | wait_queue_head_t lock_wq; |
136 | }; | 140 | }; |
137 | 141 | ||
142 | static inline void extent_set_compress_type(unsigned long *bio_flags, | ||
143 | int compress_type) | ||
144 | { | ||
145 | *bio_flags |= compress_type << EXTENT_BIO_FLAG_SHIFT; | ||
146 | } | ||
147 | |||
148 | static inline int extent_compress_type(unsigned long bio_flags) | ||
149 | { | ||
150 | return bio_flags >> EXTENT_BIO_FLAG_SHIFT; | ||
151 | } | ||
152 | |||
138 | struct extent_map_tree; | 153 | struct extent_map_tree; |
139 | 154 | ||
140 | static inline struct extent_state *extent_state_next(struct extent_state *state) | 155 | static inline struct extent_state *extent_state_next(struct extent_state *state) |
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 23cb8da3ff66..b0e1fce12530 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c | |||
@@ -3,6 +3,7 @@ | |||
3 | #include <linux/module.h> | 3 | #include <linux/module.h> |
4 | #include <linux/spinlock.h> | 4 | #include <linux/spinlock.h> |
5 | #include <linux/hardirq.h> | 5 | #include <linux/hardirq.h> |
6 | #include "ctree.h" | ||
6 | #include "extent_map.h" | 7 | #include "extent_map.h" |
7 | 8 | ||
8 | 9 | ||
@@ -54,6 +55,7 @@ struct extent_map *alloc_extent_map(gfp_t mask) | |||
54 | return em; | 55 | return em; |
55 | em->in_tree = 0; | 56 | em->in_tree = 0; |
56 | em->flags = 0; | 57 | em->flags = 0; |
58 | em->compress_type = BTRFS_COMPRESS_NONE; | ||
57 | atomic_set(&em->refs, 1); | 59 | atomic_set(&em->refs, 1); |
58 | return em; | 60 | return em; |
59 | } | 61 | } |
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index ab6d74b6e647..28b44dbd1e35 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h | |||
@@ -26,7 +26,8 @@ struct extent_map { | |||
26 | unsigned long flags; | 26 | unsigned long flags; |
27 | struct block_device *bdev; | 27 | struct block_device *bdev; |
28 | atomic_t refs; | 28 | atomic_t refs; |
29 | int in_tree; | 29 | unsigned int in_tree:1; |
30 | unsigned int compress_type:4; | ||
30 | }; | 31 | }; |
31 | 32 | ||
32 | struct extent_map_tree { | 33 | struct extent_map_tree { |
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index a562a250ae77..4f19a3e1bf32 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c | |||
@@ -536,6 +536,8 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, | |||
536 | root = root->fs_info->csum_root; | 536 | root = root->fs_info->csum_root; |
537 | 537 | ||
538 | path = btrfs_alloc_path(); | 538 | path = btrfs_alloc_path(); |
539 | if (!path) | ||
540 | return -ENOMEM; | ||
539 | 541 | ||
540 | while (1) { | 542 | while (1) { |
541 | key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; | 543 | key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; |
@@ -548,7 +550,10 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, | |||
548 | if (path->slots[0] == 0) | 550 | if (path->slots[0] == 0) |
549 | goto out; | 551 | goto out; |
550 | path->slots[0]--; | 552 | path->slots[0]--; |
553 | } else if (ret < 0) { | ||
554 | goto out; | ||
551 | } | 555 | } |
556 | |||
552 | leaf = path->nodes[0]; | 557 | leaf = path->nodes[0]; |
553 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | 558 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); |
554 | 559 | ||
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 66836d85763b..c1d3a818731a 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/string.h> | 24 | #include <linux/string.h> |
25 | #include <linux/backing-dev.h> | 25 | #include <linux/backing-dev.h> |
26 | #include <linux/mpage.h> | 26 | #include <linux/mpage.h> |
27 | #include <linux/falloc.h> | ||
27 | #include <linux/swap.h> | 28 | #include <linux/swap.h> |
28 | #include <linux/writeback.h> | 29 | #include <linux/writeback.h> |
29 | #include <linux/statfs.h> | 30 | #include <linux/statfs.h> |
@@ -224,6 +225,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
224 | 225 | ||
225 | split->bdev = em->bdev; | 226 | split->bdev = em->bdev; |
226 | split->flags = flags; | 227 | split->flags = flags; |
228 | split->compress_type = em->compress_type; | ||
227 | ret = add_extent_mapping(em_tree, split); | 229 | ret = add_extent_mapping(em_tree, split); |
228 | BUG_ON(ret); | 230 | BUG_ON(ret); |
229 | free_extent_map(split); | 231 | free_extent_map(split); |
@@ -238,6 +240,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
238 | split->len = em->start + em->len - (start + len); | 240 | split->len = em->start + em->len - (start + len); |
239 | split->bdev = em->bdev; | 241 | split->bdev = em->bdev; |
240 | split->flags = flags; | 242 | split->flags = flags; |
243 | split->compress_type = em->compress_type; | ||
241 | 244 | ||
242 | if (compressed) { | 245 | if (compressed) { |
243 | split->block_len = em->block_len; | 246 | split->block_len = em->block_len; |
@@ -790,8 +793,12 @@ again: | |||
790 | for (i = 0; i < num_pages; i++) { | 793 | for (i = 0; i < num_pages; i++) { |
791 | pages[i] = grab_cache_page(inode->i_mapping, index + i); | 794 | pages[i] = grab_cache_page(inode->i_mapping, index + i); |
792 | if (!pages[i]) { | 795 | if (!pages[i]) { |
793 | err = -ENOMEM; | 796 | int c; |
794 | BUG_ON(1); | 797 | for (c = i - 1; c >= 0; c--) { |
798 | unlock_page(pages[c]); | ||
799 | page_cache_release(pages[c]); | ||
800 | } | ||
801 | return -ENOMEM; | ||
795 | } | 802 | } |
796 | wait_on_page_writeback(pages[i]); | 803 | wait_on_page_writeback(pages[i]); |
797 | } | 804 | } |
@@ -890,6 +897,17 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
890 | if (err) | 897 | if (err) |
891 | goto out; | 898 | goto out; |
892 | 899 | ||
900 | /* | ||
901 | * If BTRFS flips readonly due to some impossible error | ||
902 | * (fs_info->fs_state now has BTRFS_SUPER_FLAG_ERROR), | ||
903 | * although we have opened a file as writable, we have | ||
904 | * to stop this write operation to ensure FS consistency. | ||
905 | */ | ||
906 | if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { | ||
907 | err = -EROFS; | ||
908 | goto out; | ||
909 | } | ||
910 | |||
893 | file_update_time(file); | 911 | file_update_time(file); |
894 | BTRFS_I(inode)->sequence++; | 912 | BTRFS_I(inode)->sequence++; |
895 | 913 | ||
@@ -932,6 +950,10 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
932 | PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / | 950 | PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / |
933 | (sizeof(struct page *))); | 951 | (sizeof(struct page *))); |
934 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); | 952 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); |
953 | if (!pages) { | ||
954 | ret = -ENOMEM; | ||
955 | goto out; | ||
956 | } | ||
935 | 957 | ||
936 | /* generic_write_checks can change our pos */ | 958 | /* generic_write_checks can change our pos */ |
937 | start_pos = pos; | 959 | start_pos = pos; |
@@ -970,8 +992,8 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
970 | size_t write_bytes = min(iov_iter_count(&i), | 992 | size_t write_bytes = min(iov_iter_count(&i), |
971 | nrptrs * (size_t)PAGE_CACHE_SIZE - | 993 | nrptrs * (size_t)PAGE_CACHE_SIZE - |
972 | offset); | 994 | offset); |
973 | size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> | 995 | size_t num_pages = (write_bytes + offset + |
974 | PAGE_CACHE_SHIFT; | 996 | PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
975 | 997 | ||
976 | WARN_ON(num_pages > nrptrs); | 998 | WARN_ON(num_pages > nrptrs); |
977 | memset(pages, 0, sizeof(struct page *) * nrptrs); | 999 | memset(pages, 0, sizeof(struct page *) * nrptrs); |
@@ -1001,8 +1023,8 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
1001 | 1023 | ||
1002 | copied = btrfs_copy_from_user(pos, num_pages, | 1024 | copied = btrfs_copy_from_user(pos, num_pages, |
1003 | write_bytes, pages, &i); | 1025 | write_bytes, pages, &i); |
1004 | dirty_pages = (copied + PAGE_CACHE_SIZE - 1) >> | 1026 | dirty_pages = (copied + offset + PAGE_CACHE_SIZE - 1) >> |
1005 | PAGE_CACHE_SHIFT; | 1027 | PAGE_CACHE_SHIFT; |
1006 | 1028 | ||
1007 | if (num_pages > dirty_pages) { | 1029 | if (num_pages > dirty_pages) { |
1008 | if (copied > 0) | 1030 | if (copied > 0) |
@@ -1237,6 +1259,117 @@ static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) | |||
1237 | return 0; | 1259 | return 0; |
1238 | } | 1260 | } |
1239 | 1261 | ||
1262 | static long btrfs_fallocate(struct file *file, int mode, | ||
1263 | loff_t offset, loff_t len) | ||
1264 | { | ||
1265 | struct inode *inode = file->f_path.dentry->d_inode; | ||
1266 | struct extent_state *cached_state = NULL; | ||
1267 | u64 cur_offset; | ||
1268 | u64 last_byte; | ||
1269 | u64 alloc_start; | ||
1270 | u64 alloc_end; | ||
1271 | u64 alloc_hint = 0; | ||
1272 | u64 locked_end; | ||
1273 | u64 mask = BTRFS_I(inode)->root->sectorsize - 1; | ||
1274 | struct extent_map *em; | ||
1275 | int ret; | ||
1276 | |||
1277 | alloc_start = offset & ~mask; | ||
1278 | alloc_end = (offset + len + mask) & ~mask; | ||
1279 | |||
1280 | /* We only support the FALLOC_FL_KEEP_SIZE mode */ | ||
1281 | if (mode & ~FALLOC_FL_KEEP_SIZE) | ||
1282 | return -EOPNOTSUPP; | ||
1283 | |||
1284 | /* | ||
1285 | * wait for ordered IO before we have any locks. We'll loop again | ||
1286 | * below with the locks held. | ||
1287 | */ | ||
1288 | btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start); | ||
1289 | |||
1290 | mutex_lock(&inode->i_mutex); | ||
1291 | ret = inode_newsize_ok(inode, alloc_end); | ||
1292 | if (ret) | ||
1293 | goto out; | ||
1294 | |||
1295 | if (alloc_start > inode->i_size) { | ||
1296 | ret = btrfs_cont_expand(inode, alloc_start); | ||
1297 | if (ret) | ||
1298 | goto out; | ||
1299 | } | ||
1300 | |||
1301 | ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start); | ||
1302 | if (ret) | ||
1303 | goto out; | ||
1304 | |||
1305 | locked_end = alloc_end - 1; | ||
1306 | while (1) { | ||
1307 | struct btrfs_ordered_extent *ordered; | ||
1308 | |||
1309 | /* the extent lock is ordered inside the running | ||
1310 | * transaction | ||
1311 | */ | ||
1312 | lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start, | ||
1313 | locked_end, 0, &cached_state, GFP_NOFS); | ||
1314 | ordered = btrfs_lookup_first_ordered_extent(inode, | ||
1315 | alloc_end - 1); | ||
1316 | if (ordered && | ||
1317 | ordered->file_offset + ordered->len > alloc_start && | ||
1318 | ordered->file_offset < alloc_end) { | ||
1319 | btrfs_put_ordered_extent(ordered); | ||
1320 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, | ||
1321 | alloc_start, locked_end, | ||
1322 | &cached_state, GFP_NOFS); | ||
1323 | /* | ||
1324 | * we can't wait on the range with the transaction | ||
1325 | * running or with the extent lock held | ||
1326 | */ | ||
1327 | btrfs_wait_ordered_range(inode, alloc_start, | ||
1328 | alloc_end - alloc_start); | ||
1329 | } else { | ||
1330 | if (ordered) | ||
1331 | btrfs_put_ordered_extent(ordered); | ||
1332 | break; | ||
1333 | } | ||
1334 | } | ||
1335 | |||
1336 | cur_offset = alloc_start; | ||
1337 | while (1) { | ||
1338 | em = btrfs_get_extent(inode, NULL, 0, cur_offset, | ||
1339 | alloc_end - cur_offset, 0); | ||
1340 | BUG_ON(IS_ERR(em) || !em); | ||
1341 | last_byte = min(extent_map_end(em), alloc_end); | ||
1342 | last_byte = (last_byte + mask) & ~mask; | ||
1343 | if (em->block_start == EXTENT_MAP_HOLE || | ||
1344 | (cur_offset >= inode->i_size && | ||
1345 | !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { | ||
1346 | ret = btrfs_prealloc_file_range(inode, mode, cur_offset, | ||
1347 | last_byte - cur_offset, | ||
1348 | 1 << inode->i_blkbits, | ||
1349 | offset + len, | ||
1350 | &alloc_hint); | ||
1351 | if (ret < 0) { | ||
1352 | free_extent_map(em); | ||
1353 | break; | ||
1354 | } | ||
1355 | } | ||
1356 | free_extent_map(em); | ||
1357 | |||
1358 | cur_offset = last_byte; | ||
1359 | if (cur_offset >= alloc_end) { | ||
1360 | ret = 0; | ||
1361 | break; | ||
1362 | } | ||
1363 | } | ||
1364 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, | ||
1365 | &cached_state, GFP_NOFS); | ||
1366 | |||
1367 | btrfs_free_reserved_data_space(inode, alloc_end - alloc_start); | ||
1368 | out: | ||
1369 | mutex_unlock(&inode->i_mutex); | ||
1370 | return ret; | ||
1371 | } | ||
1372 | |||
1240 | const struct file_operations btrfs_file_operations = { | 1373 | const struct file_operations btrfs_file_operations = { |
1241 | .llseek = generic_file_llseek, | 1374 | .llseek = generic_file_llseek, |
1242 | .read = do_sync_read, | 1375 | .read = do_sync_read, |
@@ -1248,6 +1381,7 @@ const struct file_operations btrfs_file_operations = { | |||
1248 | .open = generic_file_open, | 1381 | .open = generic_file_open, |
1249 | .release = btrfs_release_file, | 1382 | .release = btrfs_release_file, |
1250 | .fsync = btrfs_sync_file, | 1383 | .fsync = btrfs_sync_file, |
1384 | .fallocate = btrfs_fallocate, | ||
1251 | .unlocked_ioctl = btrfs_ioctl, | 1385 | .unlocked_ioctl = btrfs_ioctl, |
1252 | #ifdef CONFIG_COMPAT | 1386 | #ifdef CONFIG_COMPAT |
1253 | .compat_ioctl = btrfs_ioctl, | 1387 | .compat_ioctl = btrfs_ioctl, |
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 60d684266959..a0390657451b 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
@@ -987,11 +987,18 @@ tree_search_offset(struct btrfs_block_group_cache *block_group, | |||
987 | return entry; | 987 | return entry; |
988 | } | 988 | } |
989 | 989 | ||
990 | static void unlink_free_space(struct btrfs_block_group_cache *block_group, | 990 | static inline void |
991 | struct btrfs_free_space *info) | 991 | __unlink_free_space(struct btrfs_block_group_cache *block_group, |
992 | struct btrfs_free_space *info) | ||
992 | { | 993 | { |
993 | rb_erase(&info->offset_index, &block_group->free_space_offset); | 994 | rb_erase(&info->offset_index, &block_group->free_space_offset); |
994 | block_group->free_extents--; | 995 | block_group->free_extents--; |
996 | } | ||
997 | |||
998 | static void unlink_free_space(struct btrfs_block_group_cache *block_group, | ||
999 | struct btrfs_free_space *info) | ||
1000 | { | ||
1001 | __unlink_free_space(block_group, info); | ||
995 | block_group->free_space -= info->bytes; | 1002 | block_group->free_space -= info->bytes; |
996 | } | 1003 | } |
997 | 1004 | ||
@@ -1016,14 +1023,18 @@ static void recalculate_thresholds(struct btrfs_block_group_cache *block_group) | |||
1016 | u64 max_bytes; | 1023 | u64 max_bytes; |
1017 | u64 bitmap_bytes; | 1024 | u64 bitmap_bytes; |
1018 | u64 extent_bytes; | 1025 | u64 extent_bytes; |
1026 | u64 size = block_group->key.offset; | ||
1019 | 1027 | ||
1020 | /* | 1028 | /* |
1021 | * The goal is to keep the total amount of memory used per 1gb of space | 1029 | * The goal is to keep the total amount of memory used per 1gb of space |
1022 | * at or below 32k, so we need to adjust how much memory we allow to be | 1030 | * at or below 32k, so we need to adjust how much memory we allow to be |
1023 | * used by extent based free space tracking | 1031 | * used by extent based free space tracking |
1024 | */ | 1032 | */ |
1025 | max_bytes = MAX_CACHE_BYTES_PER_GIG * | 1033 | if (size < 1024 * 1024 * 1024) |
1026 | (div64_u64(block_group->key.offset, 1024 * 1024 * 1024)); | 1034 | max_bytes = MAX_CACHE_BYTES_PER_GIG; |
1035 | else | ||
1036 | max_bytes = MAX_CACHE_BYTES_PER_GIG * | ||
1037 | div64_u64(size, 1024 * 1024 * 1024); | ||
1027 | 1038 | ||
1028 | /* | 1039 | /* |
1029 | * we want to account for 1 more bitmap than what we have so we can make | 1040 | * we want to account for 1 more bitmap than what we have so we can make |
@@ -1171,6 +1182,16 @@ static void add_new_bitmap(struct btrfs_block_group_cache *block_group, | |||
1171 | recalculate_thresholds(block_group); | 1182 | recalculate_thresholds(block_group); |
1172 | } | 1183 | } |
1173 | 1184 | ||
1185 | static void free_bitmap(struct btrfs_block_group_cache *block_group, | ||
1186 | struct btrfs_free_space *bitmap_info) | ||
1187 | { | ||
1188 | unlink_free_space(block_group, bitmap_info); | ||
1189 | kfree(bitmap_info->bitmap); | ||
1190 | kfree(bitmap_info); | ||
1191 | block_group->total_bitmaps--; | ||
1192 | recalculate_thresholds(block_group); | ||
1193 | } | ||
1194 | |||
1174 | static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_group, | 1195 | static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_group, |
1175 | struct btrfs_free_space *bitmap_info, | 1196 | struct btrfs_free_space *bitmap_info, |
1176 | u64 *offset, u64 *bytes) | 1197 | u64 *offset, u64 *bytes) |
@@ -1195,6 +1216,7 @@ again: | |||
1195 | */ | 1216 | */ |
1196 | search_start = *offset; | 1217 | search_start = *offset; |
1197 | search_bytes = *bytes; | 1218 | search_bytes = *bytes; |
1219 | search_bytes = min(search_bytes, end - search_start + 1); | ||
1198 | ret = search_bitmap(block_group, bitmap_info, &search_start, | 1220 | ret = search_bitmap(block_group, bitmap_info, &search_start, |
1199 | &search_bytes); | 1221 | &search_bytes); |
1200 | BUG_ON(ret < 0 || search_start != *offset); | 1222 | BUG_ON(ret < 0 || search_start != *offset); |
@@ -1211,13 +1233,8 @@ again: | |||
1211 | 1233 | ||
1212 | if (*bytes) { | 1234 | if (*bytes) { |
1213 | struct rb_node *next = rb_next(&bitmap_info->offset_index); | 1235 | struct rb_node *next = rb_next(&bitmap_info->offset_index); |
1214 | if (!bitmap_info->bytes) { | 1236 | if (!bitmap_info->bytes) |
1215 | unlink_free_space(block_group, bitmap_info); | 1237 | free_bitmap(block_group, bitmap_info); |
1216 | kfree(bitmap_info->bitmap); | ||
1217 | kfree(bitmap_info); | ||
1218 | block_group->total_bitmaps--; | ||
1219 | recalculate_thresholds(block_group); | ||
1220 | } | ||
1221 | 1238 | ||
1222 | /* | 1239 | /* |
1223 | * no entry after this bitmap, but we still have bytes to | 1240 | * no entry after this bitmap, but we still have bytes to |
@@ -1250,13 +1267,8 @@ again: | |||
1250 | return -EAGAIN; | 1267 | return -EAGAIN; |
1251 | 1268 | ||
1252 | goto again; | 1269 | goto again; |
1253 | } else if (!bitmap_info->bytes) { | 1270 | } else if (!bitmap_info->bytes) |
1254 | unlink_free_space(block_group, bitmap_info); | 1271 | free_bitmap(block_group, bitmap_info); |
1255 | kfree(bitmap_info->bitmap); | ||
1256 | kfree(bitmap_info); | ||
1257 | block_group->total_bitmaps--; | ||
1258 | recalculate_thresholds(block_group); | ||
1259 | } | ||
1260 | 1272 | ||
1261 | return 0; | 1273 | return 0; |
1262 | } | 1274 | } |
@@ -1359,22 +1371,14 @@ out: | |||
1359 | return ret; | 1371 | return ret; |
1360 | } | 1372 | } |
1361 | 1373 | ||
1362 | int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, | 1374 | bool try_merge_free_space(struct btrfs_block_group_cache *block_group, |
1363 | u64 offset, u64 bytes) | 1375 | struct btrfs_free_space *info, bool update_stat) |
1364 | { | 1376 | { |
1365 | struct btrfs_free_space *right_info = NULL; | 1377 | struct btrfs_free_space *left_info; |
1366 | struct btrfs_free_space *left_info = NULL; | 1378 | struct btrfs_free_space *right_info; |
1367 | struct btrfs_free_space *info = NULL; | 1379 | bool merged = false; |
1368 | int ret = 0; | 1380 | u64 offset = info->offset; |
1369 | 1381 | u64 bytes = info->bytes; | |
1370 | info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS); | ||
1371 | if (!info) | ||
1372 | return -ENOMEM; | ||
1373 | |||
1374 | info->offset = offset; | ||
1375 | info->bytes = bytes; | ||
1376 | |||
1377 | spin_lock(&block_group->tree_lock); | ||
1378 | 1382 | ||
1379 | /* | 1383 | /* |
1380 | * first we want to see if there is free space adjacent to the range we | 1384 | * first we want to see if there is free space adjacent to the range we |
@@ -1388,37 +1392,62 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, | |||
1388 | else | 1392 | else |
1389 | left_info = tree_search_offset(block_group, offset - 1, 0, 0); | 1393 | left_info = tree_search_offset(block_group, offset - 1, 0, 0); |
1390 | 1394 | ||
1391 | /* | ||
1392 | * If there was no extent directly to the left or right of this new | ||
1393 | * extent then we know we're going to have to allocate a new extent, so | ||
1394 | * before we do that see if we need to drop this into a bitmap | ||
1395 | */ | ||
1396 | if ((!left_info || left_info->bitmap) && | ||
1397 | (!right_info || right_info->bitmap)) { | ||
1398 | ret = insert_into_bitmap(block_group, info); | ||
1399 | |||
1400 | if (ret < 0) { | ||
1401 | goto out; | ||
1402 | } else if (ret) { | ||
1403 | ret = 0; | ||
1404 | goto out; | ||
1405 | } | ||
1406 | } | ||
1407 | |||
1408 | if (right_info && !right_info->bitmap) { | 1395 | if (right_info && !right_info->bitmap) { |
1409 | unlink_free_space(block_group, right_info); | 1396 | if (update_stat) |
1397 | unlink_free_space(block_group, right_info); | ||
1398 | else | ||
1399 | __unlink_free_space(block_group, right_info); | ||
1410 | info->bytes += right_info->bytes; | 1400 | info->bytes += right_info->bytes; |
1411 | kfree(right_info); | 1401 | kfree(right_info); |
1402 | merged = true; | ||
1412 | } | 1403 | } |
1413 | 1404 | ||
1414 | if (left_info && !left_info->bitmap && | 1405 | if (left_info && !left_info->bitmap && |
1415 | left_info->offset + left_info->bytes == offset) { | 1406 | left_info->offset + left_info->bytes == offset) { |
1416 | unlink_free_space(block_group, left_info); | 1407 | if (update_stat) |
1408 | unlink_free_space(block_group, left_info); | ||
1409 | else | ||
1410 | __unlink_free_space(block_group, left_info); | ||
1417 | info->offset = left_info->offset; | 1411 | info->offset = left_info->offset; |
1418 | info->bytes += left_info->bytes; | 1412 | info->bytes += left_info->bytes; |
1419 | kfree(left_info); | 1413 | kfree(left_info); |
1414 | merged = true; | ||
1420 | } | 1415 | } |
1421 | 1416 | ||
1417 | return merged; | ||
1418 | } | ||
1419 | |||
1420 | int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, | ||
1421 | u64 offset, u64 bytes) | ||
1422 | { | ||
1423 | struct btrfs_free_space *info; | ||
1424 | int ret = 0; | ||
1425 | |||
1426 | info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS); | ||
1427 | if (!info) | ||
1428 | return -ENOMEM; | ||
1429 | |||
1430 | info->offset = offset; | ||
1431 | info->bytes = bytes; | ||
1432 | |||
1433 | spin_lock(&block_group->tree_lock); | ||
1434 | |||
1435 | if (try_merge_free_space(block_group, info, true)) | ||
1436 | goto link; | ||
1437 | |||
1438 | /* | ||
1439 | * There was no extent directly to the left or right of this new | ||
1440 | * extent then we know we're going to have to allocate a new extent, so | ||
1441 | * before we do that see if we need to drop this into a bitmap | ||
1442 | */ | ||
1443 | ret = insert_into_bitmap(block_group, info); | ||
1444 | if (ret < 0) { | ||
1445 | goto out; | ||
1446 | } else if (ret) { | ||
1447 | ret = 0; | ||
1448 | goto out; | ||
1449 | } | ||
1450 | link: | ||
1422 | ret = link_free_space(block_group, info); | 1451 | ret = link_free_space(block_group, info); |
1423 | if (ret) | 1452 | if (ret) |
1424 | kfree(info); | 1453 | kfree(info); |
@@ -1621,6 +1650,7 @@ __btrfs_return_cluster_to_free_space( | |||
1621 | node = rb_next(&entry->offset_index); | 1650 | node = rb_next(&entry->offset_index); |
1622 | rb_erase(&entry->offset_index, &cluster->root); | 1651 | rb_erase(&entry->offset_index, &cluster->root); |
1623 | BUG_ON(entry->bitmap); | 1652 | BUG_ON(entry->bitmap); |
1653 | try_merge_free_space(block_group, entry, false); | ||
1624 | tree_insert_offset(&block_group->free_space_offset, | 1654 | tree_insert_offset(&block_group->free_space_offset, |
1625 | entry->offset, &entry->offset_index, 0); | 1655 | entry->offset, &entry->offset_index, 0); |
1626 | } | 1656 | } |
@@ -1685,13 +1715,8 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, | |||
1685 | ret = offset; | 1715 | ret = offset; |
1686 | if (entry->bitmap) { | 1716 | if (entry->bitmap) { |
1687 | bitmap_clear_bits(block_group, entry, offset, bytes); | 1717 | bitmap_clear_bits(block_group, entry, offset, bytes); |
1688 | if (!entry->bytes) { | 1718 | if (!entry->bytes) |
1689 | unlink_free_space(block_group, entry); | 1719 | free_bitmap(block_group, entry); |
1690 | kfree(entry->bitmap); | ||
1691 | kfree(entry); | ||
1692 | block_group->total_bitmaps--; | ||
1693 | recalculate_thresholds(block_group); | ||
1694 | } | ||
1695 | } else { | 1720 | } else { |
1696 | unlink_free_space(block_group, entry); | 1721 | unlink_free_space(block_group, entry); |
1697 | entry->offset += bytes; | 1722 | entry->offset += bytes; |
@@ -1789,6 +1814,8 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, | |||
1789 | 1814 | ||
1790 | ret = search_start; | 1815 | ret = search_start; |
1791 | bitmap_clear_bits(block_group, entry, ret, bytes); | 1816 | bitmap_clear_bits(block_group, entry, ret, bytes); |
1817 | if (entry->bytes == 0) | ||
1818 | free_bitmap(block_group, entry); | ||
1792 | out: | 1819 | out: |
1793 | spin_unlock(&cluster->lock); | 1820 | spin_unlock(&cluster->lock); |
1794 | spin_unlock(&block_group->tree_lock); | 1821 | spin_unlock(&block_group->tree_lock); |
@@ -1842,15 +1869,26 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, | |||
1842 | entry->offset += bytes; | 1869 | entry->offset += bytes; |
1843 | entry->bytes -= bytes; | 1870 | entry->bytes -= bytes; |
1844 | 1871 | ||
1845 | if (entry->bytes == 0) { | 1872 | if (entry->bytes == 0) |
1846 | rb_erase(&entry->offset_index, &cluster->root); | 1873 | rb_erase(&entry->offset_index, &cluster->root); |
1847 | kfree(entry); | ||
1848 | } | ||
1849 | break; | 1874 | break; |
1850 | } | 1875 | } |
1851 | out: | 1876 | out: |
1852 | spin_unlock(&cluster->lock); | 1877 | spin_unlock(&cluster->lock); |
1853 | 1878 | ||
1879 | if (!ret) | ||
1880 | return 0; | ||
1881 | |||
1882 | spin_lock(&block_group->tree_lock); | ||
1883 | |||
1884 | block_group->free_space -= bytes; | ||
1885 | if (entry->bytes == 0) { | ||
1886 | block_group->free_extents--; | ||
1887 | kfree(entry); | ||
1888 | } | ||
1889 | |||
1890 | spin_unlock(&block_group->tree_lock); | ||
1891 | |||
1854 | return ret; | 1892 | return ret; |
1855 | } | 1893 | } |
1856 | 1894 | ||
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a3798a3aa0d2..bcc461a9695f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -122,10 +122,10 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, | |||
122 | size_t cur_size = size; | 122 | size_t cur_size = size; |
123 | size_t datasize; | 123 | size_t datasize; |
124 | unsigned long offset; | 124 | unsigned long offset; |
125 | int use_compress = 0; | 125 | int compress_type = BTRFS_COMPRESS_NONE; |
126 | 126 | ||
127 | if (compressed_size && compressed_pages) { | 127 | if (compressed_size && compressed_pages) { |
128 | use_compress = 1; | 128 | compress_type = root->fs_info->compress_type; |
129 | cur_size = compressed_size; | 129 | cur_size = compressed_size; |
130 | } | 130 | } |
131 | 131 | ||
@@ -159,7 +159,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, | |||
159 | btrfs_set_file_extent_ram_bytes(leaf, ei, size); | 159 | btrfs_set_file_extent_ram_bytes(leaf, ei, size); |
160 | ptr = btrfs_file_extent_inline_start(ei); | 160 | ptr = btrfs_file_extent_inline_start(ei); |
161 | 161 | ||
162 | if (use_compress) { | 162 | if (compress_type != BTRFS_COMPRESS_NONE) { |
163 | struct page *cpage; | 163 | struct page *cpage; |
164 | int i = 0; | 164 | int i = 0; |
165 | while (compressed_size > 0) { | 165 | while (compressed_size > 0) { |
@@ -176,7 +176,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, | |||
176 | compressed_size -= cur_size; | 176 | compressed_size -= cur_size; |
177 | } | 177 | } |
178 | btrfs_set_file_extent_compression(leaf, ei, | 178 | btrfs_set_file_extent_compression(leaf, ei, |
179 | BTRFS_COMPRESS_ZLIB); | 179 | compress_type); |
180 | } else { | 180 | } else { |
181 | page = find_get_page(inode->i_mapping, | 181 | page = find_get_page(inode->i_mapping, |
182 | start >> PAGE_CACHE_SHIFT); | 182 | start >> PAGE_CACHE_SHIFT); |
@@ -263,6 +263,7 @@ struct async_extent { | |||
263 | u64 compressed_size; | 263 | u64 compressed_size; |
264 | struct page **pages; | 264 | struct page **pages; |
265 | unsigned long nr_pages; | 265 | unsigned long nr_pages; |
266 | int compress_type; | ||
266 | struct list_head list; | 267 | struct list_head list; |
267 | }; | 268 | }; |
268 | 269 | ||
@@ -280,7 +281,8 @@ static noinline int add_async_extent(struct async_cow *cow, | |||
280 | u64 start, u64 ram_size, | 281 | u64 start, u64 ram_size, |
281 | u64 compressed_size, | 282 | u64 compressed_size, |
282 | struct page **pages, | 283 | struct page **pages, |
283 | unsigned long nr_pages) | 284 | unsigned long nr_pages, |
285 | int compress_type) | ||
284 | { | 286 | { |
285 | struct async_extent *async_extent; | 287 | struct async_extent *async_extent; |
286 | 288 | ||
@@ -290,6 +292,7 @@ static noinline int add_async_extent(struct async_cow *cow, | |||
290 | async_extent->compressed_size = compressed_size; | 292 | async_extent->compressed_size = compressed_size; |
291 | async_extent->pages = pages; | 293 | async_extent->pages = pages; |
292 | async_extent->nr_pages = nr_pages; | 294 | async_extent->nr_pages = nr_pages; |
295 | async_extent->compress_type = compress_type; | ||
293 | list_add_tail(&async_extent->list, &cow->extents); | 296 | list_add_tail(&async_extent->list, &cow->extents); |
294 | return 0; | 297 | return 0; |
295 | } | 298 | } |
@@ -332,6 +335,7 @@ static noinline int compress_file_range(struct inode *inode, | |||
332 | unsigned long max_uncompressed = 128 * 1024; | 335 | unsigned long max_uncompressed = 128 * 1024; |
333 | int i; | 336 | int i; |
334 | int will_compress; | 337 | int will_compress; |
338 | int compress_type = root->fs_info->compress_type; | ||
335 | 339 | ||
336 | actual_end = min_t(u64, isize, end + 1); | 340 | actual_end = min_t(u64, isize, end + 1); |
337 | again: | 341 | again: |
@@ -381,12 +385,16 @@ again: | |||
381 | WARN_ON(pages); | 385 | WARN_ON(pages); |
382 | pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); | 386 | pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); |
383 | 387 | ||
384 | ret = btrfs_zlib_compress_pages(inode->i_mapping, start, | 388 | if (BTRFS_I(inode)->force_compress) |
385 | total_compressed, pages, | 389 | compress_type = BTRFS_I(inode)->force_compress; |
386 | nr_pages, &nr_pages_ret, | 390 | |
387 | &total_in, | 391 | ret = btrfs_compress_pages(compress_type, |
388 | &total_compressed, | 392 | inode->i_mapping, start, |
389 | max_compressed); | 393 | total_compressed, pages, |
394 | nr_pages, &nr_pages_ret, | ||
395 | &total_in, | ||
396 | &total_compressed, | ||
397 | max_compressed); | ||
390 | 398 | ||
391 | if (!ret) { | 399 | if (!ret) { |
392 | unsigned long offset = total_compressed & | 400 | unsigned long offset = total_compressed & |
@@ -408,7 +416,7 @@ again: | |||
408 | } | 416 | } |
409 | if (start == 0) { | 417 | if (start == 0) { |
410 | trans = btrfs_join_transaction(root, 1); | 418 | trans = btrfs_join_transaction(root, 1); |
411 | BUG_ON(!trans); | 419 | BUG_ON(IS_ERR(trans)); |
412 | btrfs_set_trans_block_group(trans, inode); | 420 | btrfs_set_trans_block_group(trans, inode); |
413 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | 421 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; |
414 | 422 | ||
@@ -493,7 +501,8 @@ again: | |||
493 | * and will submit them to the elevator. | 501 | * and will submit them to the elevator. |
494 | */ | 502 | */ |
495 | add_async_extent(async_cow, start, num_bytes, | 503 | add_async_extent(async_cow, start, num_bytes, |
496 | total_compressed, pages, nr_pages_ret); | 504 | total_compressed, pages, nr_pages_ret, |
505 | compress_type); | ||
497 | 506 | ||
498 | if (start + num_bytes < end) { | 507 | if (start + num_bytes < end) { |
499 | start += num_bytes; | 508 | start += num_bytes; |
@@ -515,7 +524,8 @@ cleanup_and_bail_uncompressed: | |||
515 | __set_page_dirty_nobuffers(locked_page); | 524 | __set_page_dirty_nobuffers(locked_page); |
516 | /* unlocked later on in the async handlers */ | 525 | /* unlocked later on in the async handlers */ |
517 | } | 526 | } |
518 | add_async_extent(async_cow, start, end - start + 1, 0, NULL, 0); | 527 | add_async_extent(async_cow, start, end - start + 1, |
528 | 0, NULL, 0, BTRFS_COMPRESS_NONE); | ||
519 | *num_added += 1; | 529 | *num_added += 1; |
520 | } | 530 | } |
521 | 531 | ||
@@ -602,6 +612,7 @@ retry: | |||
602 | GFP_NOFS); | 612 | GFP_NOFS); |
603 | 613 | ||
604 | trans = btrfs_join_transaction(root, 1); | 614 | trans = btrfs_join_transaction(root, 1); |
615 | BUG_ON(IS_ERR(trans)); | ||
605 | ret = btrfs_reserve_extent(trans, root, | 616 | ret = btrfs_reserve_extent(trans, root, |
606 | async_extent->compressed_size, | 617 | async_extent->compressed_size, |
607 | async_extent->compressed_size, | 618 | async_extent->compressed_size, |
@@ -640,6 +651,7 @@ retry: | |||
640 | em->block_start = ins.objectid; | 651 | em->block_start = ins.objectid; |
641 | em->block_len = ins.offset; | 652 | em->block_len = ins.offset; |
642 | em->bdev = root->fs_info->fs_devices->latest_bdev; | 653 | em->bdev = root->fs_info->fs_devices->latest_bdev; |
654 | em->compress_type = async_extent->compress_type; | ||
643 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | 655 | set_bit(EXTENT_FLAG_PINNED, &em->flags); |
644 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | 656 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); |
645 | 657 | ||
@@ -656,11 +668,13 @@ retry: | |||
656 | async_extent->ram_size - 1, 0); | 668 | async_extent->ram_size - 1, 0); |
657 | } | 669 | } |
658 | 670 | ||
659 | ret = btrfs_add_ordered_extent(inode, async_extent->start, | 671 | ret = btrfs_add_ordered_extent_compress(inode, |
660 | ins.objectid, | 672 | async_extent->start, |
661 | async_extent->ram_size, | 673 | ins.objectid, |
662 | ins.offset, | 674 | async_extent->ram_size, |
663 | BTRFS_ORDERED_COMPRESSED); | 675 | ins.offset, |
676 | BTRFS_ORDERED_COMPRESSED, | ||
677 | async_extent->compress_type); | ||
664 | BUG_ON(ret); | 678 | BUG_ON(ret); |
665 | 679 | ||
666 | /* | 680 | /* |
@@ -758,7 +772,7 @@ static noinline int cow_file_range(struct inode *inode, | |||
758 | 772 | ||
759 | BUG_ON(root == root->fs_info->tree_root); | 773 | BUG_ON(root == root->fs_info->tree_root); |
760 | trans = btrfs_join_transaction(root, 1); | 774 | trans = btrfs_join_transaction(root, 1); |
761 | BUG_ON(!trans); | 775 | BUG_ON(IS_ERR(trans)); |
762 | btrfs_set_trans_block_group(trans, inode); | 776 | btrfs_set_trans_block_group(trans, inode); |
763 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | 777 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; |
764 | 778 | ||
@@ -1036,7 +1050,7 @@ static noinline int run_delalloc_nocow(struct inode *inode, | |||
1036 | } else { | 1050 | } else { |
1037 | trans = btrfs_join_transaction(root, 1); | 1051 | trans = btrfs_join_transaction(root, 1); |
1038 | } | 1052 | } |
1039 | BUG_ON(!trans); | 1053 | BUG_ON(IS_ERR(trans)); |
1040 | 1054 | ||
1041 | cow_start = (u64)-1; | 1055 | cow_start = (u64)-1; |
1042 | cur_offset = start; | 1056 | cur_offset = start; |
@@ -1544,6 +1558,7 @@ out: | |||
1544 | out_page: | 1558 | out_page: |
1545 | unlock_page(page); | 1559 | unlock_page(page); |
1546 | page_cache_release(page); | 1560 | page_cache_release(page); |
1561 | kfree(fixup); | ||
1547 | } | 1562 | } |
1548 | 1563 | ||
1549 | /* | 1564 | /* |
@@ -1670,7 +1685,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1670 | struct btrfs_ordered_extent *ordered_extent = NULL; | 1685 | struct btrfs_ordered_extent *ordered_extent = NULL; |
1671 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 1686 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
1672 | struct extent_state *cached_state = NULL; | 1687 | struct extent_state *cached_state = NULL; |
1673 | int compressed = 0; | 1688 | int compress_type = 0; |
1674 | int ret; | 1689 | int ret; |
1675 | bool nolock = false; | 1690 | bool nolock = false; |
1676 | 1691 | ||
@@ -1690,7 +1705,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1690 | trans = btrfs_join_transaction_nolock(root, 1); | 1705 | trans = btrfs_join_transaction_nolock(root, 1); |
1691 | else | 1706 | else |
1692 | trans = btrfs_join_transaction(root, 1); | 1707 | trans = btrfs_join_transaction(root, 1); |
1693 | BUG_ON(!trans); | 1708 | BUG_ON(IS_ERR(trans)); |
1694 | btrfs_set_trans_block_group(trans, inode); | 1709 | btrfs_set_trans_block_group(trans, inode); |
1695 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | 1710 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; |
1696 | ret = btrfs_update_inode(trans, root, inode); | 1711 | ret = btrfs_update_inode(trans, root, inode); |
@@ -1707,13 +1722,14 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1707 | trans = btrfs_join_transaction_nolock(root, 1); | 1722 | trans = btrfs_join_transaction_nolock(root, 1); |
1708 | else | 1723 | else |
1709 | trans = btrfs_join_transaction(root, 1); | 1724 | trans = btrfs_join_transaction(root, 1); |
1725 | BUG_ON(IS_ERR(trans)); | ||
1710 | btrfs_set_trans_block_group(trans, inode); | 1726 | btrfs_set_trans_block_group(trans, inode); |
1711 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | 1727 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; |
1712 | 1728 | ||
1713 | if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) | 1729 | if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) |
1714 | compressed = 1; | 1730 | compress_type = ordered_extent->compress_type; |
1715 | if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { | 1731 | if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { |
1716 | BUG_ON(compressed); | 1732 | BUG_ON(compress_type); |
1717 | ret = btrfs_mark_extent_written(trans, inode, | 1733 | ret = btrfs_mark_extent_written(trans, inode, |
1718 | ordered_extent->file_offset, | 1734 | ordered_extent->file_offset, |
1719 | ordered_extent->file_offset + | 1735 | ordered_extent->file_offset + |
@@ -1727,7 +1743,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1727 | ordered_extent->disk_len, | 1743 | ordered_extent->disk_len, |
1728 | ordered_extent->len, | 1744 | ordered_extent->len, |
1729 | ordered_extent->len, | 1745 | ordered_extent->len, |
1730 | compressed, 0, 0, | 1746 | compress_type, 0, 0, |
1731 | BTRFS_FILE_EXTENT_REG); | 1747 | BTRFS_FILE_EXTENT_REG); |
1732 | unpin_extent_cache(&BTRFS_I(inode)->extent_tree, | 1748 | unpin_extent_cache(&BTRFS_I(inode)->extent_tree, |
1733 | ordered_extent->file_offset, | 1749 | ordered_extent->file_offset, |
@@ -1829,6 +1845,8 @@ static int btrfs_io_failed_hook(struct bio *failed_bio, | |||
1829 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { | 1845 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { |
1830 | logical = em->block_start; | 1846 | logical = em->block_start; |
1831 | failrec->bio_flags = EXTENT_BIO_COMPRESSED; | 1847 | failrec->bio_flags = EXTENT_BIO_COMPRESSED; |
1848 | extent_set_compress_type(&failrec->bio_flags, | ||
1849 | em->compress_type); | ||
1832 | } | 1850 | } |
1833 | failrec->logical = logical; | 1851 | failrec->logical = logical; |
1834 | free_extent_map(em); | 1852 | free_extent_map(em); |
@@ -2339,6 +2357,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2339 | */ | 2357 | */ |
2340 | if (is_bad_inode(inode)) { | 2358 | if (is_bad_inode(inode)) { |
2341 | trans = btrfs_start_transaction(root, 0); | 2359 | trans = btrfs_start_transaction(root, 0); |
2360 | BUG_ON(IS_ERR(trans)); | ||
2342 | btrfs_orphan_del(trans, inode); | 2361 | btrfs_orphan_del(trans, inode); |
2343 | btrfs_end_transaction(trans, root); | 2362 | btrfs_end_transaction(trans, root); |
2344 | iput(inode); | 2363 | iput(inode); |
@@ -2366,6 +2385,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2366 | 2385 | ||
2367 | if (root->orphan_block_rsv || root->orphan_item_inserted) { | 2386 | if (root->orphan_block_rsv || root->orphan_item_inserted) { |
2368 | trans = btrfs_join_transaction(root, 1); | 2387 | trans = btrfs_join_transaction(root, 1); |
2388 | BUG_ON(IS_ERR(trans)); | ||
2369 | btrfs_end_transaction(trans, root); | 2389 | btrfs_end_transaction(trans, root); |
2370 | } | 2390 | } |
2371 | 2391 | ||
@@ -2626,7 +2646,7 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans, | |||
2626 | path = btrfs_alloc_path(); | 2646 | path = btrfs_alloc_path(); |
2627 | if (!path) { | 2647 | if (!path) { |
2628 | ret = -ENOMEM; | 2648 | ret = -ENOMEM; |
2629 | goto err; | 2649 | goto out; |
2630 | } | 2650 | } |
2631 | 2651 | ||
2632 | path->leave_spinning = 1; | 2652 | path->leave_spinning = 1; |
@@ -2699,9 +2719,10 @@ static int check_path_shared(struct btrfs_root *root, | |||
2699 | struct extent_buffer *eb; | 2719 | struct extent_buffer *eb; |
2700 | int level; | 2720 | int level; |
2701 | u64 refs = 1; | 2721 | u64 refs = 1; |
2702 | int uninitialized_var(ret); | ||
2703 | 2722 | ||
2704 | for (level = 0; level < BTRFS_MAX_LEVEL; level++) { | 2723 | for (level = 0; level < BTRFS_MAX_LEVEL; level++) { |
2724 | int ret; | ||
2725 | |||
2705 | if (!path->nodes[level]) | 2726 | if (!path->nodes[level]) |
2706 | break; | 2727 | break; |
2707 | eb = path->nodes[level]; | 2728 | eb = path->nodes[level]; |
@@ -2712,7 +2733,7 @@ static int check_path_shared(struct btrfs_root *root, | |||
2712 | if (refs > 1) | 2733 | if (refs > 1) |
2713 | return 1; | 2734 | return 1; |
2714 | } | 2735 | } |
2715 | return ret; /* XXX callers? */ | 2736 | return 0; |
2716 | } | 2737 | } |
2717 | 2738 | ||
2718 | /* | 2739 | /* |
@@ -3671,8 +3692,12 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr) | |||
3671 | static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) | 3692 | static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) |
3672 | { | 3693 | { |
3673 | struct inode *inode = dentry->d_inode; | 3694 | struct inode *inode = dentry->d_inode; |
3695 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
3674 | int err; | 3696 | int err; |
3675 | 3697 | ||
3698 | if (btrfs_root_readonly(root)) | ||
3699 | return -EROFS; | ||
3700 | |||
3676 | err = inode_change_ok(inode, attr); | 3701 | err = inode_change_ok(inode, attr); |
3677 | if (err) | 3702 | if (err) |
3678 | return err; | 3703 | return err; |
@@ -4115,7 +4140,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) | |||
4115 | } | 4140 | } |
4116 | srcu_read_unlock(&root->fs_info->subvol_srcu, index); | 4141 | srcu_read_unlock(&root->fs_info->subvol_srcu, index); |
4117 | 4142 | ||
4118 | if (root != sub_root) { | 4143 | if (!IS_ERR(inode) && root != sub_root) { |
4119 | down_read(&root->fs_info->cleanup_work_sem); | 4144 | down_read(&root->fs_info->cleanup_work_sem); |
4120 | if (!(inode->i_sb->s_flags & MS_RDONLY)) | 4145 | if (!(inode->i_sb->s_flags & MS_RDONLY)) |
4121 | btrfs_orphan_cleanup(sub_root); | 4146 | btrfs_orphan_cleanup(sub_root); |
@@ -4328,6 +4353,8 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
4328 | trans = btrfs_join_transaction_nolock(root, 1); | 4353 | trans = btrfs_join_transaction_nolock(root, 1); |
4329 | else | 4354 | else |
4330 | trans = btrfs_join_transaction(root, 1); | 4355 | trans = btrfs_join_transaction(root, 1); |
4356 | if (IS_ERR(trans)) | ||
4357 | return PTR_ERR(trans); | ||
4331 | btrfs_set_trans_block_group(trans, inode); | 4358 | btrfs_set_trans_block_group(trans, inode); |
4332 | if (nolock) | 4359 | if (nolock) |
4333 | ret = btrfs_end_transaction_nolock(trans, root); | 4360 | ret = btrfs_end_transaction_nolock(trans, root); |
@@ -4353,6 +4380,7 @@ void btrfs_dirty_inode(struct inode *inode) | |||
4353 | return; | 4380 | return; |
4354 | 4381 | ||
4355 | trans = btrfs_join_transaction(root, 1); | 4382 | trans = btrfs_join_transaction(root, 1); |
4383 | BUG_ON(IS_ERR(trans)); | ||
4356 | btrfs_set_trans_block_group(trans, inode); | 4384 | btrfs_set_trans_block_group(trans, inode); |
4357 | 4385 | ||
4358 | ret = btrfs_update_inode(trans, root, inode); | 4386 | ret = btrfs_update_inode(trans, root, inode); |
@@ -4928,8 +4956,10 @@ static noinline int uncompress_inline(struct btrfs_path *path, | |||
4928 | size_t max_size; | 4956 | size_t max_size; |
4929 | unsigned long inline_size; | 4957 | unsigned long inline_size; |
4930 | unsigned long ptr; | 4958 | unsigned long ptr; |
4959 | int compress_type; | ||
4931 | 4960 | ||
4932 | WARN_ON(pg_offset != 0); | 4961 | WARN_ON(pg_offset != 0); |
4962 | compress_type = btrfs_file_extent_compression(leaf, item); | ||
4933 | max_size = btrfs_file_extent_ram_bytes(leaf, item); | 4963 | max_size = btrfs_file_extent_ram_bytes(leaf, item); |
4934 | inline_size = btrfs_file_extent_inline_item_len(leaf, | 4964 | inline_size = btrfs_file_extent_inline_item_len(leaf, |
4935 | btrfs_item_nr(leaf, path->slots[0])); | 4965 | btrfs_item_nr(leaf, path->slots[0])); |
@@ -4939,8 +4969,8 @@ static noinline int uncompress_inline(struct btrfs_path *path, | |||
4939 | read_extent_buffer(leaf, tmp, ptr, inline_size); | 4969 | read_extent_buffer(leaf, tmp, ptr, inline_size); |
4940 | 4970 | ||
4941 | max_size = min_t(unsigned long, PAGE_CACHE_SIZE, max_size); | 4971 | max_size = min_t(unsigned long, PAGE_CACHE_SIZE, max_size); |
4942 | ret = btrfs_zlib_decompress(tmp, page, extent_offset, | 4972 | ret = btrfs_decompress(compress_type, tmp, page, |
4943 | inline_size, max_size); | 4973 | extent_offset, inline_size, max_size); |
4944 | if (ret) { | 4974 | if (ret) { |
4945 | char *kaddr = kmap_atomic(page, KM_USER0); | 4975 | char *kaddr = kmap_atomic(page, KM_USER0); |
4946 | unsigned long copy_size = min_t(u64, | 4976 | unsigned long copy_size = min_t(u64, |
@@ -4982,7 +5012,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, | |||
4982 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | 5012 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; |
4983 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 5013 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
4984 | struct btrfs_trans_handle *trans = NULL; | 5014 | struct btrfs_trans_handle *trans = NULL; |
4985 | int compressed; | 5015 | int compress_type; |
4986 | 5016 | ||
4987 | again: | 5017 | again: |
4988 | read_lock(&em_tree->lock); | 5018 | read_lock(&em_tree->lock); |
@@ -5041,7 +5071,7 @@ again: | |||
5041 | 5071 | ||
5042 | found_type = btrfs_file_extent_type(leaf, item); | 5072 | found_type = btrfs_file_extent_type(leaf, item); |
5043 | extent_start = found_key.offset; | 5073 | extent_start = found_key.offset; |
5044 | compressed = btrfs_file_extent_compression(leaf, item); | 5074 | compress_type = btrfs_file_extent_compression(leaf, item); |
5045 | if (found_type == BTRFS_FILE_EXTENT_REG || | 5075 | if (found_type == BTRFS_FILE_EXTENT_REG || |
5046 | found_type == BTRFS_FILE_EXTENT_PREALLOC) { | 5076 | found_type == BTRFS_FILE_EXTENT_PREALLOC) { |
5047 | extent_end = extent_start + | 5077 | extent_end = extent_start + |
@@ -5087,8 +5117,9 @@ again: | |||
5087 | em->block_start = EXTENT_MAP_HOLE; | 5117 | em->block_start = EXTENT_MAP_HOLE; |
5088 | goto insert; | 5118 | goto insert; |
5089 | } | 5119 | } |
5090 | if (compressed) { | 5120 | if (compress_type != BTRFS_COMPRESS_NONE) { |
5091 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | 5121 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); |
5122 | em->compress_type = compress_type; | ||
5092 | em->block_start = bytenr; | 5123 | em->block_start = bytenr; |
5093 | em->block_len = btrfs_file_extent_disk_num_bytes(leaf, | 5124 | em->block_len = btrfs_file_extent_disk_num_bytes(leaf, |
5094 | item); | 5125 | item); |
@@ -5122,12 +5153,14 @@ again: | |||
5122 | em->len = (copy_size + root->sectorsize - 1) & | 5153 | em->len = (copy_size + root->sectorsize - 1) & |
5123 | ~((u64)root->sectorsize - 1); | 5154 | ~((u64)root->sectorsize - 1); |
5124 | em->orig_start = EXTENT_MAP_INLINE; | 5155 | em->orig_start = EXTENT_MAP_INLINE; |
5125 | if (compressed) | 5156 | if (compress_type) { |
5126 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | 5157 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); |
5158 | em->compress_type = compress_type; | ||
5159 | } | ||
5127 | ptr = btrfs_file_extent_inline_start(item) + extent_offset; | 5160 | ptr = btrfs_file_extent_inline_start(item) + extent_offset; |
5128 | if (create == 0 && !PageUptodate(page)) { | 5161 | if (create == 0 && !PageUptodate(page)) { |
5129 | if (btrfs_file_extent_compression(leaf, item) == | 5162 | if (btrfs_file_extent_compression(leaf, item) != |
5130 | BTRFS_COMPRESS_ZLIB) { | 5163 | BTRFS_COMPRESS_NONE) { |
5131 | ret = uncompress_inline(path, inode, page, | 5164 | ret = uncompress_inline(path, inode, page, |
5132 | pg_offset, | 5165 | pg_offset, |
5133 | extent_offset, item); | 5166 | extent_offset, item); |
@@ -5152,6 +5185,8 @@ again: | |||
5152 | em = NULL; | 5185 | em = NULL; |
5153 | btrfs_release_path(root, path); | 5186 | btrfs_release_path(root, path); |
5154 | trans = btrfs_join_transaction(root, 1); | 5187 | trans = btrfs_join_transaction(root, 1); |
5188 | if (IS_ERR(trans)) | ||
5189 | return ERR_CAST(trans); | ||
5155 | goto again; | 5190 | goto again; |
5156 | } | 5191 | } |
5157 | map = kmap(page); | 5192 | map = kmap(page); |
@@ -5256,8 +5291,8 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode, | |||
5256 | btrfs_drop_extent_cache(inode, start, start + len - 1, 0); | 5291 | btrfs_drop_extent_cache(inode, start, start + len - 1, 0); |
5257 | 5292 | ||
5258 | trans = btrfs_join_transaction(root, 0); | 5293 | trans = btrfs_join_transaction(root, 0); |
5259 | if (!trans) | 5294 | if (IS_ERR(trans)) |
5260 | return ERR_PTR(-ENOMEM); | 5295 | return ERR_CAST(trans); |
5261 | 5296 | ||
5262 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | 5297 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; |
5263 | 5298 | ||
@@ -5481,7 +5516,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, | |||
5481 | * while we look for nocow cross refs | 5516 | * while we look for nocow cross refs |
5482 | */ | 5517 | */ |
5483 | trans = btrfs_join_transaction(root, 0); | 5518 | trans = btrfs_join_transaction(root, 0); |
5484 | if (!trans) | 5519 | if (IS_ERR(trans)) |
5485 | goto must_cow; | 5520 | goto must_cow; |
5486 | 5521 | ||
5487 | if (can_nocow_odirect(trans, inode, start, len) == 1) { | 5522 | if (can_nocow_odirect(trans, inode, start, len) == 1) { |
@@ -5616,7 +5651,7 @@ again: | |||
5616 | BUG_ON(!ordered); | 5651 | BUG_ON(!ordered); |
5617 | 5652 | ||
5618 | trans = btrfs_join_transaction(root, 1); | 5653 | trans = btrfs_join_transaction(root, 1); |
5619 | if (!trans) { | 5654 | if (IS_ERR(trans)) { |
5620 | err = -ENOMEM; | 5655 | err = -ENOMEM; |
5621 | goto out; | 5656 | goto out; |
5622 | } | 5657 | } |
@@ -6477,7 +6512,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
6477 | ei->ordered_data_close = 0; | 6512 | ei->ordered_data_close = 0; |
6478 | ei->orphan_meta_reserved = 0; | 6513 | ei->orphan_meta_reserved = 0; |
6479 | ei->dummy_inode = 0; | 6514 | ei->dummy_inode = 0; |
6480 | ei->force_compress = 0; | 6515 | ei->force_compress = BTRFS_COMPRESS_NONE; |
6481 | 6516 | ||
6482 | inode = &ei->vfs_inode; | 6517 | inode = &ei->vfs_inode; |
6483 | extent_map_tree_init(&ei->extent_tree, GFP_NOFS); | 6518 | extent_map_tree_init(&ei->extent_tree, GFP_NOFS); |
@@ -7098,116 +7133,6 @@ int btrfs_prealloc_file_range_trans(struct inode *inode, | |||
7098 | min_size, actual_len, alloc_hint, trans); | 7133 | min_size, actual_len, alloc_hint, trans); |
7099 | } | 7134 | } |
7100 | 7135 | ||
7101 | static long btrfs_fallocate(struct inode *inode, int mode, | ||
7102 | loff_t offset, loff_t len) | ||
7103 | { | ||
7104 | struct extent_state *cached_state = NULL; | ||
7105 | u64 cur_offset; | ||
7106 | u64 last_byte; | ||
7107 | u64 alloc_start; | ||
7108 | u64 alloc_end; | ||
7109 | u64 alloc_hint = 0; | ||
7110 | u64 locked_end; | ||
7111 | u64 mask = BTRFS_I(inode)->root->sectorsize - 1; | ||
7112 | struct extent_map *em; | ||
7113 | int ret; | ||
7114 | |||
7115 | alloc_start = offset & ~mask; | ||
7116 | alloc_end = (offset + len + mask) & ~mask; | ||
7117 | |||
7118 | /* We only support the FALLOC_FL_KEEP_SIZE mode */ | ||
7119 | if (mode && (mode != FALLOC_FL_KEEP_SIZE)) | ||
7120 | return -EOPNOTSUPP; | ||
7121 | |||
7122 | /* | ||
7123 | * wait for ordered IO before we have any locks. We'll loop again | ||
7124 | * below with the locks held. | ||
7125 | */ | ||
7126 | btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start); | ||
7127 | |||
7128 | mutex_lock(&inode->i_mutex); | ||
7129 | ret = inode_newsize_ok(inode, alloc_end); | ||
7130 | if (ret) | ||
7131 | goto out; | ||
7132 | |||
7133 | if (alloc_start > inode->i_size) { | ||
7134 | ret = btrfs_cont_expand(inode, alloc_start); | ||
7135 | if (ret) | ||
7136 | goto out; | ||
7137 | } | ||
7138 | |||
7139 | ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start); | ||
7140 | if (ret) | ||
7141 | goto out; | ||
7142 | |||
7143 | locked_end = alloc_end - 1; | ||
7144 | while (1) { | ||
7145 | struct btrfs_ordered_extent *ordered; | ||
7146 | |||
7147 | /* the extent lock is ordered inside the running | ||
7148 | * transaction | ||
7149 | */ | ||
7150 | lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start, | ||
7151 | locked_end, 0, &cached_state, GFP_NOFS); | ||
7152 | ordered = btrfs_lookup_first_ordered_extent(inode, | ||
7153 | alloc_end - 1); | ||
7154 | if (ordered && | ||
7155 | ordered->file_offset + ordered->len > alloc_start && | ||
7156 | ordered->file_offset < alloc_end) { | ||
7157 | btrfs_put_ordered_extent(ordered); | ||
7158 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, | ||
7159 | alloc_start, locked_end, | ||
7160 | &cached_state, GFP_NOFS); | ||
7161 | /* | ||
7162 | * we can't wait on the range with the transaction | ||
7163 | * running or with the extent lock held | ||
7164 | */ | ||
7165 | btrfs_wait_ordered_range(inode, alloc_start, | ||
7166 | alloc_end - alloc_start); | ||
7167 | } else { | ||
7168 | if (ordered) | ||
7169 | btrfs_put_ordered_extent(ordered); | ||
7170 | break; | ||
7171 | } | ||
7172 | } | ||
7173 | |||
7174 | cur_offset = alloc_start; | ||
7175 | while (1) { | ||
7176 | em = btrfs_get_extent(inode, NULL, 0, cur_offset, | ||
7177 | alloc_end - cur_offset, 0); | ||
7178 | BUG_ON(IS_ERR(em) || !em); | ||
7179 | last_byte = min(extent_map_end(em), alloc_end); | ||
7180 | last_byte = (last_byte + mask) & ~mask; | ||
7181 | if (em->block_start == EXTENT_MAP_HOLE || | ||
7182 | (cur_offset >= inode->i_size && | ||
7183 | !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { | ||
7184 | ret = btrfs_prealloc_file_range(inode, mode, cur_offset, | ||
7185 | last_byte - cur_offset, | ||
7186 | 1 << inode->i_blkbits, | ||
7187 | offset + len, | ||
7188 | &alloc_hint); | ||
7189 | if (ret < 0) { | ||
7190 | free_extent_map(em); | ||
7191 | break; | ||
7192 | } | ||
7193 | } | ||
7194 | free_extent_map(em); | ||
7195 | |||
7196 | cur_offset = last_byte; | ||
7197 | if (cur_offset >= alloc_end) { | ||
7198 | ret = 0; | ||
7199 | break; | ||
7200 | } | ||
7201 | } | ||
7202 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, | ||
7203 | &cached_state, GFP_NOFS); | ||
7204 | |||
7205 | btrfs_free_reserved_data_space(inode, alloc_end - alloc_start); | ||
7206 | out: | ||
7207 | mutex_unlock(&inode->i_mutex); | ||
7208 | return ret; | ||
7209 | } | ||
7210 | |||
7211 | static int btrfs_set_page_dirty(struct page *page) | 7136 | static int btrfs_set_page_dirty(struct page *page) |
7212 | { | 7137 | { |
7213 | return __set_page_dirty_nobuffers(page); | 7138 | return __set_page_dirty_nobuffers(page); |
@@ -7215,6 +7140,10 @@ static int btrfs_set_page_dirty(struct page *page) | |||
7215 | 7140 | ||
7216 | static int btrfs_permission(struct inode *inode, int mask, unsigned int flags) | 7141 | static int btrfs_permission(struct inode *inode, int mask, unsigned int flags) |
7217 | { | 7142 | { |
7143 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
7144 | |||
7145 | if (btrfs_root_readonly(root) && (mask & MAY_WRITE)) | ||
7146 | return -EROFS; | ||
7218 | if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE)) | 7147 | if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE)) |
7219 | return -EACCES; | 7148 | return -EACCES; |
7220 | return generic_permission(inode, mask, flags, btrfs_check_acl); | 7149 | return generic_permission(inode, mask, flags, btrfs_check_acl); |
@@ -7310,7 +7239,6 @@ static const struct inode_operations btrfs_file_inode_operations = { | |||
7310 | .listxattr = btrfs_listxattr, | 7239 | .listxattr = btrfs_listxattr, |
7311 | .removexattr = btrfs_removexattr, | 7240 | .removexattr = btrfs_removexattr, |
7312 | .permission = btrfs_permission, | 7241 | .permission = btrfs_permission, |
7313 | .fallocate = btrfs_fallocate, | ||
7314 | .fiemap = btrfs_fiemap, | 7242 | .fiemap = btrfs_fiemap, |
7315 | }; | 7243 | }; |
7316 | static const struct inode_operations btrfs_special_inode_operations = { | 7244 | static const struct inode_operations btrfs_special_inode_operations = { |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index f87552a1d7ea..02d224e8c83f 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -147,6 +147,9 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) | |||
147 | unsigned int flags, oldflags; | 147 | unsigned int flags, oldflags; |
148 | int ret; | 148 | int ret; |
149 | 149 | ||
150 | if (btrfs_root_readonly(root)) | ||
151 | return -EROFS; | ||
152 | |||
150 | if (copy_from_user(&flags, arg, sizeof(flags))) | 153 | if (copy_from_user(&flags, arg, sizeof(flags))) |
151 | return -EFAULT; | 154 | return -EFAULT; |
152 | 155 | ||
@@ -200,7 +203,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) | |||
200 | 203 | ||
201 | 204 | ||
202 | trans = btrfs_join_transaction(root, 1); | 205 | trans = btrfs_join_transaction(root, 1); |
203 | BUG_ON(!trans); | 206 | BUG_ON(IS_ERR(trans)); |
204 | 207 | ||
205 | ret = btrfs_update_inode(trans, root, inode); | 208 | ret = btrfs_update_inode(trans, root, inode); |
206 | BUG_ON(ret); | 209 | BUG_ON(ret); |
@@ -360,7 +363,8 @@ fail: | |||
360 | } | 363 | } |
361 | 364 | ||
362 | static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | 365 | static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, |
363 | char *name, int namelen, u64 *async_transid) | 366 | char *name, int namelen, u64 *async_transid, |
367 | bool readonly) | ||
364 | { | 368 | { |
365 | struct inode *inode; | 369 | struct inode *inode; |
366 | struct dentry *parent; | 370 | struct dentry *parent; |
@@ -378,6 +382,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | |||
378 | btrfs_init_block_rsv(&pending_snapshot->block_rsv); | 382 | btrfs_init_block_rsv(&pending_snapshot->block_rsv); |
379 | pending_snapshot->dentry = dentry; | 383 | pending_snapshot->dentry = dentry; |
380 | pending_snapshot->root = root; | 384 | pending_snapshot->root = root; |
385 | pending_snapshot->readonly = readonly; | ||
381 | 386 | ||
382 | trans = btrfs_start_transaction(root->fs_info->extent_root, 5); | 387 | trans = btrfs_start_transaction(root->fs_info->extent_root, 5); |
383 | if (IS_ERR(trans)) { | 388 | if (IS_ERR(trans)) { |
@@ -509,7 +514,7 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child) | |||
509 | static noinline int btrfs_mksubvol(struct path *parent, | 514 | static noinline int btrfs_mksubvol(struct path *parent, |
510 | char *name, int namelen, | 515 | char *name, int namelen, |
511 | struct btrfs_root *snap_src, | 516 | struct btrfs_root *snap_src, |
512 | u64 *async_transid) | 517 | u64 *async_transid, bool readonly) |
513 | { | 518 | { |
514 | struct inode *dir = parent->dentry->d_inode; | 519 | struct inode *dir = parent->dentry->d_inode; |
515 | struct dentry *dentry; | 520 | struct dentry *dentry; |
@@ -541,7 +546,7 @@ static noinline int btrfs_mksubvol(struct path *parent, | |||
541 | 546 | ||
542 | if (snap_src) { | 547 | if (snap_src) { |
543 | error = create_snapshot(snap_src, dentry, | 548 | error = create_snapshot(snap_src, dentry, |
544 | name, namelen, async_transid); | 549 | name, namelen, async_transid, readonly); |
545 | } else { | 550 | } else { |
546 | error = create_subvol(BTRFS_I(dir)->root, dentry, | 551 | error = create_subvol(BTRFS_I(dir)->root, dentry, |
547 | name, namelen, async_transid); | 552 | name, namelen, async_transid); |
@@ -638,9 +643,11 @@ static int btrfs_defrag_file(struct file *file, | |||
638 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 643 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
639 | struct btrfs_ordered_extent *ordered; | 644 | struct btrfs_ordered_extent *ordered; |
640 | struct page *page; | 645 | struct page *page; |
646 | struct btrfs_super_block *disk_super; | ||
641 | unsigned long last_index; | 647 | unsigned long last_index; |
642 | unsigned long ra_pages = root->fs_info->bdi.ra_pages; | 648 | unsigned long ra_pages = root->fs_info->bdi.ra_pages; |
643 | unsigned long total_read = 0; | 649 | unsigned long total_read = 0; |
650 | u64 features; | ||
644 | u64 page_start; | 651 | u64 page_start; |
645 | u64 page_end; | 652 | u64 page_end; |
646 | u64 last_len = 0; | 653 | u64 last_len = 0; |
@@ -648,6 +655,14 @@ static int btrfs_defrag_file(struct file *file, | |||
648 | u64 defrag_end = 0; | 655 | u64 defrag_end = 0; |
649 | unsigned long i; | 656 | unsigned long i; |
650 | int ret; | 657 | int ret; |
658 | int compress_type = BTRFS_COMPRESS_ZLIB; | ||
659 | |||
660 | if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) { | ||
661 | if (range->compress_type > BTRFS_COMPRESS_TYPES) | ||
662 | return -EINVAL; | ||
663 | if (range->compress_type) | ||
664 | compress_type = range->compress_type; | ||
665 | } | ||
651 | 666 | ||
652 | if (inode->i_size == 0) | 667 | if (inode->i_size == 0) |
653 | return 0; | 668 | return 0; |
@@ -683,7 +698,7 @@ static int btrfs_defrag_file(struct file *file, | |||
683 | total_read++; | 698 | total_read++; |
684 | mutex_lock(&inode->i_mutex); | 699 | mutex_lock(&inode->i_mutex); |
685 | if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) | 700 | if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) |
686 | BTRFS_I(inode)->force_compress = 1; | 701 | BTRFS_I(inode)->force_compress = compress_type; |
687 | 702 | ||
688 | ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); | 703 | ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); |
689 | if (ret) | 704 | if (ret) |
@@ -781,10 +796,17 @@ loop_unlock: | |||
781 | atomic_dec(&root->fs_info->async_submit_draining); | 796 | atomic_dec(&root->fs_info->async_submit_draining); |
782 | 797 | ||
783 | mutex_lock(&inode->i_mutex); | 798 | mutex_lock(&inode->i_mutex); |
784 | BTRFS_I(inode)->force_compress = 0; | 799 | BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE; |
785 | mutex_unlock(&inode->i_mutex); | 800 | mutex_unlock(&inode->i_mutex); |
786 | } | 801 | } |
787 | 802 | ||
803 | disk_super = &root->fs_info->super_copy; | ||
804 | features = btrfs_super_incompat_flags(disk_super); | ||
805 | if (range->compress_type == BTRFS_COMPRESS_LZO) { | ||
806 | features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; | ||
807 | btrfs_set_super_incompat_flags(disk_super, features); | ||
808 | } | ||
809 | |||
788 | return 0; | 810 | return 0; |
789 | 811 | ||
790 | err_reservations: | 812 | err_reservations: |
@@ -885,6 +907,10 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, | |||
885 | 907 | ||
886 | if (new_size > old_size) { | 908 | if (new_size > old_size) { |
887 | trans = btrfs_start_transaction(root, 0); | 909 | trans = btrfs_start_transaction(root, 0); |
910 | if (IS_ERR(trans)) { | ||
911 | ret = PTR_ERR(trans); | ||
912 | goto out_unlock; | ||
913 | } | ||
888 | ret = btrfs_grow_device(trans, device, new_size); | 914 | ret = btrfs_grow_device(trans, device, new_size); |
889 | btrfs_commit_transaction(trans, root); | 915 | btrfs_commit_transaction(trans, root); |
890 | } else { | 916 | } else { |
@@ -901,7 +927,8 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, | |||
901 | char *name, | 927 | char *name, |
902 | unsigned long fd, | 928 | unsigned long fd, |
903 | int subvol, | 929 | int subvol, |
904 | u64 *transid) | 930 | u64 *transid, |
931 | bool readonly) | ||
905 | { | 932 | { |
906 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; | 933 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; |
907 | struct file *src_file; | 934 | struct file *src_file; |
@@ -919,7 +946,7 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, | |||
919 | 946 | ||
920 | if (subvol) { | 947 | if (subvol) { |
921 | ret = btrfs_mksubvol(&file->f_path, name, namelen, | 948 | ret = btrfs_mksubvol(&file->f_path, name, namelen, |
922 | NULL, transid); | 949 | NULL, transid, readonly); |
923 | } else { | 950 | } else { |
924 | struct inode *src_inode; | 951 | struct inode *src_inode; |
925 | src_file = fget(fd); | 952 | src_file = fget(fd); |
@@ -938,7 +965,7 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, | |||
938 | } | 965 | } |
939 | ret = btrfs_mksubvol(&file->f_path, name, namelen, | 966 | ret = btrfs_mksubvol(&file->f_path, name, namelen, |
940 | BTRFS_I(src_inode)->root, | 967 | BTRFS_I(src_inode)->root, |
941 | transid); | 968 | transid, readonly); |
942 | fput(src_file); | 969 | fput(src_file); |
943 | } | 970 | } |
944 | out: | 971 | out: |
@@ -946,58 +973,139 @@ out: | |||
946 | } | 973 | } |
947 | 974 | ||
948 | static noinline int btrfs_ioctl_snap_create(struct file *file, | 975 | static noinline int btrfs_ioctl_snap_create(struct file *file, |
949 | void __user *arg, int subvol, | 976 | void __user *arg, int subvol) |
950 | int v2) | ||
951 | { | 977 | { |
952 | struct btrfs_ioctl_vol_args *vol_args = NULL; | 978 | struct btrfs_ioctl_vol_args *vol_args; |
953 | struct btrfs_ioctl_vol_args_v2 *vol_args_v2 = NULL; | ||
954 | char *name; | ||
955 | u64 fd; | ||
956 | int ret; | 979 | int ret; |
957 | 980 | ||
958 | if (v2) { | 981 | vol_args = memdup_user(arg, sizeof(*vol_args)); |
959 | u64 transid = 0; | 982 | if (IS_ERR(vol_args)) |
960 | u64 *ptr = NULL; | 983 | return PTR_ERR(vol_args); |
961 | 984 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | |
962 | vol_args_v2 = memdup_user(arg, sizeof(*vol_args_v2)); | ||
963 | if (IS_ERR(vol_args_v2)) | ||
964 | return PTR_ERR(vol_args_v2); | ||
965 | 985 | ||
966 | if (vol_args_v2->flags & ~BTRFS_SUBVOL_CREATE_ASYNC) { | 986 | ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, |
967 | ret = -EINVAL; | 987 | vol_args->fd, subvol, |
968 | goto out; | 988 | NULL, false); |
969 | } | ||
970 | 989 | ||
971 | name = vol_args_v2->name; | 990 | kfree(vol_args); |
972 | fd = vol_args_v2->fd; | 991 | return ret; |
973 | vol_args_v2->name[BTRFS_SUBVOL_NAME_MAX] = '\0'; | 992 | } |
974 | 993 | ||
975 | if (vol_args_v2->flags & BTRFS_SUBVOL_CREATE_ASYNC) | 994 | static noinline int btrfs_ioctl_snap_create_v2(struct file *file, |
976 | ptr = &transid; | 995 | void __user *arg, int subvol) |
996 | { | ||
997 | struct btrfs_ioctl_vol_args_v2 *vol_args; | ||
998 | int ret; | ||
999 | u64 transid = 0; | ||
1000 | u64 *ptr = NULL; | ||
1001 | bool readonly = false; | ||
977 | 1002 | ||
978 | ret = btrfs_ioctl_snap_create_transid(file, name, fd, | 1003 | vol_args = memdup_user(arg, sizeof(*vol_args)); |
979 | subvol, ptr); | 1004 | if (IS_ERR(vol_args)) |
1005 | return PTR_ERR(vol_args); | ||
1006 | vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0'; | ||
980 | 1007 | ||
981 | if (ret == 0 && ptr && | 1008 | if (vol_args->flags & |
982 | copy_to_user(arg + | 1009 | ~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY)) { |
983 | offsetof(struct btrfs_ioctl_vol_args_v2, | 1010 | ret = -EOPNOTSUPP; |
984 | transid), ptr, sizeof(*ptr))) | 1011 | goto out; |
985 | ret = -EFAULT; | ||
986 | } else { | ||
987 | vol_args = memdup_user(arg, sizeof(*vol_args)); | ||
988 | if (IS_ERR(vol_args)) | ||
989 | return PTR_ERR(vol_args); | ||
990 | name = vol_args->name; | ||
991 | fd = vol_args->fd; | ||
992 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | ||
993 | |||
994 | ret = btrfs_ioctl_snap_create_transid(file, name, fd, | ||
995 | subvol, NULL); | ||
996 | } | 1012 | } |
1013 | |||
1014 | if (vol_args->flags & BTRFS_SUBVOL_CREATE_ASYNC) | ||
1015 | ptr = &transid; | ||
1016 | if (vol_args->flags & BTRFS_SUBVOL_RDONLY) | ||
1017 | readonly = true; | ||
1018 | |||
1019 | ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, | ||
1020 | vol_args->fd, subvol, | ||
1021 | ptr, readonly); | ||
1022 | |||
1023 | if (ret == 0 && ptr && | ||
1024 | copy_to_user(arg + | ||
1025 | offsetof(struct btrfs_ioctl_vol_args_v2, | ||
1026 | transid), ptr, sizeof(*ptr))) | ||
1027 | ret = -EFAULT; | ||
997 | out: | 1028 | out: |
998 | kfree(vol_args); | 1029 | kfree(vol_args); |
999 | kfree(vol_args_v2); | 1030 | return ret; |
1031 | } | ||
1032 | |||
1033 | static noinline int btrfs_ioctl_subvol_getflags(struct file *file, | ||
1034 | void __user *arg) | ||
1035 | { | ||
1036 | struct inode *inode = fdentry(file)->d_inode; | ||
1037 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
1038 | int ret = 0; | ||
1039 | u64 flags = 0; | ||
1040 | |||
1041 | if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) | ||
1042 | return -EINVAL; | ||
1043 | |||
1044 | down_read(&root->fs_info->subvol_sem); | ||
1045 | if (btrfs_root_readonly(root)) | ||
1046 | flags |= BTRFS_SUBVOL_RDONLY; | ||
1047 | up_read(&root->fs_info->subvol_sem); | ||
1048 | |||
1049 | if (copy_to_user(arg, &flags, sizeof(flags))) | ||
1050 | ret = -EFAULT; | ||
1051 | |||
1052 | return ret; | ||
1053 | } | ||
1054 | |||
1055 | static noinline int btrfs_ioctl_subvol_setflags(struct file *file, | ||
1056 | void __user *arg) | ||
1057 | { | ||
1058 | struct inode *inode = fdentry(file)->d_inode; | ||
1059 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
1060 | struct btrfs_trans_handle *trans; | ||
1061 | u64 root_flags; | ||
1062 | u64 flags; | ||
1063 | int ret = 0; | ||
1064 | |||
1065 | if (root->fs_info->sb->s_flags & MS_RDONLY) | ||
1066 | return -EROFS; | ||
1067 | |||
1068 | if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) | ||
1069 | return -EINVAL; | ||
1070 | |||
1071 | if (copy_from_user(&flags, arg, sizeof(flags))) | ||
1072 | return -EFAULT; | ||
1073 | |||
1074 | if (flags & ~BTRFS_SUBVOL_CREATE_ASYNC) | ||
1075 | return -EINVAL; | ||
1076 | |||
1077 | if (flags & ~BTRFS_SUBVOL_RDONLY) | ||
1078 | return -EOPNOTSUPP; | ||
1079 | |||
1080 | down_write(&root->fs_info->subvol_sem); | ||
1000 | 1081 | ||
1082 | /* nothing to do */ | ||
1083 | if (!!(flags & BTRFS_SUBVOL_RDONLY) == btrfs_root_readonly(root)) | ||
1084 | goto out; | ||
1085 | |||
1086 | root_flags = btrfs_root_flags(&root->root_item); | ||
1087 | if (flags & BTRFS_SUBVOL_RDONLY) | ||
1088 | btrfs_set_root_flags(&root->root_item, | ||
1089 | root_flags | BTRFS_ROOT_SUBVOL_RDONLY); | ||
1090 | else | ||
1091 | btrfs_set_root_flags(&root->root_item, | ||
1092 | root_flags & ~BTRFS_ROOT_SUBVOL_RDONLY); | ||
1093 | |||
1094 | trans = btrfs_start_transaction(root, 1); | ||
1095 | if (IS_ERR(trans)) { | ||
1096 | ret = PTR_ERR(trans); | ||
1097 | goto out_reset; | ||
1098 | } | ||
1099 | |||
1100 | ret = btrfs_update_root(trans, root, | ||
1101 | &root->root_key, &root->root_item); | ||
1102 | |||
1103 | btrfs_commit_transaction(trans, root); | ||
1104 | out_reset: | ||
1105 | if (ret) | ||
1106 | btrfs_set_root_flags(&root->root_item, root_flags); | ||
1107 | out: | ||
1108 | up_write(&root->fs_info->subvol_sem); | ||
1001 | return ret; | 1109 | return ret; |
1002 | } | 1110 | } |
1003 | 1111 | ||
@@ -1509,6 +1617,9 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) | |||
1509 | struct btrfs_ioctl_defrag_range_args *range; | 1617 | struct btrfs_ioctl_defrag_range_args *range; |
1510 | int ret; | 1618 | int ret; |
1511 | 1619 | ||
1620 | if (btrfs_root_readonly(root)) | ||
1621 | return -EROFS; | ||
1622 | |||
1512 | ret = mnt_want_write(file->f_path.mnt); | 1623 | ret = mnt_want_write(file->f_path.mnt); |
1513 | if (ret) | 1624 | if (ret) |
1514 | return ret; | 1625 | return ret; |
@@ -1637,6 +1748,9 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1637 | if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) | 1748 | if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) |
1638 | return -EINVAL; | 1749 | return -EINVAL; |
1639 | 1750 | ||
1751 | if (btrfs_root_readonly(root)) | ||
1752 | return -EROFS; | ||
1753 | |||
1640 | ret = mnt_want_write(file->f_path.mnt); | 1754 | ret = mnt_want_write(file->f_path.mnt); |
1641 | if (ret) | 1755 | if (ret) |
1642 | return ret; | 1756 | return ret; |
@@ -1788,7 +1902,10 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1788 | 1902 | ||
1789 | memcpy(&new_key, &key, sizeof(new_key)); | 1903 | memcpy(&new_key, &key, sizeof(new_key)); |
1790 | new_key.objectid = inode->i_ino; | 1904 | new_key.objectid = inode->i_ino; |
1791 | new_key.offset = key.offset + destoff - off; | 1905 | if (off <= key.offset) |
1906 | new_key.offset = key.offset + destoff - off; | ||
1907 | else | ||
1908 | new_key.offset = destoff; | ||
1792 | 1909 | ||
1793 | trans = btrfs_start_transaction(root, 1); | 1910 | trans = btrfs_start_transaction(root, 1); |
1794 | if (IS_ERR(trans)) { | 1911 | if (IS_ERR(trans)) { |
@@ -1958,6 +2075,10 @@ static long btrfs_ioctl_trans_start(struct file *file) | |||
1958 | if (file->private_data) | 2075 | if (file->private_data) |
1959 | goto out; | 2076 | goto out; |
1960 | 2077 | ||
2078 | ret = -EROFS; | ||
2079 | if (btrfs_root_readonly(root)) | ||
2080 | goto out; | ||
2081 | |||
1961 | ret = mnt_want_write(file->f_path.mnt); | 2082 | ret = mnt_want_write(file->f_path.mnt); |
1962 | if (ret) | 2083 | if (ret) |
1963 | goto out; | 2084 | goto out; |
@@ -1968,7 +2089,7 @@ static long btrfs_ioctl_trans_start(struct file *file) | |||
1968 | 2089 | ||
1969 | ret = -ENOMEM; | 2090 | ret = -ENOMEM; |
1970 | trans = btrfs_start_ioctl_transaction(root, 0); | 2091 | trans = btrfs_start_ioctl_transaction(root, 0); |
1971 | if (!trans) | 2092 | if (IS_ERR(trans)) |
1972 | goto out_drop; | 2093 | goto out_drop; |
1973 | 2094 | ||
1974 | file->private_data = trans; | 2095 | file->private_data = trans; |
@@ -2024,9 +2145,9 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) | |||
2024 | path->leave_spinning = 1; | 2145 | path->leave_spinning = 1; |
2025 | 2146 | ||
2026 | trans = btrfs_start_transaction(root, 1); | 2147 | trans = btrfs_start_transaction(root, 1); |
2027 | if (!trans) { | 2148 | if (IS_ERR(trans)) { |
2028 | btrfs_free_path(path); | 2149 | btrfs_free_path(path); |
2029 | return -ENOMEM; | 2150 | return PTR_ERR(trans); |
2030 | } | 2151 | } |
2031 | 2152 | ||
2032 | dir_id = btrfs_super_root_dir(&root->fs_info->super_copy); | 2153 | dir_id = btrfs_super_root_dir(&root->fs_info->super_copy); |
@@ -2220,6 +2341,8 @@ static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp | |||
2220 | u64 transid; | 2341 | u64 transid; |
2221 | 2342 | ||
2222 | trans = btrfs_start_transaction(root, 0); | 2343 | trans = btrfs_start_transaction(root, 0); |
2344 | if (IS_ERR(trans)) | ||
2345 | return PTR_ERR(trans); | ||
2223 | transid = trans->transid; | 2346 | transid = trans->transid; |
2224 | btrfs_commit_transaction_async(trans, root, 0); | 2347 | btrfs_commit_transaction_async(trans, root, 0); |
2225 | 2348 | ||
@@ -2257,13 +2380,17 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
2257 | case FS_IOC_GETVERSION: | 2380 | case FS_IOC_GETVERSION: |
2258 | return btrfs_ioctl_getversion(file, argp); | 2381 | return btrfs_ioctl_getversion(file, argp); |
2259 | case BTRFS_IOC_SNAP_CREATE: | 2382 | case BTRFS_IOC_SNAP_CREATE: |
2260 | return btrfs_ioctl_snap_create(file, argp, 0, 0); | 2383 | return btrfs_ioctl_snap_create(file, argp, 0); |
2261 | case BTRFS_IOC_SNAP_CREATE_V2: | 2384 | case BTRFS_IOC_SNAP_CREATE_V2: |
2262 | return btrfs_ioctl_snap_create(file, argp, 0, 1); | 2385 | return btrfs_ioctl_snap_create_v2(file, argp, 0); |
2263 | case BTRFS_IOC_SUBVOL_CREATE: | 2386 | case BTRFS_IOC_SUBVOL_CREATE: |
2264 | return btrfs_ioctl_snap_create(file, argp, 1, 0); | 2387 | return btrfs_ioctl_snap_create(file, argp, 1); |
2265 | case BTRFS_IOC_SNAP_DESTROY: | 2388 | case BTRFS_IOC_SNAP_DESTROY: |
2266 | return btrfs_ioctl_snap_destroy(file, argp); | 2389 | return btrfs_ioctl_snap_destroy(file, argp); |
2390 | case BTRFS_IOC_SUBVOL_GETFLAGS: | ||
2391 | return btrfs_ioctl_subvol_getflags(file, argp); | ||
2392 | case BTRFS_IOC_SUBVOL_SETFLAGS: | ||
2393 | return btrfs_ioctl_subvol_setflags(file, argp); | ||
2267 | case BTRFS_IOC_DEFAULT_SUBVOL: | 2394 | case BTRFS_IOC_DEFAULT_SUBVOL: |
2268 | return btrfs_ioctl_default_subvol(file, argp); | 2395 | return btrfs_ioctl_default_subvol(file, argp); |
2269 | case BTRFS_IOC_DEFRAG: | 2396 | case BTRFS_IOC_DEFRAG: |
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index c344d12c646b..8fb382167b13 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h | |||
@@ -31,6 +31,7 @@ struct btrfs_ioctl_vol_args { | |||
31 | }; | 31 | }; |
32 | 32 | ||
33 | #define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) | 33 | #define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) |
34 | #define BTRFS_SUBVOL_RDONLY (1ULL << 1) | ||
34 | 35 | ||
35 | #define BTRFS_SUBVOL_NAME_MAX 4039 | 36 | #define BTRFS_SUBVOL_NAME_MAX 4039 |
36 | struct btrfs_ioctl_vol_args_v2 { | 37 | struct btrfs_ioctl_vol_args_v2 { |
@@ -133,8 +134,15 @@ struct btrfs_ioctl_defrag_range_args { | |||
133 | */ | 134 | */ |
134 | __u32 extent_thresh; | 135 | __u32 extent_thresh; |
135 | 136 | ||
137 | /* | ||
138 | * which compression method to use if turning on compression | ||
139 | * for this defrag operation. If unspecified, zlib will | ||
140 | * be used | ||
141 | */ | ||
142 | __u32 compress_type; | ||
143 | |||
136 | /* spare for later */ | 144 | /* spare for later */ |
137 | __u32 unused[5]; | 145 | __u32 unused[4]; |
138 | }; | 146 | }; |
139 | 147 | ||
140 | struct btrfs_ioctl_space_info { | 148 | struct btrfs_ioctl_space_info { |
@@ -193,4 +201,6 @@ struct btrfs_ioctl_space_args { | |||
193 | #define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64) | 201 | #define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64) |
194 | #define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \ | 202 | #define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \ |
195 | struct btrfs_ioctl_vol_args_v2) | 203 | struct btrfs_ioctl_vol_args_v2) |
204 | #define BTRFS_IOC_SUBVOL_GETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 25, __u64) | ||
205 | #define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64) | ||
196 | #endif | 206 | #endif |
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c new file mode 100644 index 000000000000..cc9b450399df --- /dev/null +++ b/fs/btrfs/lzo.c | |||
@@ -0,0 +1,420 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2008 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public | ||
6 | * License v2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public | ||
14 | * License along with this program; if not, write to the | ||
15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
16 | * Boston, MA 021110-1307, USA. | ||
17 | */ | ||
18 | |||
19 | #include <linux/kernel.h> | ||
20 | #include <linux/slab.h> | ||
21 | #include <linux/vmalloc.h> | ||
22 | #include <linux/init.h> | ||
23 | #include <linux/err.h> | ||
24 | #include <linux/sched.h> | ||
25 | #include <linux/pagemap.h> | ||
26 | #include <linux/bio.h> | ||
27 | #include <linux/lzo.h> | ||
28 | #include "compression.h" | ||
29 | |||
30 | #define LZO_LEN 4 | ||
31 | |||
32 | struct workspace { | ||
33 | void *mem; | ||
34 | void *buf; /* where compressed data goes */ | ||
35 | void *cbuf; /* where decompressed data goes */ | ||
36 | struct list_head list; | ||
37 | }; | ||
38 | |||
39 | static void lzo_free_workspace(struct list_head *ws) | ||
40 | { | ||
41 | struct workspace *workspace = list_entry(ws, struct workspace, list); | ||
42 | |||
43 | vfree(workspace->buf); | ||
44 | vfree(workspace->cbuf); | ||
45 | vfree(workspace->mem); | ||
46 | kfree(workspace); | ||
47 | } | ||
48 | |||
49 | static struct list_head *lzo_alloc_workspace(void) | ||
50 | { | ||
51 | struct workspace *workspace; | ||
52 | |||
53 | workspace = kzalloc(sizeof(*workspace), GFP_NOFS); | ||
54 | if (!workspace) | ||
55 | return ERR_PTR(-ENOMEM); | ||
56 | |||
57 | workspace->mem = vmalloc(LZO1X_MEM_COMPRESS); | ||
58 | workspace->buf = vmalloc(lzo1x_worst_compress(PAGE_CACHE_SIZE)); | ||
59 | workspace->cbuf = vmalloc(lzo1x_worst_compress(PAGE_CACHE_SIZE)); | ||
60 | if (!workspace->mem || !workspace->buf || !workspace->cbuf) | ||
61 | goto fail; | ||
62 | |||
63 | INIT_LIST_HEAD(&workspace->list); | ||
64 | |||
65 | return &workspace->list; | ||
66 | fail: | ||
67 | lzo_free_workspace(&workspace->list); | ||
68 | return ERR_PTR(-ENOMEM); | ||
69 | } | ||
70 | |||
71 | static inline void write_compress_length(char *buf, size_t len) | ||
72 | { | ||
73 | __le32 dlen; | ||
74 | |||
75 | dlen = cpu_to_le32(len); | ||
76 | memcpy(buf, &dlen, LZO_LEN); | ||
77 | } | ||
78 | |||
79 | static inline size_t read_compress_length(char *buf) | ||
80 | { | ||
81 | __le32 dlen; | ||
82 | |||
83 | memcpy(&dlen, buf, LZO_LEN); | ||
84 | return le32_to_cpu(dlen); | ||
85 | } | ||
86 | |||
87 | static int lzo_compress_pages(struct list_head *ws, | ||
88 | struct address_space *mapping, | ||
89 | u64 start, unsigned long len, | ||
90 | struct page **pages, | ||
91 | unsigned long nr_dest_pages, | ||
92 | unsigned long *out_pages, | ||
93 | unsigned long *total_in, | ||
94 | unsigned long *total_out, | ||
95 | unsigned long max_out) | ||
96 | { | ||
97 | struct workspace *workspace = list_entry(ws, struct workspace, list); | ||
98 | int ret = 0; | ||
99 | char *data_in; | ||
100 | char *cpage_out; | ||
101 | int nr_pages = 0; | ||
102 | struct page *in_page = NULL; | ||
103 | struct page *out_page = NULL; | ||
104 | unsigned long bytes_left; | ||
105 | |||
106 | size_t in_len; | ||
107 | size_t out_len; | ||
108 | char *buf; | ||
109 | unsigned long tot_in = 0; | ||
110 | unsigned long tot_out = 0; | ||
111 | unsigned long pg_bytes_left; | ||
112 | unsigned long out_offset; | ||
113 | unsigned long bytes; | ||
114 | |||
115 | *out_pages = 0; | ||
116 | *total_out = 0; | ||
117 | *total_in = 0; | ||
118 | |||
119 | in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT); | ||
120 | data_in = kmap(in_page); | ||
121 | |||
122 | /* | ||
123 | * store the size of all chunks of compressed data in | ||
124 | * the first 4 bytes | ||
125 | */ | ||
126 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); | ||
127 | if (out_page == NULL) { | ||
128 | ret = -ENOMEM; | ||
129 | goto out; | ||
130 | } | ||
131 | cpage_out = kmap(out_page); | ||
132 | out_offset = LZO_LEN; | ||
133 | tot_out = LZO_LEN; | ||
134 | pages[0] = out_page; | ||
135 | nr_pages = 1; | ||
136 | pg_bytes_left = PAGE_CACHE_SIZE - LZO_LEN; | ||
137 | |||
138 | /* compress at most one page of data each time */ | ||
139 | in_len = min(len, PAGE_CACHE_SIZE); | ||
140 | while (tot_in < len) { | ||
141 | ret = lzo1x_1_compress(data_in, in_len, workspace->cbuf, | ||
142 | &out_len, workspace->mem); | ||
143 | if (ret != LZO_E_OK) { | ||
144 | printk(KERN_DEBUG "btrfs deflate in loop returned %d\n", | ||
145 | ret); | ||
146 | ret = -1; | ||
147 | goto out; | ||
148 | } | ||
149 | |||
150 | /* store the size of this chunk of compressed data */ | ||
151 | write_compress_length(cpage_out + out_offset, out_len); | ||
152 | tot_out += LZO_LEN; | ||
153 | out_offset += LZO_LEN; | ||
154 | pg_bytes_left -= LZO_LEN; | ||
155 | |||
156 | tot_in += in_len; | ||
157 | tot_out += out_len; | ||
158 | |||
159 | /* copy bytes from the working buffer into the pages */ | ||
160 | buf = workspace->cbuf; | ||
161 | while (out_len) { | ||
162 | bytes = min_t(unsigned long, pg_bytes_left, out_len); | ||
163 | |||
164 | memcpy(cpage_out + out_offset, buf, bytes); | ||
165 | |||
166 | out_len -= bytes; | ||
167 | pg_bytes_left -= bytes; | ||
168 | buf += bytes; | ||
169 | out_offset += bytes; | ||
170 | |||
171 | /* | ||
172 | * we need another page for writing out. | ||
173 | * | ||
174 | * Note if there's less than 4 bytes left, we just | ||
175 | * skip to a new page. | ||
176 | */ | ||
177 | if ((out_len == 0 && pg_bytes_left < LZO_LEN) || | ||
178 | pg_bytes_left == 0) { | ||
179 | if (pg_bytes_left) { | ||
180 | memset(cpage_out + out_offset, 0, | ||
181 | pg_bytes_left); | ||
182 | tot_out += pg_bytes_left; | ||
183 | } | ||
184 | |||
185 | /* we're done, don't allocate new page */ | ||
186 | if (out_len == 0 && tot_in >= len) | ||
187 | break; | ||
188 | |||
189 | kunmap(out_page); | ||
190 | if (nr_pages == nr_dest_pages) { | ||
191 | out_page = NULL; | ||
192 | ret = -1; | ||
193 | goto out; | ||
194 | } | ||
195 | |||
196 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); | ||
197 | if (out_page == NULL) { | ||
198 | ret = -ENOMEM; | ||
199 | goto out; | ||
200 | } | ||
201 | cpage_out = kmap(out_page); | ||
202 | pages[nr_pages++] = out_page; | ||
203 | |||
204 | pg_bytes_left = PAGE_CACHE_SIZE; | ||
205 | out_offset = 0; | ||
206 | } | ||
207 | } | ||
208 | |||
209 | /* we're making it bigger, give up */ | ||
210 | if (tot_in > 8192 && tot_in < tot_out) | ||
211 | goto out; | ||
212 | |||
213 | /* we're all done */ | ||
214 | if (tot_in >= len) | ||
215 | break; | ||
216 | |||
217 | if (tot_out > max_out) | ||
218 | break; | ||
219 | |||
220 | bytes_left = len - tot_in; | ||
221 | kunmap(in_page); | ||
222 | page_cache_release(in_page); | ||
223 | |||
224 | start += PAGE_CACHE_SIZE; | ||
225 | in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT); | ||
226 | data_in = kmap(in_page); | ||
227 | in_len = min(bytes_left, PAGE_CACHE_SIZE); | ||
228 | } | ||
229 | |||
230 | if (tot_out > tot_in) | ||
231 | goto out; | ||
232 | |||
233 | /* store the size of all chunks of compressed data */ | ||
234 | cpage_out = kmap(pages[0]); | ||
235 | write_compress_length(cpage_out, tot_out); | ||
236 | |||
237 | kunmap(pages[0]); | ||
238 | |||
239 | ret = 0; | ||
240 | *total_out = tot_out; | ||
241 | *total_in = tot_in; | ||
242 | out: | ||
243 | *out_pages = nr_pages; | ||
244 | if (out_page) | ||
245 | kunmap(out_page); | ||
246 | |||
247 | if (in_page) { | ||
248 | kunmap(in_page); | ||
249 | page_cache_release(in_page); | ||
250 | } | ||
251 | |||
252 | return ret; | ||
253 | } | ||
254 | |||
255 | static int lzo_decompress_biovec(struct list_head *ws, | ||
256 | struct page **pages_in, | ||
257 | u64 disk_start, | ||
258 | struct bio_vec *bvec, | ||
259 | int vcnt, | ||
260 | size_t srclen) | ||
261 | { | ||
262 | struct workspace *workspace = list_entry(ws, struct workspace, list); | ||
263 | int ret = 0, ret2; | ||
264 | char *data_in; | ||
265 | unsigned long page_in_index = 0; | ||
266 | unsigned long page_out_index = 0; | ||
267 | unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) / | ||
268 | PAGE_CACHE_SIZE; | ||
269 | unsigned long buf_start; | ||
270 | unsigned long buf_offset = 0; | ||
271 | unsigned long bytes; | ||
272 | unsigned long working_bytes; | ||
273 | unsigned long pg_offset; | ||
274 | |||
275 | size_t in_len; | ||
276 | size_t out_len; | ||
277 | unsigned long in_offset; | ||
278 | unsigned long in_page_bytes_left; | ||
279 | unsigned long tot_in; | ||
280 | unsigned long tot_out; | ||
281 | unsigned long tot_len; | ||
282 | char *buf; | ||
283 | |||
284 | data_in = kmap(pages_in[0]); | ||
285 | tot_len = read_compress_length(data_in); | ||
286 | |||
287 | tot_in = LZO_LEN; | ||
288 | in_offset = LZO_LEN; | ||
289 | tot_len = min_t(size_t, srclen, tot_len); | ||
290 | in_page_bytes_left = PAGE_CACHE_SIZE - LZO_LEN; | ||
291 | |||
292 | tot_out = 0; | ||
293 | pg_offset = 0; | ||
294 | |||
295 | while (tot_in < tot_len) { | ||
296 | in_len = read_compress_length(data_in + in_offset); | ||
297 | in_page_bytes_left -= LZO_LEN; | ||
298 | in_offset += LZO_LEN; | ||
299 | tot_in += LZO_LEN; | ||
300 | |||
301 | tot_in += in_len; | ||
302 | working_bytes = in_len; | ||
303 | |||
304 | /* fast path: avoid using the working buffer */ | ||
305 | if (in_page_bytes_left >= in_len) { | ||
306 | buf = data_in + in_offset; | ||
307 | bytes = in_len; | ||
308 | goto cont; | ||
309 | } | ||
310 | |||
311 | /* copy bytes from the pages into the working buffer */ | ||
312 | buf = workspace->cbuf; | ||
313 | buf_offset = 0; | ||
314 | while (working_bytes) { | ||
315 | bytes = min(working_bytes, in_page_bytes_left); | ||
316 | |||
317 | memcpy(buf + buf_offset, data_in + in_offset, bytes); | ||
318 | buf_offset += bytes; | ||
319 | cont: | ||
320 | working_bytes -= bytes; | ||
321 | in_page_bytes_left -= bytes; | ||
322 | in_offset += bytes; | ||
323 | |||
324 | /* check if we need to pick another page */ | ||
325 | if ((working_bytes == 0 && in_page_bytes_left < LZO_LEN) | ||
326 | || in_page_bytes_left == 0) { | ||
327 | tot_in += in_page_bytes_left; | ||
328 | |||
329 | if (working_bytes == 0 && tot_in >= tot_len) | ||
330 | break; | ||
331 | |||
332 | kunmap(pages_in[page_in_index]); | ||
333 | page_in_index++; | ||
334 | if (page_in_index >= total_pages_in) { | ||
335 | ret = -1; | ||
336 | data_in = NULL; | ||
337 | goto done; | ||
338 | } | ||
339 | data_in = kmap(pages_in[page_in_index]); | ||
340 | |||
341 | in_page_bytes_left = PAGE_CACHE_SIZE; | ||
342 | in_offset = 0; | ||
343 | } | ||
344 | } | ||
345 | |||
346 | out_len = lzo1x_worst_compress(PAGE_CACHE_SIZE); | ||
347 | ret = lzo1x_decompress_safe(buf, in_len, workspace->buf, | ||
348 | &out_len); | ||
349 | if (ret != LZO_E_OK) { | ||
350 | printk(KERN_WARNING "btrfs decompress failed\n"); | ||
351 | ret = -1; | ||
352 | break; | ||
353 | } | ||
354 | |||
355 | buf_start = tot_out; | ||
356 | tot_out += out_len; | ||
357 | |||
358 | ret2 = btrfs_decompress_buf2page(workspace->buf, buf_start, | ||
359 | tot_out, disk_start, | ||
360 | bvec, vcnt, | ||
361 | &page_out_index, &pg_offset); | ||
362 | if (ret2 == 0) | ||
363 | break; | ||
364 | } | ||
365 | done: | ||
366 | if (data_in) | ||
367 | kunmap(pages_in[page_in_index]); | ||
368 | return ret; | ||
369 | } | ||
370 | |||
371 | static int lzo_decompress(struct list_head *ws, unsigned char *data_in, | ||
372 | struct page *dest_page, | ||
373 | unsigned long start_byte, | ||
374 | size_t srclen, size_t destlen) | ||
375 | { | ||
376 | struct workspace *workspace = list_entry(ws, struct workspace, list); | ||
377 | size_t in_len; | ||
378 | size_t out_len; | ||
379 | size_t tot_len; | ||
380 | int ret = 0; | ||
381 | char *kaddr; | ||
382 | unsigned long bytes; | ||
383 | |||
384 | BUG_ON(srclen < LZO_LEN); | ||
385 | |||
386 | tot_len = read_compress_length(data_in); | ||
387 | data_in += LZO_LEN; | ||
388 | |||
389 | in_len = read_compress_length(data_in); | ||
390 | data_in += LZO_LEN; | ||
391 | |||
392 | out_len = PAGE_CACHE_SIZE; | ||
393 | ret = lzo1x_decompress_safe(data_in, in_len, workspace->buf, &out_len); | ||
394 | if (ret != LZO_E_OK) { | ||
395 | printk(KERN_WARNING "btrfs decompress failed!\n"); | ||
396 | ret = -1; | ||
397 | goto out; | ||
398 | } | ||
399 | |||
400 | if (out_len < start_byte) { | ||
401 | ret = -1; | ||
402 | goto out; | ||
403 | } | ||
404 | |||
405 | bytes = min_t(unsigned long, destlen, out_len - start_byte); | ||
406 | |||
407 | kaddr = kmap_atomic(dest_page, KM_USER0); | ||
408 | memcpy(kaddr, workspace->buf + start_byte, bytes); | ||
409 | kunmap_atomic(kaddr, KM_USER0); | ||
410 | out: | ||
411 | return ret; | ||
412 | } | ||
413 | |||
414 | struct btrfs_compress_op btrfs_lzo_compress = { | ||
415 | .alloc_workspace = lzo_alloc_workspace, | ||
416 | .free_workspace = lzo_free_workspace, | ||
417 | .compress_pages = lzo_compress_pages, | ||
418 | .decompress_biovec = lzo_decompress_biovec, | ||
419 | .decompress = lzo_decompress, | ||
420 | }; | ||
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index ae7737e352c9..083a55477375 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -141,7 +141,7 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, | |||
141 | u64 file_offset) | 141 | u64 file_offset) |
142 | { | 142 | { |
143 | struct rb_root *root = &tree->tree; | 143 | struct rb_root *root = &tree->tree; |
144 | struct rb_node *prev; | 144 | struct rb_node *prev = NULL; |
145 | struct rb_node *ret; | 145 | struct rb_node *ret; |
146 | struct btrfs_ordered_extent *entry; | 146 | struct btrfs_ordered_extent *entry; |
147 | 147 | ||
@@ -172,7 +172,7 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, | |||
172 | */ | 172 | */ |
173 | static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | 173 | static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, |
174 | u64 start, u64 len, u64 disk_len, | 174 | u64 start, u64 len, u64 disk_len, |
175 | int type, int dio) | 175 | int type, int dio, int compress_type) |
176 | { | 176 | { |
177 | struct btrfs_ordered_inode_tree *tree; | 177 | struct btrfs_ordered_inode_tree *tree; |
178 | struct rb_node *node; | 178 | struct rb_node *node; |
@@ -189,6 +189,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
189 | entry->disk_len = disk_len; | 189 | entry->disk_len = disk_len; |
190 | entry->bytes_left = len; | 190 | entry->bytes_left = len; |
191 | entry->inode = inode; | 191 | entry->inode = inode; |
192 | entry->compress_type = compress_type; | ||
192 | if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) | 193 | if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) |
193 | set_bit(type, &entry->flags); | 194 | set_bit(type, &entry->flags); |
194 | 195 | ||
@@ -220,14 +221,25 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
220 | u64 start, u64 len, u64 disk_len, int type) | 221 | u64 start, u64 len, u64 disk_len, int type) |
221 | { | 222 | { |
222 | return __btrfs_add_ordered_extent(inode, file_offset, start, len, | 223 | return __btrfs_add_ordered_extent(inode, file_offset, start, len, |
223 | disk_len, type, 0); | 224 | disk_len, type, 0, |
225 | BTRFS_COMPRESS_NONE); | ||
224 | } | 226 | } |
225 | 227 | ||
226 | int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, | 228 | int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, |
227 | u64 start, u64 len, u64 disk_len, int type) | 229 | u64 start, u64 len, u64 disk_len, int type) |
228 | { | 230 | { |
229 | return __btrfs_add_ordered_extent(inode, file_offset, start, len, | 231 | return __btrfs_add_ordered_extent(inode, file_offset, start, len, |
230 | disk_len, type, 1); | 232 | disk_len, type, 1, |
233 | BTRFS_COMPRESS_NONE); | ||
234 | } | ||
235 | |||
236 | int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset, | ||
237 | u64 start, u64 len, u64 disk_len, | ||
238 | int type, int compress_type) | ||
239 | { | ||
240 | return __btrfs_add_ordered_extent(inode, file_offset, start, len, | ||
241 | disk_len, type, 0, | ||
242 | compress_type); | ||
231 | } | 243 | } |
232 | 244 | ||
233 | /* | 245 | /* |
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 61dca83119dd..ff1f69aa1883 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h | |||
@@ -68,7 +68,7 @@ struct btrfs_ordered_sum { | |||
68 | 68 | ||
69 | #define BTRFS_ORDERED_NOCOW 2 /* set when we want to write in place */ | 69 | #define BTRFS_ORDERED_NOCOW 2 /* set when we want to write in place */ |
70 | 70 | ||
71 | #define BTRFS_ORDERED_COMPRESSED 3 /* writing a compressed extent */ | 71 | #define BTRFS_ORDERED_COMPRESSED 3 /* writing a zlib compressed extent */ |
72 | 72 | ||
73 | #define BTRFS_ORDERED_PREALLOC 4 /* set when writing to prealloced extent */ | 73 | #define BTRFS_ORDERED_PREALLOC 4 /* set when writing to prealloced extent */ |
74 | 74 | ||
@@ -93,6 +93,9 @@ struct btrfs_ordered_extent { | |||
93 | /* flags (described above) */ | 93 | /* flags (described above) */ |
94 | unsigned long flags; | 94 | unsigned long flags; |
95 | 95 | ||
96 | /* compression algorithm */ | ||
97 | int compress_type; | ||
98 | |||
96 | /* reference count */ | 99 | /* reference count */ |
97 | atomic_t refs; | 100 | atomic_t refs; |
98 | 101 | ||
@@ -148,6 +151,9 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
148 | u64 start, u64 len, u64 disk_len, int type); | 151 | u64 start, u64 len, u64 disk_len, int type); |
149 | int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, | 152 | int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, |
150 | u64 start, u64 len, u64 disk_len, int type); | 153 | u64 start, u64 len, u64 disk_len, int type); |
154 | int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset, | ||
155 | u64 start, u64 len, u64 disk_len, | ||
156 | int type, int compress_type); | ||
151 | int btrfs_add_ordered_sum(struct inode *inode, | 157 | int btrfs_add_ordered_sum(struct inode *inode, |
152 | struct btrfs_ordered_extent *entry, | 158 | struct btrfs_ordered_extent *entry, |
153 | struct btrfs_ordered_sum *sum); | 159 | struct btrfs_ordered_sum *sum); |
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 0d126be22b63..fb2605d998e9 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c | |||
@@ -260,6 +260,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) | |||
260 | #else | 260 | #else |
261 | BUG(); | 261 | BUG(); |
262 | #endif | 262 | #endif |
263 | break; | ||
263 | case BTRFS_BLOCK_GROUP_ITEM_KEY: | 264 | case BTRFS_BLOCK_GROUP_ITEM_KEY: |
264 | bi = btrfs_item_ptr(l, i, | 265 | bi = btrfs_item_ptr(l, i, |
265 | struct btrfs_block_group_item); | 266 | struct btrfs_block_group_item); |
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 045c9c2b2d7e..1f5556acb530 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
@@ -2028,6 +2028,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, | |||
2028 | 2028 | ||
2029 | while (1) { | 2029 | while (1) { |
2030 | trans = btrfs_start_transaction(root, 0); | 2030 | trans = btrfs_start_transaction(root, 0); |
2031 | BUG_ON(IS_ERR(trans)); | ||
2031 | trans->block_rsv = rc->block_rsv; | 2032 | trans->block_rsv = rc->block_rsv; |
2032 | 2033 | ||
2033 | ret = btrfs_block_rsv_check(trans, root, rc->block_rsv, | 2034 | ret = btrfs_block_rsv_check(trans, root, rc->block_rsv, |
@@ -2147,6 +2148,12 @@ again: | |||
2147 | } | 2148 | } |
2148 | 2149 | ||
2149 | trans = btrfs_join_transaction(rc->extent_root, 1); | 2150 | trans = btrfs_join_transaction(rc->extent_root, 1); |
2151 | if (IS_ERR(trans)) { | ||
2152 | if (!err) | ||
2153 | btrfs_block_rsv_release(rc->extent_root, | ||
2154 | rc->block_rsv, num_bytes); | ||
2155 | return PTR_ERR(trans); | ||
2156 | } | ||
2150 | 2157 | ||
2151 | if (!err) { | 2158 | if (!err) { |
2152 | if (num_bytes != rc->merging_rsv_size) { | 2159 | if (num_bytes != rc->merging_rsv_size) { |
@@ -3222,6 +3229,7 @@ truncate: | |||
3222 | trans = btrfs_join_transaction(root, 0); | 3229 | trans = btrfs_join_transaction(root, 0); |
3223 | if (IS_ERR(trans)) { | 3230 | if (IS_ERR(trans)) { |
3224 | btrfs_free_path(path); | 3231 | btrfs_free_path(path); |
3232 | ret = PTR_ERR(trans); | ||
3225 | goto out; | 3233 | goto out; |
3226 | } | 3234 | } |
3227 | 3235 | ||
@@ -3628,6 +3636,7 @@ int prepare_to_relocate(struct reloc_control *rc) | |||
3628 | set_reloc_control(rc); | 3636 | set_reloc_control(rc); |
3629 | 3637 | ||
3630 | trans = btrfs_join_transaction(rc->extent_root, 1); | 3638 | trans = btrfs_join_transaction(rc->extent_root, 1); |
3639 | BUG_ON(IS_ERR(trans)); | ||
3631 | btrfs_commit_transaction(trans, rc->extent_root); | 3640 | btrfs_commit_transaction(trans, rc->extent_root); |
3632 | return 0; | 3641 | return 0; |
3633 | } | 3642 | } |
@@ -3657,6 +3666,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
3657 | 3666 | ||
3658 | while (1) { | 3667 | while (1) { |
3659 | trans = btrfs_start_transaction(rc->extent_root, 0); | 3668 | trans = btrfs_start_transaction(rc->extent_root, 0); |
3669 | BUG_ON(IS_ERR(trans)); | ||
3660 | 3670 | ||
3661 | if (update_backref_cache(trans, &rc->backref_cache)) { | 3671 | if (update_backref_cache(trans, &rc->backref_cache)) { |
3662 | btrfs_end_transaction(trans, rc->extent_root); | 3672 | btrfs_end_transaction(trans, rc->extent_root); |
@@ -3804,7 +3814,10 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
3804 | 3814 | ||
3805 | /* get rid of pinned extents */ | 3815 | /* get rid of pinned extents */ |
3806 | trans = btrfs_join_transaction(rc->extent_root, 1); | 3816 | trans = btrfs_join_transaction(rc->extent_root, 1); |
3807 | btrfs_commit_transaction(trans, rc->extent_root); | 3817 | if (IS_ERR(trans)) |
3818 | err = PTR_ERR(trans); | ||
3819 | else | ||
3820 | btrfs_commit_transaction(trans, rc->extent_root); | ||
3808 | out_free: | 3821 | out_free: |
3809 | btrfs_free_block_rsv(rc->extent_root, rc->block_rsv); | 3822 | btrfs_free_block_rsv(rc->extent_root, rc->block_rsv); |
3810 | btrfs_free_path(path); | 3823 | btrfs_free_path(path); |
@@ -4022,6 +4035,7 @@ static noinline_for_stack int mark_garbage_root(struct btrfs_root *root) | |||
4022 | int ret; | 4035 | int ret; |
4023 | 4036 | ||
4024 | trans = btrfs_start_transaction(root->fs_info->tree_root, 0); | 4037 | trans = btrfs_start_transaction(root->fs_info->tree_root, 0); |
4038 | BUG_ON(IS_ERR(trans)); | ||
4025 | 4039 | ||
4026 | memset(&root->root_item.drop_progress, 0, | 4040 | memset(&root->root_item.drop_progress, 0, |
4027 | sizeof(root->root_item.drop_progress)); | 4041 | sizeof(root->root_item.drop_progress)); |
@@ -4125,6 +4139,11 @@ int btrfs_recover_relocation(struct btrfs_root *root) | |||
4125 | set_reloc_control(rc); | 4139 | set_reloc_control(rc); |
4126 | 4140 | ||
4127 | trans = btrfs_join_transaction(rc->extent_root, 1); | 4141 | trans = btrfs_join_transaction(rc->extent_root, 1); |
4142 | if (IS_ERR(trans)) { | ||
4143 | unset_reloc_control(rc); | ||
4144 | err = PTR_ERR(trans); | ||
4145 | goto out_free; | ||
4146 | } | ||
4128 | 4147 | ||
4129 | rc->merge_reloc_tree = 1; | 4148 | rc->merge_reloc_tree = 1; |
4130 | 4149 | ||
@@ -4154,9 +4173,13 @@ int btrfs_recover_relocation(struct btrfs_root *root) | |||
4154 | unset_reloc_control(rc); | 4173 | unset_reloc_control(rc); |
4155 | 4174 | ||
4156 | trans = btrfs_join_transaction(rc->extent_root, 1); | 4175 | trans = btrfs_join_transaction(rc->extent_root, 1); |
4157 | btrfs_commit_transaction(trans, rc->extent_root); | 4176 | if (IS_ERR(trans)) |
4158 | out: | 4177 | err = PTR_ERR(trans); |
4178 | else | ||
4179 | btrfs_commit_transaction(trans, rc->extent_root); | ||
4180 | out_free: | ||
4159 | kfree(rc); | 4181 | kfree(rc); |
4182 | out: | ||
4160 | while (!list_empty(&reloc_roots)) { | 4183 | while (!list_empty(&reloc_roots)) { |
4161 | reloc_root = list_entry(reloc_roots.next, | 4184 | reloc_root = list_entry(reloc_roots.next, |
4162 | struct btrfs_root, root_list); | 4185 | struct btrfs_root, root_list); |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 22acdaa78ce1..a004008f7d28 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -54,6 +54,90 @@ | |||
54 | 54 | ||
55 | static const struct super_operations btrfs_super_ops; | 55 | static const struct super_operations btrfs_super_ops; |
56 | 56 | ||
57 | static const char *btrfs_decode_error(struct btrfs_fs_info *fs_info, int errno, | ||
58 | char nbuf[16]) | ||
59 | { | ||
60 | char *errstr = NULL; | ||
61 | |||
62 | switch (errno) { | ||
63 | case -EIO: | ||
64 | errstr = "IO failure"; | ||
65 | break; | ||
66 | case -ENOMEM: | ||
67 | errstr = "Out of memory"; | ||
68 | break; | ||
69 | case -EROFS: | ||
70 | errstr = "Readonly filesystem"; | ||
71 | break; | ||
72 | default: | ||
73 | if (nbuf) { | ||
74 | if (snprintf(nbuf, 16, "error %d", -errno) >= 0) | ||
75 | errstr = nbuf; | ||
76 | } | ||
77 | break; | ||
78 | } | ||
79 | |||
80 | return errstr; | ||
81 | } | ||
82 | |||
83 | static void __save_error_info(struct btrfs_fs_info *fs_info) | ||
84 | { | ||
85 | /* | ||
86 | * today we only save the error info into ram. Long term we'll | ||
87 | * also send it down to the disk | ||
88 | */ | ||
89 | fs_info->fs_state = BTRFS_SUPER_FLAG_ERROR; | ||
90 | } | ||
91 | |||
92 | /* NOTE: | ||
93 | * We move write_super stuff at umount in order to avoid deadlock | ||
94 | * for umount hold all lock. | ||
95 | */ | ||
96 | static void save_error_info(struct btrfs_fs_info *fs_info) | ||
97 | { | ||
98 | __save_error_info(fs_info); | ||
99 | } | ||
100 | |||
101 | /* btrfs handle error by forcing the filesystem readonly */ | ||
102 | static void btrfs_handle_error(struct btrfs_fs_info *fs_info) | ||
103 | { | ||
104 | struct super_block *sb = fs_info->sb; | ||
105 | |||
106 | if (sb->s_flags & MS_RDONLY) | ||
107 | return; | ||
108 | |||
109 | if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { | ||
110 | sb->s_flags |= MS_RDONLY; | ||
111 | printk(KERN_INFO "btrfs is forced readonly\n"); | ||
112 | } | ||
113 | } | ||
114 | |||
115 | /* | ||
116 | * __btrfs_std_error decodes expected errors from the caller and | ||
117 | * invokes the approciate error response. | ||
118 | */ | ||
119 | void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, | ||
120 | unsigned int line, int errno) | ||
121 | { | ||
122 | struct super_block *sb = fs_info->sb; | ||
123 | char nbuf[16]; | ||
124 | const char *errstr; | ||
125 | |||
126 | /* | ||
127 | * Special case: if the error is EROFS, and we're already | ||
128 | * under MS_RDONLY, then it is safe here. | ||
129 | */ | ||
130 | if (errno == -EROFS && (sb->s_flags & MS_RDONLY)) | ||
131 | return; | ||
132 | |||
133 | errstr = btrfs_decode_error(fs_info, errno, nbuf); | ||
134 | printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s\n", | ||
135 | sb->s_id, function, line, errstr); | ||
136 | save_error_info(fs_info); | ||
137 | |||
138 | btrfs_handle_error(fs_info); | ||
139 | } | ||
140 | |||
57 | static void btrfs_put_super(struct super_block *sb) | 141 | static void btrfs_put_super(struct super_block *sb) |
58 | { | 142 | { |
59 | struct btrfs_root *root = btrfs_sb(sb); | 143 | struct btrfs_root *root = btrfs_sb(sb); |
@@ -69,9 +153,9 @@ enum { | |||
69 | Opt_degraded, Opt_subvol, Opt_subvolid, Opt_device, Opt_nodatasum, | 153 | Opt_degraded, Opt_subvol, Opt_subvolid, Opt_device, Opt_nodatasum, |
70 | Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd, | 154 | Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd, |
71 | Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress, | 155 | Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress, |
72 | Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit, | 156 | Opt_compress_type, Opt_compress_force, Opt_compress_force_type, |
73 | Opt_discard, Opt_space_cache, Opt_clear_cache, Opt_err, | 157 | Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, |
74 | Opt_user_subvol_rm_allowed, | 158 | Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, Opt_err, |
75 | }; | 159 | }; |
76 | 160 | ||
77 | static match_table_t tokens = { | 161 | static match_table_t tokens = { |
@@ -86,7 +170,9 @@ static match_table_t tokens = { | |||
86 | {Opt_alloc_start, "alloc_start=%s"}, | 170 | {Opt_alloc_start, "alloc_start=%s"}, |
87 | {Opt_thread_pool, "thread_pool=%d"}, | 171 | {Opt_thread_pool, "thread_pool=%d"}, |
88 | {Opt_compress, "compress"}, | 172 | {Opt_compress, "compress"}, |
173 | {Opt_compress_type, "compress=%s"}, | ||
89 | {Opt_compress_force, "compress-force"}, | 174 | {Opt_compress_force, "compress-force"}, |
175 | {Opt_compress_force_type, "compress-force=%s"}, | ||
90 | {Opt_ssd, "ssd"}, | 176 | {Opt_ssd, "ssd"}, |
91 | {Opt_ssd_spread, "ssd_spread"}, | 177 | {Opt_ssd_spread, "ssd_spread"}, |
92 | {Opt_nossd, "nossd"}, | 178 | {Opt_nossd, "nossd"}, |
@@ -112,6 +198,8 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
112 | char *p, *num, *orig; | 198 | char *p, *num, *orig; |
113 | int intarg; | 199 | int intarg; |
114 | int ret = 0; | 200 | int ret = 0; |
201 | char *compress_type; | ||
202 | bool compress_force = false; | ||
115 | 203 | ||
116 | if (!options) | 204 | if (!options) |
117 | return 0; | 205 | return 0; |
@@ -154,14 +242,32 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
154 | btrfs_set_opt(info->mount_opt, NODATACOW); | 242 | btrfs_set_opt(info->mount_opt, NODATACOW); |
155 | btrfs_set_opt(info->mount_opt, NODATASUM); | 243 | btrfs_set_opt(info->mount_opt, NODATASUM); |
156 | break; | 244 | break; |
157 | case Opt_compress: | ||
158 | printk(KERN_INFO "btrfs: use compression\n"); | ||
159 | btrfs_set_opt(info->mount_opt, COMPRESS); | ||
160 | break; | ||
161 | case Opt_compress_force: | 245 | case Opt_compress_force: |
162 | printk(KERN_INFO "btrfs: forcing compression\n"); | 246 | case Opt_compress_force_type: |
163 | btrfs_set_opt(info->mount_opt, FORCE_COMPRESS); | 247 | compress_force = true; |
248 | case Opt_compress: | ||
249 | case Opt_compress_type: | ||
250 | if (token == Opt_compress || | ||
251 | token == Opt_compress_force || | ||
252 | strcmp(args[0].from, "zlib") == 0) { | ||
253 | compress_type = "zlib"; | ||
254 | info->compress_type = BTRFS_COMPRESS_ZLIB; | ||
255 | } else if (strcmp(args[0].from, "lzo") == 0) { | ||
256 | compress_type = "lzo"; | ||
257 | info->compress_type = BTRFS_COMPRESS_LZO; | ||
258 | } else { | ||
259 | ret = -EINVAL; | ||
260 | goto out; | ||
261 | } | ||
262 | |||
164 | btrfs_set_opt(info->mount_opt, COMPRESS); | 263 | btrfs_set_opt(info->mount_opt, COMPRESS); |
264 | if (compress_force) { | ||
265 | btrfs_set_opt(info->mount_opt, FORCE_COMPRESS); | ||
266 | pr_info("btrfs: force %s compression\n", | ||
267 | compress_type); | ||
268 | } else | ||
269 | pr_info("btrfs: use %s compression\n", | ||
270 | compress_type); | ||
165 | break; | 271 | break; |
166 | case Opt_ssd: | 272 | case Opt_ssd: |
167 | printk(KERN_INFO "btrfs: use ssd allocation scheme\n"); | 273 | printk(KERN_INFO "btrfs: use ssd allocation scheme\n"); |
@@ -277,7 +383,7 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, | |||
277 | struct btrfs_fs_devices **fs_devices) | 383 | struct btrfs_fs_devices **fs_devices) |
278 | { | 384 | { |
279 | substring_t args[MAX_OPT_ARGS]; | 385 | substring_t args[MAX_OPT_ARGS]; |
280 | char *opts, *p; | 386 | char *opts, *orig, *p; |
281 | int error = 0; | 387 | int error = 0; |
282 | int intarg; | 388 | int intarg; |
283 | 389 | ||
@@ -291,6 +397,7 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, | |||
291 | opts = kstrdup(options, GFP_KERNEL); | 397 | opts = kstrdup(options, GFP_KERNEL); |
292 | if (!opts) | 398 | if (!opts) |
293 | return -ENOMEM; | 399 | return -ENOMEM; |
400 | orig = opts; | ||
294 | 401 | ||
295 | while ((p = strsep(&opts, ",")) != NULL) { | 402 | while ((p = strsep(&opts, ",")) != NULL) { |
296 | int token; | 403 | int token; |
@@ -326,7 +433,7 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, | |||
326 | } | 433 | } |
327 | 434 | ||
328 | out_free_opts: | 435 | out_free_opts: |
329 | kfree(opts); | 436 | kfree(orig); |
330 | out: | 437 | out: |
331 | /* | 438 | /* |
332 | * If no subvolume name is specified we use the default one. Allocate | 439 | * If no subvolume name is specified we use the default one. Allocate |
@@ -517,6 +624,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait) | |||
517 | btrfs_wait_ordered_extents(root, 0, 0); | 624 | btrfs_wait_ordered_extents(root, 0, 0); |
518 | 625 | ||
519 | trans = btrfs_start_transaction(root, 0); | 626 | trans = btrfs_start_transaction(root, 0); |
627 | if (IS_ERR(trans)) | ||
628 | return PTR_ERR(trans); | ||
520 | ret = btrfs_commit_transaction(trans, root); | 629 | ret = btrfs_commit_transaction(trans, root); |
521 | return ret; | 630 | return ret; |
522 | } | 631 | } |
@@ -655,6 +764,8 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, | |||
655 | } | 764 | } |
656 | 765 | ||
657 | btrfs_close_devices(fs_devices); | 766 | btrfs_close_devices(fs_devices); |
767 | kfree(fs_info); | ||
768 | kfree(tree_root); | ||
658 | } else { | 769 | } else { |
659 | char b[BDEVNAME_SIZE]; | 770 | char b[BDEVNAME_SIZE]; |
660 | 771 | ||
@@ -753,6 +864,127 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) | |||
753 | return 0; | 864 | return 0; |
754 | } | 865 | } |
755 | 866 | ||
867 | /* | ||
868 | * The helper to calc the free space on the devices that can be used to store | ||
869 | * file data. | ||
870 | */ | ||
871 | static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes) | ||
872 | { | ||
873 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
874 | struct btrfs_device_info *devices_info; | ||
875 | struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; | ||
876 | struct btrfs_device *device; | ||
877 | u64 skip_space; | ||
878 | u64 type; | ||
879 | u64 avail_space; | ||
880 | u64 used_space; | ||
881 | u64 min_stripe_size; | ||
882 | int min_stripes = 1; | ||
883 | int i = 0, nr_devices; | ||
884 | int ret; | ||
885 | |||
886 | nr_devices = fs_info->fs_devices->rw_devices; | ||
887 | BUG_ON(!nr_devices); | ||
888 | |||
889 | devices_info = kmalloc(sizeof(*devices_info) * nr_devices, | ||
890 | GFP_NOFS); | ||
891 | if (!devices_info) | ||
892 | return -ENOMEM; | ||
893 | |||
894 | /* calc min stripe number for data space alloction */ | ||
895 | type = btrfs_get_alloc_profile(root, 1); | ||
896 | if (type & BTRFS_BLOCK_GROUP_RAID0) | ||
897 | min_stripes = 2; | ||
898 | else if (type & BTRFS_BLOCK_GROUP_RAID1) | ||
899 | min_stripes = 2; | ||
900 | else if (type & BTRFS_BLOCK_GROUP_RAID10) | ||
901 | min_stripes = 4; | ||
902 | |||
903 | if (type & BTRFS_BLOCK_GROUP_DUP) | ||
904 | min_stripe_size = 2 * BTRFS_STRIPE_LEN; | ||
905 | else | ||
906 | min_stripe_size = BTRFS_STRIPE_LEN; | ||
907 | |||
908 | list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { | ||
909 | if (!device->in_fs_metadata) | ||
910 | continue; | ||
911 | |||
912 | avail_space = device->total_bytes - device->bytes_used; | ||
913 | |||
914 | /* align with stripe_len */ | ||
915 | do_div(avail_space, BTRFS_STRIPE_LEN); | ||
916 | avail_space *= BTRFS_STRIPE_LEN; | ||
917 | |||
918 | /* | ||
919 | * In order to avoid overwritting the superblock on the drive, | ||
920 | * btrfs starts at an offset of at least 1MB when doing chunk | ||
921 | * allocation. | ||
922 | */ | ||
923 | skip_space = 1024 * 1024; | ||
924 | |||
925 | /* user can set the offset in fs_info->alloc_start. */ | ||
926 | if (fs_info->alloc_start + BTRFS_STRIPE_LEN <= | ||
927 | device->total_bytes) | ||
928 | skip_space = max(fs_info->alloc_start, skip_space); | ||
929 | |||
930 | /* | ||
931 | * btrfs can not use the free space in [0, skip_space - 1], | ||
932 | * we must subtract it from the total. In order to implement | ||
933 | * it, we account the used space in this range first. | ||
934 | */ | ||
935 | ret = btrfs_account_dev_extents_size(device, 0, skip_space - 1, | ||
936 | &used_space); | ||
937 | if (ret) { | ||
938 | kfree(devices_info); | ||
939 | return ret; | ||
940 | } | ||
941 | |||
942 | /* calc the free space in [0, skip_space - 1] */ | ||
943 | skip_space -= used_space; | ||
944 | |||
945 | /* | ||
946 | * we can use the free space in [0, skip_space - 1], subtract | ||
947 | * it from the total. | ||
948 | */ | ||
949 | if (avail_space && avail_space >= skip_space) | ||
950 | avail_space -= skip_space; | ||
951 | else | ||
952 | avail_space = 0; | ||
953 | |||
954 | if (avail_space < min_stripe_size) | ||
955 | continue; | ||
956 | |||
957 | devices_info[i].dev = device; | ||
958 | devices_info[i].max_avail = avail_space; | ||
959 | |||
960 | i++; | ||
961 | } | ||
962 | |||
963 | nr_devices = i; | ||
964 | |||
965 | btrfs_descending_sort_devices(devices_info, nr_devices); | ||
966 | |||
967 | i = nr_devices - 1; | ||
968 | avail_space = 0; | ||
969 | while (nr_devices >= min_stripes) { | ||
970 | if (devices_info[i].max_avail >= min_stripe_size) { | ||
971 | int j; | ||
972 | u64 alloc_size; | ||
973 | |||
974 | avail_space += devices_info[i].max_avail * min_stripes; | ||
975 | alloc_size = devices_info[i].max_avail; | ||
976 | for (j = i + 1 - min_stripes; j <= i; j++) | ||
977 | devices_info[j].max_avail -= alloc_size; | ||
978 | } | ||
979 | i--; | ||
980 | nr_devices--; | ||
981 | } | ||
982 | |||
983 | kfree(devices_info); | ||
984 | *free_bytes = avail_space; | ||
985 | return 0; | ||
986 | } | ||
987 | |||
756 | static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) | 988 | static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) |
757 | { | 989 | { |
758 | struct btrfs_root *root = btrfs_sb(dentry->d_sb); | 990 | struct btrfs_root *root = btrfs_sb(dentry->d_sb); |
@@ -760,17 +992,21 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
760 | struct list_head *head = &root->fs_info->space_info; | 992 | struct list_head *head = &root->fs_info->space_info; |
761 | struct btrfs_space_info *found; | 993 | struct btrfs_space_info *found; |
762 | u64 total_used = 0; | 994 | u64 total_used = 0; |
763 | u64 total_used_data = 0; | 995 | u64 total_free_data = 0; |
764 | int bits = dentry->d_sb->s_blocksize_bits; | 996 | int bits = dentry->d_sb->s_blocksize_bits; |
765 | __be32 *fsid = (__be32 *)root->fs_info->fsid; | 997 | __be32 *fsid = (__be32 *)root->fs_info->fsid; |
998 | int ret; | ||
766 | 999 | ||
1000 | /* holding chunk_muext to avoid allocating new chunks */ | ||
1001 | mutex_lock(&root->fs_info->chunk_mutex); | ||
767 | rcu_read_lock(); | 1002 | rcu_read_lock(); |
768 | list_for_each_entry_rcu(found, head, list) { | 1003 | list_for_each_entry_rcu(found, head, list) { |
769 | if (found->flags & (BTRFS_BLOCK_GROUP_METADATA | | 1004 | if (found->flags & BTRFS_BLOCK_GROUP_DATA) { |
770 | BTRFS_BLOCK_GROUP_SYSTEM)) | 1005 | total_free_data += found->disk_total - found->disk_used; |
771 | total_used_data += found->disk_total; | 1006 | total_free_data -= |
772 | else | 1007 | btrfs_account_ro_block_groups_free_space(found); |
773 | total_used_data += found->disk_used; | 1008 | } |
1009 | |||
774 | total_used += found->disk_used; | 1010 | total_used += found->disk_used; |
775 | } | 1011 | } |
776 | rcu_read_unlock(); | 1012 | rcu_read_unlock(); |
@@ -778,9 +1014,17 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
778 | buf->f_namelen = BTRFS_NAME_LEN; | 1014 | buf->f_namelen = BTRFS_NAME_LEN; |
779 | buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; | 1015 | buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; |
780 | buf->f_bfree = buf->f_blocks - (total_used >> bits); | 1016 | buf->f_bfree = buf->f_blocks - (total_used >> bits); |
781 | buf->f_bavail = buf->f_blocks - (total_used_data >> bits); | ||
782 | buf->f_bsize = dentry->d_sb->s_blocksize; | 1017 | buf->f_bsize = dentry->d_sb->s_blocksize; |
783 | buf->f_type = BTRFS_SUPER_MAGIC; | 1018 | buf->f_type = BTRFS_SUPER_MAGIC; |
1019 | buf->f_bavail = total_free_data; | ||
1020 | ret = btrfs_calc_avail_data_space(root, &total_free_data); | ||
1021 | if (ret) { | ||
1022 | mutex_unlock(&root->fs_info->chunk_mutex); | ||
1023 | return ret; | ||
1024 | } | ||
1025 | buf->f_bavail += total_free_data; | ||
1026 | buf->f_bavail = buf->f_bavail >> bits; | ||
1027 | mutex_unlock(&root->fs_info->chunk_mutex); | ||
784 | 1028 | ||
785 | /* We treat it as constant endianness (it doesn't matter _which_) | 1029 | /* We treat it as constant endianness (it doesn't matter _which_) |
786 | because we want the fsid to come out the same whether mounted | 1030 | because we want the fsid to come out the same whether mounted |
@@ -897,10 +1141,14 @@ static int __init init_btrfs_fs(void) | |||
897 | if (err) | 1141 | if (err) |
898 | return err; | 1142 | return err; |
899 | 1143 | ||
900 | err = btrfs_init_cachep(); | 1144 | err = btrfs_init_compress(); |
901 | if (err) | 1145 | if (err) |
902 | goto free_sysfs; | 1146 | goto free_sysfs; |
903 | 1147 | ||
1148 | err = btrfs_init_cachep(); | ||
1149 | if (err) | ||
1150 | goto free_compress; | ||
1151 | |||
904 | err = extent_io_init(); | 1152 | err = extent_io_init(); |
905 | if (err) | 1153 | if (err) |
906 | goto free_cachep; | 1154 | goto free_cachep; |
@@ -928,6 +1176,8 @@ free_extent_io: | |||
928 | extent_io_exit(); | 1176 | extent_io_exit(); |
929 | free_cachep: | 1177 | free_cachep: |
930 | btrfs_destroy_cachep(); | 1178 | btrfs_destroy_cachep(); |
1179 | free_compress: | ||
1180 | btrfs_exit_compress(); | ||
931 | free_sysfs: | 1181 | free_sysfs: |
932 | btrfs_exit_sysfs(); | 1182 | btrfs_exit_sysfs(); |
933 | return err; | 1183 | return err; |
@@ -942,7 +1192,7 @@ static void __exit exit_btrfs_fs(void) | |||
942 | unregister_filesystem(&btrfs_fs_type); | 1192 | unregister_filesystem(&btrfs_fs_type); |
943 | btrfs_exit_sysfs(); | 1193 | btrfs_exit_sysfs(); |
944 | btrfs_cleanup_fs_uuids(); | 1194 | btrfs_cleanup_fs_uuids(); |
945 | btrfs_zlib_exit(); | 1195 | btrfs_exit_compress(); |
946 | } | 1196 | } |
947 | 1197 | ||
948 | module_init(init_btrfs_fs) | 1198 | module_init(init_btrfs_fs) |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index f50e931fc217..3d73c8d93bbb 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -181,6 +181,9 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | |||
181 | struct btrfs_trans_handle *h; | 181 | struct btrfs_trans_handle *h; |
182 | struct btrfs_transaction *cur_trans; | 182 | struct btrfs_transaction *cur_trans; |
183 | int ret; | 183 | int ret; |
184 | |||
185 | if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) | ||
186 | return ERR_PTR(-EROFS); | ||
184 | again: | 187 | again: |
185 | h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); | 188 | h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); |
186 | if (!h) | 189 | if (!h) |
@@ -910,6 +913,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
910 | u64 to_reserve = 0; | 913 | u64 to_reserve = 0; |
911 | u64 index = 0; | 914 | u64 index = 0; |
912 | u64 objectid; | 915 | u64 objectid; |
916 | u64 root_flags; | ||
913 | 917 | ||
914 | new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); | 918 | new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); |
915 | if (!new_root_item) { | 919 | if (!new_root_item) { |
@@ -967,6 +971,13 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
967 | btrfs_set_root_last_snapshot(&root->root_item, trans->transid); | 971 | btrfs_set_root_last_snapshot(&root->root_item, trans->transid); |
968 | memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); | 972 | memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); |
969 | 973 | ||
974 | root_flags = btrfs_root_flags(new_root_item); | ||
975 | if (pending->readonly) | ||
976 | root_flags |= BTRFS_ROOT_SUBVOL_RDONLY; | ||
977 | else | ||
978 | root_flags &= ~BTRFS_ROOT_SUBVOL_RDONLY; | ||
979 | btrfs_set_root_flags(new_root_item, root_flags); | ||
980 | |||
970 | old = btrfs_lock_root_node(root); | 981 | old = btrfs_lock_root_node(root); |
971 | btrfs_cow_block(trans, root, old, NULL, 0, &old); | 982 | btrfs_cow_block(trans, root, old, NULL, 0, &old); |
972 | btrfs_set_lock_blocking(old); | 983 | btrfs_set_lock_blocking(old); |
@@ -1150,6 +1161,11 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, | |||
1150 | INIT_DELAYED_WORK(&ac->work, do_async_commit); | 1161 | INIT_DELAYED_WORK(&ac->work, do_async_commit); |
1151 | ac->root = root; | 1162 | ac->root = root; |
1152 | ac->newtrans = btrfs_join_transaction(root, 0); | 1163 | ac->newtrans = btrfs_join_transaction(root, 0); |
1164 | if (IS_ERR(ac->newtrans)) { | ||
1165 | int err = PTR_ERR(ac->newtrans); | ||
1166 | kfree(ac); | ||
1167 | return err; | ||
1168 | } | ||
1153 | 1169 | ||
1154 | /* take transaction reference */ | 1170 | /* take transaction reference */ |
1155 | mutex_lock(&root->fs_info->trans_mutex); | 1171 | mutex_lock(&root->fs_info->trans_mutex); |
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index f104b57ad4ef..229a594cacd5 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h | |||
@@ -62,6 +62,7 @@ struct btrfs_pending_snapshot { | |||
62 | struct btrfs_block_rsv block_rsv; | 62 | struct btrfs_block_rsv block_rsv; |
63 | /* extra metadata reseration for relocation */ | 63 | /* extra metadata reseration for relocation */ |
64 | int error; | 64 | int error; |
65 | bool readonly; | ||
65 | struct list_head list; | 66 | struct list_head list; |
66 | }; | 67 | }; |
67 | 68 | ||
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 054744ac5719..a4bbb854dfd2 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -338,6 +338,12 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans, | |||
338 | } | 338 | } |
339 | dst_copy = kmalloc(item_size, GFP_NOFS); | 339 | dst_copy = kmalloc(item_size, GFP_NOFS); |
340 | src_copy = kmalloc(item_size, GFP_NOFS); | 340 | src_copy = kmalloc(item_size, GFP_NOFS); |
341 | if (!dst_copy || !src_copy) { | ||
342 | btrfs_release_path(root, path); | ||
343 | kfree(dst_copy); | ||
344 | kfree(src_copy); | ||
345 | return -ENOMEM; | ||
346 | } | ||
341 | 347 | ||
342 | read_extent_buffer(eb, src_copy, src_ptr, item_size); | 348 | read_extent_buffer(eb, src_copy, src_ptr, item_size); |
343 | 349 | ||
@@ -665,6 +671,9 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, | |||
665 | btrfs_dir_item_key_to_cpu(leaf, di, &location); | 671 | btrfs_dir_item_key_to_cpu(leaf, di, &location); |
666 | name_len = btrfs_dir_name_len(leaf, di); | 672 | name_len = btrfs_dir_name_len(leaf, di); |
667 | name = kmalloc(name_len, GFP_NOFS); | 673 | name = kmalloc(name_len, GFP_NOFS); |
674 | if (!name) | ||
675 | return -ENOMEM; | ||
676 | |||
668 | read_extent_buffer(leaf, name, (unsigned long)(di + 1), name_len); | 677 | read_extent_buffer(leaf, name, (unsigned long)(di + 1), name_len); |
669 | btrfs_release_path(root, path); | 678 | btrfs_release_path(root, path); |
670 | 679 | ||
@@ -744,6 +753,9 @@ static noinline int backref_in_log(struct btrfs_root *log, | |||
744 | int match = 0; | 753 | int match = 0; |
745 | 754 | ||
746 | path = btrfs_alloc_path(); | 755 | path = btrfs_alloc_path(); |
756 | if (!path) | ||
757 | return -ENOMEM; | ||
758 | |||
747 | ret = btrfs_search_slot(NULL, log, key, path, 0, 0); | 759 | ret = btrfs_search_slot(NULL, log, key, path, 0, 0); |
748 | if (ret != 0) | 760 | if (ret != 0) |
749 | goto out; | 761 | goto out; |
@@ -967,6 +979,8 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, | |||
967 | key.offset = (u64)-1; | 979 | key.offset = (u64)-1; |
968 | 980 | ||
969 | path = btrfs_alloc_path(); | 981 | path = btrfs_alloc_path(); |
982 | if (!path) | ||
983 | return -ENOMEM; | ||
970 | 984 | ||
971 | while (1) { | 985 | while (1) { |
972 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 986 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
@@ -1178,6 +1192,9 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, | |||
1178 | 1192 | ||
1179 | name_len = btrfs_dir_name_len(eb, di); | 1193 | name_len = btrfs_dir_name_len(eb, di); |
1180 | name = kmalloc(name_len, GFP_NOFS); | 1194 | name = kmalloc(name_len, GFP_NOFS); |
1195 | if (!name) | ||
1196 | return -ENOMEM; | ||
1197 | |||
1181 | log_type = btrfs_dir_type(eb, di); | 1198 | log_type = btrfs_dir_type(eb, di); |
1182 | read_extent_buffer(eb, name, (unsigned long)(di + 1), | 1199 | read_extent_buffer(eb, name, (unsigned long)(di + 1), |
1183 | name_len); | 1200 | name_len); |
@@ -1692,6 +1709,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, | |||
1692 | root_owner = btrfs_header_owner(parent); | 1709 | root_owner = btrfs_header_owner(parent); |
1693 | 1710 | ||
1694 | next = btrfs_find_create_tree_block(root, bytenr, blocksize); | 1711 | next = btrfs_find_create_tree_block(root, bytenr, blocksize); |
1712 | if (!next) | ||
1713 | return -ENOMEM; | ||
1695 | 1714 | ||
1696 | if (*level == 1) { | 1715 | if (*level == 1) { |
1697 | wc->process_func(root, next, wc, ptr_gen); | 1716 | wc->process_func(root, next, wc, ptr_gen); |
@@ -2032,6 +2051,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2032 | wait_log_commit(trans, log_root_tree, | 2051 | wait_log_commit(trans, log_root_tree, |
2033 | log_root_tree->log_transid); | 2052 | log_root_tree->log_transid); |
2034 | mutex_unlock(&log_root_tree->log_mutex); | 2053 | mutex_unlock(&log_root_tree->log_mutex); |
2054 | ret = 0; | ||
2035 | goto out; | 2055 | goto out; |
2036 | } | 2056 | } |
2037 | atomic_set(&log_root_tree->log_commit[index2], 1); | 2057 | atomic_set(&log_root_tree->log_commit[index2], 1); |
@@ -2096,7 +2116,7 @@ out: | |||
2096 | smp_mb(); | 2116 | smp_mb(); |
2097 | if (waitqueue_active(&root->log_commit_wait[index1])) | 2117 | if (waitqueue_active(&root->log_commit_wait[index1])) |
2098 | wake_up(&root->log_commit_wait[index1]); | 2118 | wake_up(&root->log_commit_wait[index1]); |
2099 | return 0; | 2119 | return ret; |
2100 | } | 2120 | } |
2101 | 2121 | ||
2102 | static void free_log_tree(struct btrfs_trans_handle *trans, | 2122 | static void free_log_tree(struct btrfs_trans_handle *trans, |
@@ -2194,6 +2214,9 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | |||
2194 | 2214 | ||
2195 | log = root->log_root; | 2215 | log = root->log_root; |
2196 | path = btrfs_alloc_path(); | 2216 | path = btrfs_alloc_path(); |
2217 | if (!path) | ||
2218 | return -ENOMEM; | ||
2219 | |||
2197 | di = btrfs_lookup_dir_item(trans, log, path, dir->i_ino, | 2220 | di = btrfs_lookup_dir_item(trans, log, path, dir->i_ino, |
2198 | name, name_len, -1); | 2221 | name, name_len, -1); |
2199 | if (IS_ERR(di)) { | 2222 | if (IS_ERR(di)) { |
@@ -2594,6 +2617,9 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
2594 | 2617 | ||
2595 | ins_data = kmalloc(nr * sizeof(struct btrfs_key) + | 2618 | ins_data = kmalloc(nr * sizeof(struct btrfs_key) + |
2596 | nr * sizeof(u32), GFP_NOFS); | 2619 | nr * sizeof(u32), GFP_NOFS); |
2620 | if (!ins_data) | ||
2621 | return -ENOMEM; | ||
2622 | |||
2597 | ins_sizes = (u32 *)ins_data; | 2623 | ins_sizes = (u32 *)ins_data; |
2598 | ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32)); | 2624 | ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32)); |
2599 | 2625 | ||
@@ -2725,7 +2751,13 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
2725 | log = root->log_root; | 2751 | log = root->log_root; |
2726 | 2752 | ||
2727 | path = btrfs_alloc_path(); | 2753 | path = btrfs_alloc_path(); |
2754 | if (!path) | ||
2755 | return -ENOMEM; | ||
2728 | dst_path = btrfs_alloc_path(); | 2756 | dst_path = btrfs_alloc_path(); |
2757 | if (!dst_path) { | ||
2758 | btrfs_free_path(path); | ||
2759 | return -ENOMEM; | ||
2760 | } | ||
2729 | 2761 | ||
2730 | min_key.objectid = inode->i_ino; | 2762 | min_key.objectid = inode->i_ino; |
2731 | min_key.type = BTRFS_INODE_ITEM_KEY; | 2763 | min_key.type = BTRFS_INODE_ITEM_KEY; |
@@ -3080,6 +3112,7 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) | |||
3080 | BUG_ON(!path); | 3112 | BUG_ON(!path); |
3081 | 3113 | ||
3082 | trans = btrfs_start_transaction(fs_info->tree_root, 0); | 3114 | trans = btrfs_start_transaction(fs_info->tree_root, 0); |
3115 | BUG_ON(IS_ERR(trans)); | ||
3083 | 3116 | ||
3084 | wc.trans = trans; | 3117 | wc.trans = trans; |
3085 | wc.pin = 1; | 3118 | wc.pin = 1; |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 1718e1a5c320..2636a051e4b2 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/blkdev.h> | 22 | #include <linux/blkdev.h> |
23 | #include <linux/random.h> | 23 | #include <linux/random.h> |
24 | #include <linux/iocontext.h> | 24 | #include <linux/iocontext.h> |
25 | #include <linux/capability.h> | ||
25 | #include <asm/div64.h> | 26 | #include <asm/div64.h> |
26 | #include "compat.h" | 27 | #include "compat.h" |
27 | #include "ctree.h" | 28 | #include "ctree.h" |
@@ -600,8 +601,10 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | |||
600 | set_blocksize(bdev, 4096); | 601 | set_blocksize(bdev, 4096); |
601 | 602 | ||
602 | bh = btrfs_read_dev_super(bdev); | 603 | bh = btrfs_read_dev_super(bdev); |
603 | if (!bh) | 604 | if (!bh) { |
605 | ret = -EINVAL; | ||
604 | goto error_close; | 606 | goto error_close; |
607 | } | ||
605 | 608 | ||
606 | disk_super = (struct btrfs_super_block *)bh->b_data; | 609 | disk_super = (struct btrfs_super_block *)bh->b_data; |
607 | devid = btrfs_stack_device_id(&disk_super->dev_item); | 610 | devid = btrfs_stack_device_id(&disk_super->dev_item); |
@@ -703,7 +706,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, | |||
703 | goto error_close; | 706 | goto error_close; |
704 | bh = btrfs_read_dev_super(bdev); | 707 | bh = btrfs_read_dev_super(bdev); |
705 | if (!bh) { | 708 | if (!bh) { |
706 | ret = -EIO; | 709 | ret = -EINVAL; |
707 | goto error_close; | 710 | goto error_close; |
708 | } | 711 | } |
709 | disk_super = (struct btrfs_super_block *)bh->b_data; | 712 | disk_super = (struct btrfs_super_block *)bh->b_data; |
@@ -729,59 +732,167 @@ error: | |||
729 | return ret; | 732 | return ret; |
730 | } | 733 | } |
731 | 734 | ||
735 | /* helper to account the used device space in the range */ | ||
736 | int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, | ||
737 | u64 end, u64 *length) | ||
738 | { | ||
739 | struct btrfs_key key; | ||
740 | struct btrfs_root *root = device->dev_root; | ||
741 | struct btrfs_dev_extent *dev_extent; | ||
742 | struct btrfs_path *path; | ||
743 | u64 extent_end; | ||
744 | int ret; | ||
745 | int slot; | ||
746 | struct extent_buffer *l; | ||
747 | |||
748 | *length = 0; | ||
749 | |||
750 | if (start >= device->total_bytes) | ||
751 | return 0; | ||
752 | |||
753 | path = btrfs_alloc_path(); | ||
754 | if (!path) | ||
755 | return -ENOMEM; | ||
756 | path->reada = 2; | ||
757 | |||
758 | key.objectid = device->devid; | ||
759 | key.offset = start; | ||
760 | key.type = BTRFS_DEV_EXTENT_KEY; | ||
761 | |||
762 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
763 | if (ret < 0) | ||
764 | goto out; | ||
765 | if (ret > 0) { | ||
766 | ret = btrfs_previous_item(root, path, key.objectid, key.type); | ||
767 | if (ret < 0) | ||
768 | goto out; | ||
769 | } | ||
770 | |||
771 | while (1) { | ||
772 | l = path->nodes[0]; | ||
773 | slot = path->slots[0]; | ||
774 | if (slot >= btrfs_header_nritems(l)) { | ||
775 | ret = btrfs_next_leaf(root, path); | ||
776 | if (ret == 0) | ||
777 | continue; | ||
778 | if (ret < 0) | ||
779 | goto out; | ||
780 | |||
781 | break; | ||
782 | } | ||
783 | btrfs_item_key_to_cpu(l, &key, slot); | ||
784 | |||
785 | if (key.objectid < device->devid) | ||
786 | goto next; | ||
787 | |||
788 | if (key.objectid > device->devid) | ||
789 | break; | ||
790 | |||
791 | if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) | ||
792 | goto next; | ||
793 | |||
794 | dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); | ||
795 | extent_end = key.offset + btrfs_dev_extent_length(l, | ||
796 | dev_extent); | ||
797 | if (key.offset <= start && extent_end > end) { | ||
798 | *length = end - start + 1; | ||
799 | break; | ||
800 | } else if (key.offset <= start && extent_end > start) | ||
801 | *length += extent_end - start; | ||
802 | else if (key.offset > start && extent_end <= end) | ||
803 | *length += extent_end - key.offset; | ||
804 | else if (key.offset > start && key.offset <= end) { | ||
805 | *length += end - key.offset + 1; | ||
806 | break; | ||
807 | } else if (key.offset > end) | ||
808 | break; | ||
809 | |||
810 | next: | ||
811 | path->slots[0]++; | ||
812 | } | ||
813 | ret = 0; | ||
814 | out: | ||
815 | btrfs_free_path(path); | ||
816 | return ret; | ||
817 | } | ||
818 | |||
732 | /* | 819 | /* |
820 | * find_free_dev_extent - find free space in the specified device | ||
821 | * @trans: transaction handler | ||
822 | * @device: the device which we search the free space in | ||
823 | * @num_bytes: the size of the free space that we need | ||
824 | * @start: store the start of the free space. | ||
825 | * @len: the size of the free space. that we find, or the size of the max | ||
826 | * free space if we don't find suitable free space | ||
827 | * | ||
733 | * this uses a pretty simple search, the expectation is that it is | 828 | * this uses a pretty simple search, the expectation is that it is |
734 | * called very infrequently and that a given device has a small number | 829 | * called very infrequently and that a given device has a small number |
735 | * of extents | 830 | * of extents |
831 | * | ||
832 | * @start is used to store the start of the free space if we find. But if we | ||
833 | * don't find suitable free space, it will be used to store the start position | ||
834 | * of the max free space. | ||
835 | * | ||
836 | * @len is used to store the size of the free space that we find. | ||
837 | * But if we don't find suitable free space, it is used to store the size of | ||
838 | * the max free space. | ||
736 | */ | 839 | */ |
737 | int find_free_dev_extent(struct btrfs_trans_handle *trans, | 840 | int find_free_dev_extent(struct btrfs_trans_handle *trans, |
738 | struct btrfs_device *device, u64 num_bytes, | 841 | struct btrfs_device *device, u64 num_bytes, |
739 | u64 *start, u64 *max_avail) | 842 | u64 *start, u64 *len) |
740 | { | 843 | { |
741 | struct btrfs_key key; | 844 | struct btrfs_key key; |
742 | struct btrfs_root *root = device->dev_root; | 845 | struct btrfs_root *root = device->dev_root; |
743 | struct btrfs_dev_extent *dev_extent = NULL; | 846 | struct btrfs_dev_extent *dev_extent; |
744 | struct btrfs_path *path; | 847 | struct btrfs_path *path; |
745 | u64 hole_size = 0; | 848 | u64 hole_size; |
746 | u64 last_byte = 0; | 849 | u64 max_hole_start; |
747 | u64 search_start = 0; | 850 | u64 max_hole_size; |
851 | u64 extent_end; | ||
852 | u64 search_start; | ||
748 | u64 search_end = device->total_bytes; | 853 | u64 search_end = device->total_bytes; |
749 | int ret; | 854 | int ret; |
750 | int slot = 0; | 855 | int slot; |
751 | int start_found; | ||
752 | struct extent_buffer *l; | 856 | struct extent_buffer *l; |
753 | 857 | ||
754 | path = btrfs_alloc_path(); | ||
755 | if (!path) | ||
756 | return -ENOMEM; | ||
757 | path->reada = 2; | ||
758 | start_found = 0; | ||
759 | |||
760 | /* FIXME use last free of some kind */ | 858 | /* FIXME use last free of some kind */ |
761 | 859 | ||
762 | /* we don't want to overwrite the superblock on the drive, | 860 | /* we don't want to overwrite the superblock on the drive, |
763 | * so we make sure to start at an offset of at least 1MB | 861 | * so we make sure to start at an offset of at least 1MB |
764 | */ | 862 | */ |
765 | search_start = max((u64)1024 * 1024, search_start); | 863 | search_start = 1024 * 1024; |
766 | 864 | ||
767 | if (root->fs_info->alloc_start + num_bytes <= device->total_bytes) | 865 | if (root->fs_info->alloc_start + num_bytes <= search_end) |
768 | search_start = max(root->fs_info->alloc_start, search_start); | 866 | search_start = max(root->fs_info->alloc_start, search_start); |
769 | 867 | ||
868 | max_hole_start = search_start; | ||
869 | max_hole_size = 0; | ||
870 | |||
871 | if (search_start >= search_end) { | ||
872 | ret = -ENOSPC; | ||
873 | goto error; | ||
874 | } | ||
875 | |||
876 | path = btrfs_alloc_path(); | ||
877 | if (!path) { | ||
878 | ret = -ENOMEM; | ||
879 | goto error; | ||
880 | } | ||
881 | path->reada = 2; | ||
882 | |||
770 | key.objectid = device->devid; | 883 | key.objectid = device->devid; |
771 | key.offset = search_start; | 884 | key.offset = search_start; |
772 | key.type = BTRFS_DEV_EXTENT_KEY; | 885 | key.type = BTRFS_DEV_EXTENT_KEY; |
886 | |||
773 | ret = btrfs_search_slot(trans, root, &key, path, 0, 0); | 887 | ret = btrfs_search_slot(trans, root, &key, path, 0, 0); |
774 | if (ret < 0) | 888 | if (ret < 0) |
775 | goto error; | 889 | goto out; |
776 | if (ret > 0) { | 890 | if (ret > 0) { |
777 | ret = btrfs_previous_item(root, path, key.objectid, key.type); | 891 | ret = btrfs_previous_item(root, path, key.objectid, key.type); |
778 | if (ret < 0) | 892 | if (ret < 0) |
779 | goto error; | 893 | goto out; |
780 | if (ret > 0) | ||
781 | start_found = 1; | ||
782 | } | 894 | } |
783 | l = path->nodes[0]; | 895 | |
784 | btrfs_item_key_to_cpu(l, &key, path->slots[0]); | ||
785 | while (1) { | 896 | while (1) { |
786 | l = path->nodes[0]; | 897 | l = path->nodes[0]; |
787 | slot = path->slots[0]; | 898 | slot = path->slots[0]; |
@@ -790,24 +901,9 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans, | |||
790 | if (ret == 0) | 901 | if (ret == 0) |
791 | continue; | 902 | continue; |
792 | if (ret < 0) | 903 | if (ret < 0) |
793 | goto error; | 904 | goto out; |
794 | no_more_items: | 905 | |
795 | if (!start_found) { | 906 | break; |
796 | if (search_start >= search_end) { | ||
797 | ret = -ENOSPC; | ||
798 | goto error; | ||
799 | } | ||
800 | *start = search_start; | ||
801 | start_found = 1; | ||
802 | goto check_pending; | ||
803 | } | ||
804 | *start = last_byte > search_start ? | ||
805 | last_byte : search_start; | ||
806 | if (search_end <= *start) { | ||
807 | ret = -ENOSPC; | ||
808 | goto error; | ||
809 | } | ||
810 | goto check_pending; | ||
811 | } | 907 | } |
812 | btrfs_item_key_to_cpu(l, &key, slot); | 908 | btrfs_item_key_to_cpu(l, &key, slot); |
813 | 909 | ||
@@ -815,48 +911,62 @@ no_more_items: | |||
815 | goto next; | 911 | goto next; |
816 | 912 | ||
817 | if (key.objectid > device->devid) | 913 | if (key.objectid > device->devid) |
818 | goto no_more_items; | 914 | break; |
819 | 915 | ||
820 | if (key.offset >= search_start && key.offset > last_byte && | 916 | if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) |
821 | start_found) { | 917 | goto next; |
822 | if (last_byte < search_start) | ||
823 | last_byte = search_start; | ||
824 | hole_size = key.offset - last_byte; | ||
825 | 918 | ||
826 | if (hole_size > *max_avail) | 919 | if (key.offset > search_start) { |
827 | *max_avail = hole_size; | 920 | hole_size = key.offset - search_start; |
828 | 921 | ||
829 | if (key.offset > last_byte && | 922 | if (hole_size > max_hole_size) { |
830 | hole_size >= num_bytes) { | 923 | max_hole_start = search_start; |
831 | *start = last_byte; | 924 | max_hole_size = hole_size; |
832 | goto check_pending; | 925 | } |
926 | |||
927 | /* | ||
928 | * If this free space is greater than which we need, | ||
929 | * it must be the max free space that we have found | ||
930 | * until now, so max_hole_start must point to the start | ||
931 | * of this free space and the length of this free space | ||
932 | * is stored in max_hole_size. Thus, we return | ||
933 | * max_hole_start and max_hole_size and go back to the | ||
934 | * caller. | ||
935 | */ | ||
936 | if (hole_size >= num_bytes) { | ||
937 | ret = 0; | ||
938 | goto out; | ||
833 | } | 939 | } |
834 | } | 940 | } |
835 | if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) | ||
836 | goto next; | ||
837 | 941 | ||
838 | start_found = 1; | ||
839 | dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); | 942 | dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); |
840 | last_byte = key.offset + btrfs_dev_extent_length(l, dev_extent); | 943 | extent_end = key.offset + btrfs_dev_extent_length(l, |
944 | dev_extent); | ||
945 | if (extent_end > search_start) | ||
946 | search_start = extent_end; | ||
841 | next: | 947 | next: |
842 | path->slots[0]++; | 948 | path->slots[0]++; |
843 | cond_resched(); | 949 | cond_resched(); |
844 | } | 950 | } |
845 | check_pending: | ||
846 | /* we have to make sure we didn't find an extent that has already | ||
847 | * been allocated by the map tree or the original allocation | ||
848 | */ | ||
849 | BUG_ON(*start < search_start); | ||
850 | 951 | ||
851 | if (*start + num_bytes > search_end) { | 952 | hole_size = search_end- search_start; |
852 | ret = -ENOSPC; | 953 | if (hole_size > max_hole_size) { |
853 | goto error; | 954 | max_hole_start = search_start; |
955 | max_hole_size = hole_size; | ||
854 | } | 956 | } |
855 | /* check for pending inserts here */ | ||
856 | ret = 0; | ||
857 | 957 | ||
858 | error: | 958 | /* See above. */ |
959 | if (hole_size < num_bytes) | ||
960 | ret = -ENOSPC; | ||
961 | else | ||
962 | ret = 0; | ||
963 | |||
964 | out: | ||
859 | btrfs_free_path(path); | 965 | btrfs_free_path(path); |
966 | error: | ||
967 | *start = max_hole_start; | ||
968 | if (len) | ||
969 | *len = max_hole_size; | ||
860 | return ret; | 970 | return ret; |
861 | } | 971 | } |
862 | 972 | ||
@@ -1103,6 +1213,10 @@ static int btrfs_rm_dev_item(struct btrfs_root *root, | |||
1103 | return -ENOMEM; | 1213 | return -ENOMEM; |
1104 | 1214 | ||
1105 | trans = btrfs_start_transaction(root, 0); | 1215 | trans = btrfs_start_transaction(root, 0); |
1216 | if (IS_ERR(trans)) { | ||
1217 | btrfs_free_path(path); | ||
1218 | return PTR_ERR(trans); | ||
1219 | } | ||
1106 | key.objectid = BTRFS_DEV_ITEMS_OBJECTID; | 1220 | key.objectid = BTRFS_DEV_ITEMS_OBJECTID; |
1107 | key.type = BTRFS_DEV_ITEM_KEY; | 1221 | key.type = BTRFS_DEV_ITEM_KEY; |
1108 | key.offset = device->devid; | 1222 | key.offset = device->devid; |
@@ -1196,7 +1310,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1196 | set_blocksize(bdev, 4096); | 1310 | set_blocksize(bdev, 4096); |
1197 | bh = btrfs_read_dev_super(bdev); | 1311 | bh = btrfs_read_dev_super(bdev); |
1198 | if (!bh) { | 1312 | if (!bh) { |
1199 | ret = -EIO; | 1313 | ret = -EINVAL; |
1200 | goto error_close; | 1314 | goto error_close; |
1201 | } | 1315 | } |
1202 | disk_super = (struct btrfs_super_block *)bh->b_data; | 1316 | disk_super = (struct btrfs_super_block *)bh->b_data; |
@@ -1496,6 +1610,12 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
1496 | } | 1610 | } |
1497 | 1611 | ||
1498 | trans = btrfs_start_transaction(root, 0); | 1612 | trans = btrfs_start_transaction(root, 0); |
1613 | if (IS_ERR(trans)) { | ||
1614 | kfree(device); | ||
1615 | ret = PTR_ERR(trans); | ||
1616 | goto error; | ||
1617 | } | ||
1618 | |||
1499 | lock_chunks(root); | 1619 | lock_chunks(root); |
1500 | 1620 | ||
1501 | device->writeable = 1; | 1621 | device->writeable = 1; |
@@ -1763,7 +1883,7 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, | |||
1763 | return ret; | 1883 | return ret; |
1764 | 1884 | ||
1765 | trans = btrfs_start_transaction(root, 0); | 1885 | trans = btrfs_start_transaction(root, 0); |
1766 | BUG_ON(!trans); | 1886 | BUG_ON(IS_ERR(trans)); |
1767 | 1887 | ||
1768 | lock_chunks(root); | 1888 | lock_chunks(root); |
1769 | 1889 | ||
@@ -1916,6 +2036,9 @@ int btrfs_balance(struct btrfs_root *dev_root) | |||
1916 | if (dev_root->fs_info->sb->s_flags & MS_RDONLY) | 2036 | if (dev_root->fs_info->sb->s_flags & MS_RDONLY) |
1917 | return -EROFS; | 2037 | return -EROFS; |
1918 | 2038 | ||
2039 | if (!capable(CAP_SYS_ADMIN)) | ||
2040 | return -EPERM; | ||
2041 | |||
1919 | mutex_lock(&dev_root->fs_info->volume_mutex); | 2042 | mutex_lock(&dev_root->fs_info->volume_mutex); |
1920 | dev_root = dev_root->fs_info->dev_root; | 2043 | dev_root = dev_root->fs_info->dev_root; |
1921 | 2044 | ||
@@ -1934,7 +2057,7 @@ int btrfs_balance(struct btrfs_root *dev_root) | |||
1934 | BUG_ON(ret); | 2057 | BUG_ON(ret); |
1935 | 2058 | ||
1936 | trans = btrfs_start_transaction(dev_root, 0); | 2059 | trans = btrfs_start_transaction(dev_root, 0); |
1937 | BUG_ON(!trans); | 2060 | BUG_ON(IS_ERR(trans)); |
1938 | 2061 | ||
1939 | ret = btrfs_grow_device(trans, device, old_size); | 2062 | ret = btrfs_grow_device(trans, device, old_size); |
1940 | BUG_ON(ret); | 2063 | BUG_ON(ret); |
@@ -2100,6 +2223,11 @@ again: | |||
2100 | 2223 | ||
2101 | /* Shrinking succeeded, else we would be at "done". */ | 2224 | /* Shrinking succeeded, else we would be at "done". */ |
2102 | trans = btrfs_start_transaction(root, 0); | 2225 | trans = btrfs_start_transaction(root, 0); |
2226 | if (IS_ERR(trans)) { | ||
2227 | ret = PTR_ERR(trans); | ||
2228 | goto done; | ||
2229 | } | ||
2230 | |||
2103 | lock_chunks(root); | 2231 | lock_chunks(root); |
2104 | 2232 | ||
2105 | device->disk_total_bytes = new_size; | 2233 | device->disk_total_bytes = new_size; |
@@ -2154,66 +2282,67 @@ static noinline u64 chunk_bytes_by_type(u64 type, u64 calc_size, | |||
2154 | return calc_size * num_stripes; | 2282 | return calc_size * num_stripes; |
2155 | } | 2283 | } |
2156 | 2284 | ||
2157 | static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | 2285 | /* Used to sort the devices by max_avail(descending sort) */ |
2158 | struct btrfs_root *extent_root, | 2286 | int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2) |
2159 | struct map_lookup **map_ret, | ||
2160 | u64 *num_bytes, u64 *stripe_size, | ||
2161 | u64 start, u64 type) | ||
2162 | { | 2287 | { |
2163 | struct btrfs_fs_info *info = extent_root->fs_info; | 2288 | if (((struct btrfs_device_info *)dev_info1)->max_avail > |
2164 | struct btrfs_device *device = NULL; | 2289 | ((struct btrfs_device_info *)dev_info2)->max_avail) |
2165 | struct btrfs_fs_devices *fs_devices = info->fs_devices; | 2290 | return -1; |
2166 | struct list_head *cur; | 2291 | else if (((struct btrfs_device_info *)dev_info1)->max_avail < |
2167 | struct map_lookup *map = NULL; | 2292 | ((struct btrfs_device_info *)dev_info2)->max_avail) |
2168 | struct extent_map_tree *em_tree; | 2293 | return 1; |
2169 | struct extent_map *em; | 2294 | else |
2170 | struct list_head private_devs; | 2295 | return 0; |
2171 | int min_stripe_size = 1 * 1024 * 1024; | 2296 | } |
2172 | u64 calc_size = 1024 * 1024 * 1024; | ||
2173 | u64 max_chunk_size = calc_size; | ||
2174 | u64 min_free; | ||
2175 | u64 avail; | ||
2176 | u64 max_avail = 0; | ||
2177 | u64 dev_offset; | ||
2178 | int num_stripes = 1; | ||
2179 | int min_stripes = 1; | ||
2180 | int sub_stripes = 0; | ||
2181 | int looped = 0; | ||
2182 | int ret; | ||
2183 | int index; | ||
2184 | int stripe_len = 64 * 1024; | ||
2185 | 2297 | ||
2186 | if ((type & BTRFS_BLOCK_GROUP_RAID1) && | 2298 | static int __btrfs_calc_nstripes(struct btrfs_fs_devices *fs_devices, u64 type, |
2187 | (type & BTRFS_BLOCK_GROUP_DUP)) { | 2299 | int *num_stripes, int *min_stripes, |
2188 | WARN_ON(1); | 2300 | int *sub_stripes) |
2189 | type &= ~BTRFS_BLOCK_GROUP_DUP; | 2301 | { |
2190 | } | 2302 | *num_stripes = 1; |
2191 | if (list_empty(&fs_devices->alloc_list)) | 2303 | *min_stripes = 1; |
2192 | return -ENOSPC; | 2304 | *sub_stripes = 0; |
2193 | 2305 | ||
2194 | if (type & (BTRFS_BLOCK_GROUP_RAID0)) { | 2306 | if (type & (BTRFS_BLOCK_GROUP_RAID0)) { |
2195 | num_stripes = fs_devices->rw_devices; | 2307 | *num_stripes = fs_devices->rw_devices; |
2196 | min_stripes = 2; | 2308 | *min_stripes = 2; |
2197 | } | 2309 | } |
2198 | if (type & (BTRFS_BLOCK_GROUP_DUP)) { | 2310 | if (type & (BTRFS_BLOCK_GROUP_DUP)) { |
2199 | num_stripes = 2; | 2311 | *num_stripes = 2; |
2200 | min_stripes = 2; | 2312 | *min_stripes = 2; |
2201 | } | 2313 | } |
2202 | if (type & (BTRFS_BLOCK_GROUP_RAID1)) { | 2314 | if (type & (BTRFS_BLOCK_GROUP_RAID1)) { |
2203 | if (fs_devices->rw_devices < 2) | 2315 | if (fs_devices->rw_devices < 2) |
2204 | return -ENOSPC; | 2316 | return -ENOSPC; |
2205 | num_stripes = 2; | 2317 | *num_stripes = 2; |
2206 | min_stripes = 2; | 2318 | *min_stripes = 2; |
2207 | } | 2319 | } |
2208 | if (type & (BTRFS_BLOCK_GROUP_RAID10)) { | 2320 | if (type & (BTRFS_BLOCK_GROUP_RAID10)) { |
2209 | num_stripes = fs_devices->rw_devices; | 2321 | *num_stripes = fs_devices->rw_devices; |
2210 | if (num_stripes < 4) | 2322 | if (*num_stripes < 4) |
2211 | return -ENOSPC; | 2323 | return -ENOSPC; |
2212 | num_stripes &= ~(u32)1; | 2324 | *num_stripes &= ~(u32)1; |
2213 | sub_stripes = 2; | 2325 | *sub_stripes = 2; |
2214 | min_stripes = 4; | 2326 | *min_stripes = 4; |
2215 | } | 2327 | } |
2216 | 2328 | ||
2329 | return 0; | ||
2330 | } | ||
2331 | |||
2332 | static u64 __btrfs_calc_stripe_size(struct btrfs_fs_devices *fs_devices, | ||
2333 | u64 proposed_size, u64 type, | ||
2334 | int num_stripes, int small_stripe) | ||
2335 | { | ||
2336 | int min_stripe_size = 1 * 1024 * 1024; | ||
2337 | u64 calc_size = proposed_size; | ||
2338 | u64 max_chunk_size = calc_size; | ||
2339 | int ncopies = 1; | ||
2340 | |||
2341 | if (type & (BTRFS_BLOCK_GROUP_RAID1 | | ||
2342 | BTRFS_BLOCK_GROUP_DUP | | ||
2343 | BTRFS_BLOCK_GROUP_RAID10)) | ||
2344 | ncopies = 2; | ||
2345 | |||
2217 | if (type & BTRFS_BLOCK_GROUP_DATA) { | 2346 | if (type & BTRFS_BLOCK_GROUP_DATA) { |
2218 | max_chunk_size = 10 * calc_size; | 2347 | max_chunk_size = 10 * calc_size; |
2219 | min_stripe_size = 64 * 1024 * 1024; | 2348 | min_stripe_size = 64 * 1024 * 1024; |
@@ -2230,51 +2359,209 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
2230 | max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1), | 2359 | max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1), |
2231 | max_chunk_size); | 2360 | max_chunk_size); |
2232 | 2361 | ||
2233 | again: | 2362 | if (calc_size * num_stripes > max_chunk_size * ncopies) { |
2234 | max_avail = 0; | 2363 | calc_size = max_chunk_size * ncopies; |
2235 | if (!map || map->num_stripes != num_stripes) { | ||
2236 | kfree(map); | ||
2237 | map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); | ||
2238 | if (!map) | ||
2239 | return -ENOMEM; | ||
2240 | map->num_stripes = num_stripes; | ||
2241 | } | ||
2242 | |||
2243 | if (calc_size * num_stripes > max_chunk_size) { | ||
2244 | calc_size = max_chunk_size; | ||
2245 | do_div(calc_size, num_stripes); | 2364 | do_div(calc_size, num_stripes); |
2246 | do_div(calc_size, stripe_len); | 2365 | do_div(calc_size, BTRFS_STRIPE_LEN); |
2247 | calc_size *= stripe_len; | 2366 | calc_size *= BTRFS_STRIPE_LEN; |
2248 | } | 2367 | } |
2249 | 2368 | ||
2250 | /* we don't want tiny stripes */ | 2369 | /* we don't want tiny stripes */ |
2251 | if (!looped) | 2370 | if (!small_stripe) |
2252 | calc_size = max_t(u64, min_stripe_size, calc_size); | 2371 | calc_size = max_t(u64, min_stripe_size, calc_size); |
2253 | 2372 | ||
2254 | /* | 2373 | /* |
2255 | * we're about to do_div by the stripe_len so lets make sure | 2374 | * we're about to do_div by the BTRFS_STRIPE_LEN so lets make sure |
2256 | * we end up with something bigger than a stripe | 2375 | * we end up with something bigger than a stripe |
2257 | */ | 2376 | */ |
2258 | calc_size = max_t(u64, calc_size, stripe_len * 4); | 2377 | calc_size = max_t(u64, calc_size, BTRFS_STRIPE_LEN); |
2378 | |||
2379 | do_div(calc_size, BTRFS_STRIPE_LEN); | ||
2380 | calc_size *= BTRFS_STRIPE_LEN; | ||
2381 | |||
2382 | return calc_size; | ||
2383 | } | ||
2384 | |||
2385 | static struct map_lookup *__shrink_map_lookup_stripes(struct map_lookup *map, | ||
2386 | int num_stripes) | ||
2387 | { | ||
2388 | struct map_lookup *new; | ||
2389 | size_t len = map_lookup_size(num_stripes); | ||
2390 | |||
2391 | BUG_ON(map->num_stripes < num_stripes); | ||
2392 | |||
2393 | if (map->num_stripes == num_stripes) | ||
2394 | return map; | ||
2395 | |||
2396 | new = kmalloc(len, GFP_NOFS); | ||
2397 | if (!new) { | ||
2398 | /* just change map->num_stripes */ | ||
2399 | map->num_stripes = num_stripes; | ||
2400 | return map; | ||
2401 | } | ||
2402 | |||
2403 | memcpy(new, map, len); | ||
2404 | new->num_stripes = num_stripes; | ||
2405 | kfree(map); | ||
2406 | return new; | ||
2407 | } | ||
2408 | |||
2409 | /* | ||
2410 | * helper to allocate device space from btrfs_device_info, in which we stored | ||
2411 | * max free space information of every device. It is used when we can not | ||
2412 | * allocate chunks by default size. | ||
2413 | * | ||
2414 | * By this helper, we can allocate a new chunk as larger as possible. | ||
2415 | */ | ||
2416 | static int __btrfs_alloc_tiny_space(struct btrfs_trans_handle *trans, | ||
2417 | struct btrfs_fs_devices *fs_devices, | ||
2418 | struct btrfs_device_info *devices, | ||
2419 | int nr_device, u64 type, | ||
2420 | struct map_lookup **map_lookup, | ||
2421 | int min_stripes, u64 *stripe_size) | ||
2422 | { | ||
2423 | int i, index, sort_again = 0; | ||
2424 | int min_devices = min_stripes; | ||
2425 | u64 max_avail, min_free; | ||
2426 | struct map_lookup *map = *map_lookup; | ||
2427 | int ret; | ||
2428 | |||
2429 | if (nr_device < min_stripes) | ||
2430 | return -ENOSPC; | ||
2431 | |||
2432 | btrfs_descending_sort_devices(devices, nr_device); | ||
2433 | |||
2434 | max_avail = devices[0].max_avail; | ||
2435 | if (!max_avail) | ||
2436 | return -ENOSPC; | ||
2437 | |||
2438 | for (i = 0; i < nr_device; i++) { | ||
2439 | /* | ||
2440 | * if dev_offset = 0, it means the free space of this device | ||
2441 | * is less than what we need, and we didn't search max avail | ||
2442 | * extent on this device, so do it now. | ||
2443 | */ | ||
2444 | if (!devices[i].dev_offset) { | ||
2445 | ret = find_free_dev_extent(trans, devices[i].dev, | ||
2446 | max_avail, | ||
2447 | &devices[i].dev_offset, | ||
2448 | &devices[i].max_avail); | ||
2449 | if (ret != 0 && ret != -ENOSPC) | ||
2450 | return ret; | ||
2451 | sort_again = 1; | ||
2452 | } | ||
2453 | } | ||
2454 | |||
2455 | /* we update the max avail free extent of each devices, sort again */ | ||
2456 | if (sort_again) | ||
2457 | btrfs_descending_sort_devices(devices, nr_device); | ||
2458 | |||
2459 | if (type & BTRFS_BLOCK_GROUP_DUP) | ||
2460 | min_devices = 1; | ||
2461 | |||
2462 | if (!devices[min_devices - 1].max_avail) | ||
2463 | return -ENOSPC; | ||
2464 | |||
2465 | max_avail = devices[min_devices - 1].max_avail; | ||
2466 | if (type & BTRFS_BLOCK_GROUP_DUP) | ||
2467 | do_div(max_avail, 2); | ||
2468 | |||
2469 | max_avail = __btrfs_calc_stripe_size(fs_devices, max_avail, type, | ||
2470 | min_stripes, 1); | ||
2471 | if (type & BTRFS_BLOCK_GROUP_DUP) | ||
2472 | min_free = max_avail * 2; | ||
2473 | else | ||
2474 | min_free = max_avail; | ||
2475 | |||
2476 | if (min_free > devices[min_devices - 1].max_avail) | ||
2477 | return -ENOSPC; | ||
2478 | |||
2479 | map = __shrink_map_lookup_stripes(map, min_stripes); | ||
2480 | *stripe_size = max_avail; | ||
2481 | |||
2482 | index = 0; | ||
2483 | for (i = 0; i < min_stripes; i++) { | ||
2484 | map->stripes[i].dev = devices[index].dev; | ||
2485 | map->stripes[i].physical = devices[index].dev_offset; | ||
2486 | if (type & BTRFS_BLOCK_GROUP_DUP) { | ||
2487 | i++; | ||
2488 | map->stripes[i].dev = devices[index].dev; | ||
2489 | map->stripes[i].physical = devices[index].dev_offset + | ||
2490 | max_avail; | ||
2491 | } | ||
2492 | index++; | ||
2493 | } | ||
2494 | *map_lookup = map; | ||
2259 | 2495 | ||
2260 | do_div(calc_size, stripe_len); | 2496 | return 0; |
2261 | calc_size *= stripe_len; | 2497 | } |
2498 | |||
2499 | static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | ||
2500 | struct btrfs_root *extent_root, | ||
2501 | struct map_lookup **map_ret, | ||
2502 | u64 *num_bytes, u64 *stripe_size, | ||
2503 | u64 start, u64 type) | ||
2504 | { | ||
2505 | struct btrfs_fs_info *info = extent_root->fs_info; | ||
2506 | struct btrfs_device *device = NULL; | ||
2507 | struct btrfs_fs_devices *fs_devices = info->fs_devices; | ||
2508 | struct list_head *cur; | ||
2509 | struct map_lookup *map; | ||
2510 | struct extent_map_tree *em_tree; | ||
2511 | struct extent_map *em; | ||
2512 | struct btrfs_device_info *devices_info; | ||
2513 | struct list_head private_devs; | ||
2514 | u64 calc_size = 1024 * 1024 * 1024; | ||
2515 | u64 min_free; | ||
2516 | u64 avail; | ||
2517 | u64 dev_offset; | ||
2518 | int num_stripes; | ||
2519 | int min_stripes; | ||
2520 | int sub_stripes; | ||
2521 | int min_devices; /* the min number of devices we need */ | ||
2522 | int i; | ||
2523 | int ret; | ||
2524 | int index; | ||
2525 | |||
2526 | if ((type & BTRFS_BLOCK_GROUP_RAID1) && | ||
2527 | (type & BTRFS_BLOCK_GROUP_DUP)) { | ||
2528 | WARN_ON(1); | ||
2529 | type &= ~BTRFS_BLOCK_GROUP_DUP; | ||
2530 | } | ||
2531 | if (list_empty(&fs_devices->alloc_list)) | ||
2532 | return -ENOSPC; | ||
2533 | |||
2534 | ret = __btrfs_calc_nstripes(fs_devices, type, &num_stripes, | ||
2535 | &min_stripes, &sub_stripes); | ||
2536 | if (ret) | ||
2537 | return ret; | ||
2538 | |||
2539 | devices_info = kzalloc(sizeof(*devices_info) * fs_devices->rw_devices, | ||
2540 | GFP_NOFS); | ||
2541 | if (!devices_info) | ||
2542 | return -ENOMEM; | ||
2543 | |||
2544 | map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); | ||
2545 | if (!map) { | ||
2546 | ret = -ENOMEM; | ||
2547 | goto error; | ||
2548 | } | ||
2549 | map->num_stripes = num_stripes; | ||
2262 | 2550 | ||
2263 | cur = fs_devices->alloc_list.next; | 2551 | cur = fs_devices->alloc_list.next; |
2264 | index = 0; | 2552 | index = 0; |
2553 | i = 0; | ||
2265 | 2554 | ||
2266 | if (type & BTRFS_BLOCK_GROUP_DUP) | 2555 | calc_size = __btrfs_calc_stripe_size(fs_devices, calc_size, type, |
2556 | num_stripes, 0); | ||
2557 | |||
2558 | if (type & BTRFS_BLOCK_GROUP_DUP) { | ||
2267 | min_free = calc_size * 2; | 2559 | min_free = calc_size * 2; |
2268 | else | 2560 | min_devices = 1; |
2561 | } else { | ||
2269 | min_free = calc_size; | 2562 | min_free = calc_size; |
2270 | 2563 | min_devices = min_stripes; | |
2271 | /* | 2564 | } |
2272 | * we add 1MB because we never use the first 1MB of the device, unless | ||
2273 | * we've looped, then we are likely allocating the maximum amount of | ||
2274 | * space left already | ||
2275 | */ | ||
2276 | if (!looped) | ||
2277 | min_free += 1024 * 1024; | ||
2278 | 2565 | ||
2279 | INIT_LIST_HEAD(&private_devs); | 2566 | INIT_LIST_HEAD(&private_devs); |
2280 | while (index < num_stripes) { | 2567 | while (index < num_stripes) { |
@@ -2287,27 +2574,39 @@ again: | |||
2287 | cur = cur->next; | 2574 | cur = cur->next; |
2288 | 2575 | ||
2289 | if (device->in_fs_metadata && avail >= min_free) { | 2576 | if (device->in_fs_metadata && avail >= min_free) { |
2290 | ret = find_free_dev_extent(trans, device, | 2577 | ret = find_free_dev_extent(trans, device, min_free, |
2291 | min_free, &dev_offset, | 2578 | &devices_info[i].dev_offset, |
2292 | &max_avail); | 2579 | &devices_info[i].max_avail); |
2293 | if (ret == 0) { | 2580 | if (ret == 0) { |
2294 | list_move_tail(&device->dev_alloc_list, | 2581 | list_move_tail(&device->dev_alloc_list, |
2295 | &private_devs); | 2582 | &private_devs); |
2296 | map->stripes[index].dev = device; | 2583 | map->stripes[index].dev = device; |
2297 | map->stripes[index].physical = dev_offset; | 2584 | map->stripes[index].physical = |
2585 | devices_info[i].dev_offset; | ||
2298 | index++; | 2586 | index++; |
2299 | if (type & BTRFS_BLOCK_GROUP_DUP) { | 2587 | if (type & BTRFS_BLOCK_GROUP_DUP) { |
2300 | map->stripes[index].dev = device; | 2588 | map->stripes[index].dev = device; |
2301 | map->stripes[index].physical = | 2589 | map->stripes[index].physical = |
2302 | dev_offset + calc_size; | 2590 | devices_info[i].dev_offset + |
2591 | calc_size; | ||
2303 | index++; | 2592 | index++; |
2304 | } | 2593 | } |
2305 | } | 2594 | } else if (ret != -ENOSPC) |
2306 | } else if (device->in_fs_metadata && avail > max_avail) | 2595 | goto error; |
2307 | max_avail = avail; | 2596 | |
2597 | devices_info[i].dev = device; | ||
2598 | i++; | ||
2599 | } else if (device->in_fs_metadata && | ||
2600 | avail >= BTRFS_STRIPE_LEN) { | ||
2601 | devices_info[i].dev = device; | ||
2602 | devices_info[i].max_avail = avail; | ||
2603 | i++; | ||
2604 | } | ||
2605 | |||
2308 | if (cur == &fs_devices->alloc_list) | 2606 | if (cur == &fs_devices->alloc_list) |
2309 | break; | 2607 | break; |
2310 | } | 2608 | } |
2609 | |||
2311 | list_splice(&private_devs, &fs_devices->alloc_list); | 2610 | list_splice(&private_devs, &fs_devices->alloc_list); |
2312 | if (index < num_stripes) { | 2611 | if (index < num_stripes) { |
2313 | if (index >= min_stripes) { | 2612 | if (index >= min_stripes) { |
@@ -2316,34 +2615,36 @@ again: | |||
2316 | num_stripes /= sub_stripes; | 2615 | num_stripes /= sub_stripes; |
2317 | num_stripes *= sub_stripes; | 2616 | num_stripes *= sub_stripes; |
2318 | } | 2617 | } |
2319 | looped = 1; | 2618 | |
2320 | goto again; | 2619 | map = __shrink_map_lookup_stripes(map, num_stripes); |
2321 | } | 2620 | } else if (i >= min_devices) { |
2322 | if (!looped && max_avail > 0) { | 2621 | ret = __btrfs_alloc_tiny_space(trans, fs_devices, |
2323 | looped = 1; | 2622 | devices_info, i, type, |
2324 | calc_size = max_avail; | 2623 | &map, min_stripes, |
2325 | goto again; | 2624 | &calc_size); |
2625 | if (ret) | ||
2626 | goto error; | ||
2627 | } else { | ||
2628 | ret = -ENOSPC; | ||
2629 | goto error; | ||
2326 | } | 2630 | } |
2327 | kfree(map); | ||
2328 | return -ENOSPC; | ||
2329 | } | 2631 | } |
2330 | map->sector_size = extent_root->sectorsize; | 2632 | map->sector_size = extent_root->sectorsize; |
2331 | map->stripe_len = stripe_len; | 2633 | map->stripe_len = BTRFS_STRIPE_LEN; |
2332 | map->io_align = stripe_len; | 2634 | map->io_align = BTRFS_STRIPE_LEN; |
2333 | map->io_width = stripe_len; | 2635 | map->io_width = BTRFS_STRIPE_LEN; |
2334 | map->type = type; | 2636 | map->type = type; |
2335 | map->num_stripes = num_stripes; | ||
2336 | map->sub_stripes = sub_stripes; | 2637 | map->sub_stripes = sub_stripes; |
2337 | 2638 | ||
2338 | *map_ret = map; | 2639 | *map_ret = map; |
2339 | *stripe_size = calc_size; | 2640 | *stripe_size = calc_size; |
2340 | *num_bytes = chunk_bytes_by_type(type, calc_size, | 2641 | *num_bytes = chunk_bytes_by_type(type, calc_size, |
2341 | num_stripes, sub_stripes); | 2642 | map->num_stripes, sub_stripes); |
2342 | 2643 | ||
2343 | em = alloc_extent_map(GFP_NOFS); | 2644 | em = alloc_extent_map(GFP_NOFS); |
2344 | if (!em) { | 2645 | if (!em) { |
2345 | kfree(map); | 2646 | ret = -ENOMEM; |
2346 | return -ENOMEM; | 2647 | goto error; |
2347 | } | 2648 | } |
2348 | em->bdev = (struct block_device *)map; | 2649 | em->bdev = (struct block_device *)map; |
2349 | em->start = start; | 2650 | em->start = start; |
@@ -2376,7 +2677,13 @@ again: | |||
2376 | index++; | 2677 | index++; |
2377 | } | 2678 | } |
2378 | 2679 | ||
2680 | kfree(devices_info); | ||
2379 | return 0; | 2681 | return 0; |
2682 | |||
2683 | error: | ||
2684 | kfree(map); | ||
2685 | kfree(devices_info); | ||
2686 | return ret; | ||
2380 | } | 2687 | } |
2381 | 2688 | ||
2382 | static int __finish_chunk_alloc(struct btrfs_trans_handle *trans, | 2689 | static int __finish_chunk_alloc(struct btrfs_trans_handle *trans, |
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 1be781079450..7fb59d45fe8c 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
@@ -20,8 +20,11 @@ | |||
20 | #define __BTRFS_VOLUMES_ | 20 | #define __BTRFS_VOLUMES_ |
21 | 21 | ||
22 | #include <linux/bio.h> | 22 | #include <linux/bio.h> |
23 | #include <linux/sort.h> | ||
23 | #include "async-thread.h" | 24 | #include "async-thread.h" |
24 | 25 | ||
26 | #define BTRFS_STRIPE_LEN (64 * 1024) | ||
27 | |||
25 | struct buffer_head; | 28 | struct buffer_head; |
26 | struct btrfs_pending_bios { | 29 | struct btrfs_pending_bios { |
27 | struct bio *head; | 30 | struct bio *head; |
@@ -136,6 +139,30 @@ struct btrfs_multi_bio { | |||
136 | struct btrfs_bio_stripe stripes[]; | 139 | struct btrfs_bio_stripe stripes[]; |
137 | }; | 140 | }; |
138 | 141 | ||
142 | struct btrfs_device_info { | ||
143 | struct btrfs_device *dev; | ||
144 | u64 dev_offset; | ||
145 | u64 max_avail; | ||
146 | }; | ||
147 | |||
148 | /* Used to sort the devices by max_avail(descending sort) */ | ||
149 | int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2); | ||
150 | |||
151 | /* | ||
152 | * sort the devices by max_avail, in which max free extent size of each device | ||
153 | * is stored.(Descending Sort) | ||
154 | */ | ||
155 | static inline void btrfs_descending_sort_devices( | ||
156 | struct btrfs_device_info *devices, | ||
157 | size_t nr_devices) | ||
158 | { | ||
159 | sort(devices, nr_devices, sizeof(struct btrfs_device_info), | ||
160 | btrfs_cmp_device_free_bytes, NULL); | ||
161 | } | ||
162 | |||
163 | int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, | ||
164 | u64 end, u64 *length); | ||
165 | |||
139 | #define btrfs_multi_bio_size(n) (sizeof(struct btrfs_multi_bio) + \ | 166 | #define btrfs_multi_bio_size(n) (sizeof(struct btrfs_multi_bio) + \ |
140 | (sizeof(struct btrfs_bio_stripe) * (n))) | 167 | (sizeof(struct btrfs_bio_stripe) * (n))) |
141 | 168 | ||
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 698fdd2c739c..a5776531dc2b 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c | |||
@@ -316,6 +316,15 @@ ssize_t btrfs_getxattr(struct dentry *dentry, const char *name, | |||
316 | int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value, | 316 | int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value, |
317 | size_t size, int flags) | 317 | size_t size, int flags) |
318 | { | 318 | { |
319 | struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root; | ||
320 | |||
321 | /* | ||
322 | * The permission on security.* and system.* is not checked | ||
323 | * in permission(). | ||
324 | */ | ||
325 | if (btrfs_root_readonly(root)) | ||
326 | return -EROFS; | ||
327 | |||
319 | /* | 328 | /* |
320 | * If this is a request for a synthetic attribute in the system.* | 329 | * If this is a request for a synthetic attribute in the system.* |
321 | * namespace use the generic infrastructure to resolve a handler | 330 | * namespace use the generic infrastructure to resolve a handler |
@@ -336,6 +345,15 @@ int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value, | |||
336 | 345 | ||
337 | int btrfs_removexattr(struct dentry *dentry, const char *name) | 346 | int btrfs_removexattr(struct dentry *dentry, const char *name) |
338 | { | 347 | { |
348 | struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root; | ||
349 | |||
350 | /* | ||
351 | * The permission on security.* and system.* is not checked | ||
352 | * in permission(). | ||
353 | */ | ||
354 | if (btrfs_root_readonly(root)) | ||
355 | return -EROFS; | ||
356 | |||
339 | /* | 357 | /* |
340 | * If this is a request for a synthetic attribute in the system.* | 358 | * If this is a request for a synthetic attribute in the system.* |
341 | * namespace use the generic infrastructure to resolve a handler | 359 | * namespace use the generic infrastructure to resolve a handler |
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index b9cd5445f71c..f5ec2d44150d 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c | |||
@@ -32,15 +32,6 @@ | |||
32 | #include <linux/bio.h> | 32 | #include <linux/bio.h> |
33 | #include "compression.h" | 33 | #include "compression.h" |
34 | 34 | ||
35 | /* Plan: call deflate() with avail_in == *sourcelen, | ||
36 | avail_out = *dstlen - 12 and flush == Z_FINISH. | ||
37 | If it doesn't manage to finish, call it again with | ||
38 | avail_in == 0 and avail_out set to the remaining 12 | ||
39 | bytes for it to clean up. | ||
40 | Q: Is 12 bytes sufficient? | ||
41 | */ | ||
42 | #define STREAM_END_SPACE 12 | ||
43 | |||
44 | struct workspace { | 35 | struct workspace { |
45 | z_stream inf_strm; | 36 | z_stream inf_strm; |
46 | z_stream def_strm; | 37 | z_stream def_strm; |
@@ -48,152 +39,51 @@ struct workspace { | |||
48 | struct list_head list; | 39 | struct list_head list; |
49 | }; | 40 | }; |
50 | 41 | ||
51 | static LIST_HEAD(idle_workspace); | 42 | static void zlib_free_workspace(struct list_head *ws) |
52 | static DEFINE_SPINLOCK(workspace_lock); | 43 | { |
53 | static unsigned long num_workspace; | 44 | struct workspace *workspace = list_entry(ws, struct workspace, list); |
54 | static atomic_t alloc_workspace = ATOMIC_INIT(0); | ||
55 | static DECLARE_WAIT_QUEUE_HEAD(workspace_wait); | ||
56 | 45 | ||
57 | /* | 46 | vfree(workspace->def_strm.workspace); |
58 | * this finds an available zlib workspace or allocates a new one | 47 | vfree(workspace->inf_strm.workspace); |
59 | * NULL or an ERR_PTR is returned if things go bad. | 48 | kfree(workspace->buf); |
60 | */ | 49 | kfree(workspace); |
61 | static struct workspace *find_zlib_workspace(void) | 50 | } |
51 | |||
52 | static struct list_head *zlib_alloc_workspace(void) | ||
62 | { | 53 | { |
63 | struct workspace *workspace; | 54 | struct workspace *workspace; |
64 | int ret; | ||
65 | int cpus = num_online_cpus(); | ||
66 | |||
67 | again: | ||
68 | spin_lock(&workspace_lock); | ||
69 | if (!list_empty(&idle_workspace)) { | ||
70 | workspace = list_entry(idle_workspace.next, struct workspace, | ||
71 | list); | ||
72 | list_del(&workspace->list); | ||
73 | num_workspace--; | ||
74 | spin_unlock(&workspace_lock); | ||
75 | return workspace; | ||
76 | 55 | ||
77 | } | ||
78 | spin_unlock(&workspace_lock); | ||
79 | if (atomic_read(&alloc_workspace) > cpus) { | ||
80 | DEFINE_WAIT(wait); | ||
81 | prepare_to_wait(&workspace_wait, &wait, TASK_UNINTERRUPTIBLE); | ||
82 | if (atomic_read(&alloc_workspace) > cpus) | ||
83 | schedule(); | ||
84 | finish_wait(&workspace_wait, &wait); | ||
85 | goto again; | ||
86 | } | ||
87 | atomic_inc(&alloc_workspace); | ||
88 | workspace = kzalloc(sizeof(*workspace), GFP_NOFS); | 56 | workspace = kzalloc(sizeof(*workspace), GFP_NOFS); |
89 | if (!workspace) { | 57 | if (!workspace) |
90 | ret = -ENOMEM; | 58 | return ERR_PTR(-ENOMEM); |
91 | goto fail; | ||
92 | } | ||
93 | 59 | ||
94 | workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize()); | 60 | workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize()); |
95 | if (!workspace->def_strm.workspace) { | ||
96 | ret = -ENOMEM; | ||
97 | goto fail; | ||
98 | } | ||
99 | workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize()); | 61 | workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize()); |
100 | if (!workspace->inf_strm.workspace) { | ||
101 | ret = -ENOMEM; | ||
102 | goto fail_inflate; | ||
103 | } | ||
104 | workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS); | 62 | workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS); |
105 | if (!workspace->buf) { | 63 | if (!workspace->def_strm.workspace || |
106 | ret = -ENOMEM; | 64 | !workspace->inf_strm.workspace || !workspace->buf) |
107 | goto fail_kmalloc; | 65 | goto fail; |
108 | } | ||
109 | return workspace; | ||
110 | |||
111 | fail_kmalloc: | ||
112 | vfree(workspace->inf_strm.workspace); | ||
113 | fail_inflate: | ||
114 | vfree(workspace->def_strm.workspace); | ||
115 | fail: | ||
116 | kfree(workspace); | ||
117 | atomic_dec(&alloc_workspace); | ||
118 | wake_up(&workspace_wait); | ||
119 | return ERR_PTR(ret); | ||
120 | } | ||
121 | |||
122 | /* | ||
123 | * put a workspace struct back on the list or free it if we have enough | ||
124 | * idle ones sitting around | ||
125 | */ | ||
126 | static int free_workspace(struct workspace *workspace) | ||
127 | { | ||
128 | spin_lock(&workspace_lock); | ||
129 | if (num_workspace < num_online_cpus()) { | ||
130 | list_add_tail(&workspace->list, &idle_workspace); | ||
131 | num_workspace++; | ||
132 | spin_unlock(&workspace_lock); | ||
133 | if (waitqueue_active(&workspace_wait)) | ||
134 | wake_up(&workspace_wait); | ||
135 | return 0; | ||
136 | } | ||
137 | spin_unlock(&workspace_lock); | ||
138 | vfree(workspace->def_strm.workspace); | ||
139 | vfree(workspace->inf_strm.workspace); | ||
140 | kfree(workspace->buf); | ||
141 | kfree(workspace); | ||
142 | 66 | ||
143 | atomic_dec(&alloc_workspace); | 67 | INIT_LIST_HEAD(&workspace->list); |
144 | if (waitqueue_active(&workspace_wait)) | ||
145 | wake_up(&workspace_wait); | ||
146 | return 0; | ||
147 | } | ||
148 | 68 | ||
149 | /* | 69 | return &workspace->list; |
150 | * cleanup function for module exit | 70 | fail: |
151 | */ | 71 | zlib_free_workspace(&workspace->list); |
152 | static void free_workspaces(void) | 72 | return ERR_PTR(-ENOMEM); |
153 | { | ||
154 | struct workspace *workspace; | ||
155 | while (!list_empty(&idle_workspace)) { | ||
156 | workspace = list_entry(idle_workspace.next, struct workspace, | ||
157 | list); | ||
158 | list_del(&workspace->list); | ||
159 | vfree(workspace->def_strm.workspace); | ||
160 | vfree(workspace->inf_strm.workspace); | ||
161 | kfree(workspace->buf); | ||
162 | kfree(workspace); | ||
163 | atomic_dec(&alloc_workspace); | ||
164 | } | ||
165 | } | 73 | } |
166 | 74 | ||
167 | /* | 75 | static int zlib_compress_pages(struct list_head *ws, |
168 | * given an address space and start/len, compress the bytes. | 76 | struct address_space *mapping, |
169 | * | 77 | u64 start, unsigned long len, |
170 | * pages are allocated to hold the compressed result and stored | 78 | struct page **pages, |
171 | * in 'pages' | 79 | unsigned long nr_dest_pages, |
172 | * | 80 | unsigned long *out_pages, |
173 | * out_pages is used to return the number of pages allocated. There | 81 | unsigned long *total_in, |
174 | * may be pages allocated even if we return an error | 82 | unsigned long *total_out, |
175 | * | 83 | unsigned long max_out) |
176 | * total_in is used to return the number of bytes actually read. It | ||
177 | * may be smaller then len if we had to exit early because we | ||
178 | * ran out of room in the pages array or because we cross the | ||
179 | * max_out threshold. | ||
180 | * | ||
181 | * total_out is used to return the total number of compressed bytes | ||
182 | * | ||
183 | * max_out tells us the max number of bytes that we're allowed to | ||
184 | * stuff into pages | ||
185 | */ | ||
186 | int btrfs_zlib_compress_pages(struct address_space *mapping, | ||
187 | u64 start, unsigned long len, | ||
188 | struct page **pages, | ||
189 | unsigned long nr_dest_pages, | ||
190 | unsigned long *out_pages, | ||
191 | unsigned long *total_in, | ||
192 | unsigned long *total_out, | ||
193 | unsigned long max_out) | ||
194 | { | 84 | { |
85 | struct workspace *workspace = list_entry(ws, struct workspace, list); | ||
195 | int ret; | 86 | int ret; |
196 | struct workspace *workspace; | ||
197 | char *data_in; | 87 | char *data_in; |
198 | char *cpage_out; | 88 | char *cpage_out; |
199 | int nr_pages = 0; | 89 | int nr_pages = 0; |
@@ -205,10 +95,6 @@ int btrfs_zlib_compress_pages(struct address_space *mapping, | |||
205 | *total_out = 0; | 95 | *total_out = 0; |
206 | *total_in = 0; | 96 | *total_in = 0; |
207 | 97 | ||
208 | workspace = find_zlib_workspace(); | ||
209 | if (IS_ERR(workspace)) | ||
210 | return -1; | ||
211 | |||
212 | if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) { | 98 | if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) { |
213 | printk(KERN_WARNING "deflateInit failed\n"); | 99 | printk(KERN_WARNING "deflateInit failed\n"); |
214 | ret = -1; | 100 | ret = -1; |
@@ -222,6 +108,10 @@ int btrfs_zlib_compress_pages(struct address_space *mapping, | |||
222 | data_in = kmap(in_page); | 108 | data_in = kmap(in_page); |
223 | 109 | ||
224 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); | 110 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); |
111 | if (out_page == NULL) { | ||
112 | ret = -1; | ||
113 | goto out; | ||
114 | } | ||
225 | cpage_out = kmap(out_page); | 115 | cpage_out = kmap(out_page); |
226 | pages[0] = out_page; | 116 | pages[0] = out_page; |
227 | nr_pages = 1; | 117 | nr_pages = 1; |
@@ -260,6 +150,10 @@ int btrfs_zlib_compress_pages(struct address_space *mapping, | |||
260 | goto out; | 150 | goto out; |
261 | } | 151 | } |
262 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); | 152 | out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); |
153 | if (out_page == NULL) { | ||
154 | ret = -1; | ||
155 | goto out; | ||
156 | } | ||
263 | cpage_out = kmap(out_page); | 157 | cpage_out = kmap(out_page); |
264 | pages[nr_pages] = out_page; | 158 | pages[nr_pages] = out_page; |
265 | nr_pages++; | 159 | nr_pages++; |
@@ -314,55 +208,26 @@ out: | |||
314 | kunmap(in_page); | 208 | kunmap(in_page); |
315 | page_cache_release(in_page); | 209 | page_cache_release(in_page); |
316 | } | 210 | } |
317 | free_workspace(workspace); | ||
318 | return ret; | 211 | return ret; |
319 | } | 212 | } |
320 | 213 | ||
321 | /* | 214 | static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in, |
322 | * pages_in is an array of pages with compressed data. | 215 | u64 disk_start, |
323 | * | 216 | struct bio_vec *bvec, |
324 | * disk_start is the starting logical offset of this array in the file | 217 | int vcnt, |
325 | * | 218 | size_t srclen) |
326 | * bvec is a bio_vec of pages from the file that we want to decompress into | ||
327 | * | ||
328 | * vcnt is the count of pages in the biovec | ||
329 | * | ||
330 | * srclen is the number of bytes in pages_in | ||
331 | * | ||
332 | * The basic idea is that we have a bio that was created by readpages. | ||
333 | * The pages in the bio are for the uncompressed data, and they may not | ||
334 | * be contiguous. They all correspond to the range of bytes covered by | ||
335 | * the compressed extent. | ||
336 | */ | ||
337 | int btrfs_zlib_decompress_biovec(struct page **pages_in, | ||
338 | u64 disk_start, | ||
339 | struct bio_vec *bvec, | ||
340 | int vcnt, | ||
341 | size_t srclen) | ||
342 | { | 219 | { |
343 | int ret = 0; | 220 | struct workspace *workspace = list_entry(ws, struct workspace, list); |
221 | int ret = 0, ret2; | ||
344 | int wbits = MAX_WBITS; | 222 | int wbits = MAX_WBITS; |
345 | struct workspace *workspace; | ||
346 | char *data_in; | 223 | char *data_in; |
347 | size_t total_out = 0; | 224 | size_t total_out = 0; |
348 | unsigned long page_bytes_left; | ||
349 | unsigned long page_in_index = 0; | 225 | unsigned long page_in_index = 0; |
350 | unsigned long page_out_index = 0; | 226 | unsigned long page_out_index = 0; |
351 | struct page *page_out; | ||
352 | unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) / | 227 | unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) / |
353 | PAGE_CACHE_SIZE; | 228 | PAGE_CACHE_SIZE; |
354 | unsigned long buf_start; | 229 | unsigned long buf_start; |
355 | unsigned long buf_offset; | ||
356 | unsigned long bytes; | ||
357 | unsigned long working_bytes; | ||
358 | unsigned long pg_offset; | 230 | unsigned long pg_offset; |
359 | unsigned long start_byte; | ||
360 | unsigned long current_buf_start; | ||
361 | char *kaddr; | ||
362 | |||
363 | workspace = find_zlib_workspace(); | ||
364 | if (IS_ERR(workspace)) | ||
365 | return -ENOMEM; | ||
366 | 231 | ||
367 | data_in = kmap(pages_in[page_in_index]); | 232 | data_in = kmap(pages_in[page_in_index]); |
368 | workspace->inf_strm.next_in = data_in; | 233 | workspace->inf_strm.next_in = data_in; |
@@ -372,8 +237,6 @@ int btrfs_zlib_decompress_biovec(struct page **pages_in, | |||
372 | workspace->inf_strm.total_out = 0; | 237 | workspace->inf_strm.total_out = 0; |
373 | workspace->inf_strm.next_out = workspace->buf; | 238 | workspace->inf_strm.next_out = workspace->buf; |
374 | workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; | 239 | workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; |
375 | page_out = bvec[page_out_index].bv_page; | ||
376 | page_bytes_left = PAGE_CACHE_SIZE; | ||
377 | pg_offset = 0; | 240 | pg_offset = 0; |
378 | 241 | ||
379 | /* If it's deflate, and it's got no preset dictionary, then | 242 | /* If it's deflate, and it's got no preset dictionary, then |
@@ -389,107 +252,29 @@ int btrfs_zlib_decompress_biovec(struct page **pages_in, | |||
389 | 252 | ||
390 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { | 253 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { |
391 | printk(KERN_WARNING "inflateInit failed\n"); | 254 | printk(KERN_WARNING "inflateInit failed\n"); |
392 | ret = -1; | 255 | return -1; |
393 | goto out; | ||
394 | } | 256 | } |
395 | while (workspace->inf_strm.total_in < srclen) { | 257 | while (workspace->inf_strm.total_in < srclen) { |
396 | ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH); | 258 | ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH); |
397 | if (ret != Z_OK && ret != Z_STREAM_END) | 259 | if (ret != Z_OK && ret != Z_STREAM_END) |
398 | break; | 260 | break; |
399 | /* | ||
400 | * buf start is the byte offset we're of the start of | ||
401 | * our workspace buffer | ||
402 | */ | ||
403 | buf_start = total_out; | ||
404 | 261 | ||
405 | /* total_out is the last byte of the workspace buffer */ | 262 | buf_start = total_out; |
406 | total_out = workspace->inf_strm.total_out; | 263 | total_out = workspace->inf_strm.total_out; |
407 | 264 | ||
408 | working_bytes = total_out - buf_start; | 265 | /* we didn't make progress in this inflate call, we're done */ |
409 | 266 | if (buf_start == total_out) | |
410 | /* | ||
411 | * start byte is the first byte of the page we're currently | ||
412 | * copying into relative to the start of the compressed data. | ||
413 | */ | ||
414 | start_byte = page_offset(page_out) - disk_start; | ||
415 | |||
416 | if (working_bytes == 0) { | ||
417 | /* we didn't make progress in this inflate | ||
418 | * call, we're done | ||
419 | */ | ||
420 | if (ret != Z_STREAM_END) | ||
421 | ret = -1; | ||
422 | break; | 267 | break; |
423 | } | ||
424 | 268 | ||
425 | /* we haven't yet hit data corresponding to this page */ | 269 | ret2 = btrfs_decompress_buf2page(workspace->buf, buf_start, |
426 | if (total_out <= start_byte) | 270 | total_out, disk_start, |
427 | goto next; | 271 | bvec, vcnt, |
428 | 272 | &page_out_index, &pg_offset); | |
429 | /* | 273 | if (ret2 == 0) { |
430 | * the start of the data we care about is offset into | 274 | ret = 0; |
431 | * the middle of our working buffer | 275 | goto done; |
432 | */ | ||
433 | if (total_out > start_byte && buf_start < start_byte) { | ||
434 | buf_offset = start_byte - buf_start; | ||
435 | working_bytes -= buf_offset; | ||
436 | } else { | ||
437 | buf_offset = 0; | ||
438 | } | ||
439 | current_buf_start = buf_start; | ||
440 | |||
441 | /* copy bytes from the working buffer into the pages */ | ||
442 | while (working_bytes > 0) { | ||
443 | bytes = min(PAGE_CACHE_SIZE - pg_offset, | ||
444 | PAGE_CACHE_SIZE - buf_offset); | ||
445 | bytes = min(bytes, working_bytes); | ||
446 | kaddr = kmap_atomic(page_out, KM_USER0); | ||
447 | memcpy(kaddr + pg_offset, workspace->buf + buf_offset, | ||
448 | bytes); | ||
449 | kunmap_atomic(kaddr, KM_USER0); | ||
450 | flush_dcache_page(page_out); | ||
451 | |||
452 | pg_offset += bytes; | ||
453 | page_bytes_left -= bytes; | ||
454 | buf_offset += bytes; | ||
455 | working_bytes -= bytes; | ||
456 | current_buf_start += bytes; | ||
457 | |||
458 | /* check if we need to pick another page */ | ||
459 | if (page_bytes_left == 0) { | ||
460 | page_out_index++; | ||
461 | if (page_out_index >= vcnt) { | ||
462 | ret = 0; | ||
463 | goto done; | ||
464 | } | ||
465 | |||
466 | page_out = bvec[page_out_index].bv_page; | ||
467 | pg_offset = 0; | ||
468 | page_bytes_left = PAGE_CACHE_SIZE; | ||
469 | start_byte = page_offset(page_out) - disk_start; | ||
470 | |||
471 | /* | ||
472 | * make sure our new page is covered by this | ||
473 | * working buffer | ||
474 | */ | ||
475 | if (total_out <= start_byte) | ||
476 | goto next; | ||
477 | |||
478 | /* the next page in the biovec might not | ||
479 | * be adjacent to the last page, but it | ||
480 | * might still be found inside this working | ||
481 | * buffer. bump our offset pointer | ||
482 | */ | ||
483 | if (total_out > start_byte && | ||
484 | current_buf_start < start_byte) { | ||
485 | buf_offset = start_byte - buf_start; | ||
486 | working_bytes = total_out - start_byte; | ||
487 | current_buf_start = buf_start + | ||
488 | buf_offset; | ||
489 | } | ||
490 | } | ||
491 | } | 276 | } |
492 | next: | 277 | |
493 | workspace->inf_strm.next_out = workspace->buf; | 278 | workspace->inf_strm.next_out = workspace->buf; |
494 | workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; | 279 | workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; |
495 | 280 | ||
@@ -516,35 +301,21 @@ done: | |||
516 | zlib_inflateEnd(&workspace->inf_strm); | 301 | zlib_inflateEnd(&workspace->inf_strm); |
517 | if (data_in) | 302 | if (data_in) |
518 | kunmap(pages_in[page_in_index]); | 303 | kunmap(pages_in[page_in_index]); |
519 | out: | ||
520 | free_workspace(workspace); | ||
521 | return ret; | 304 | return ret; |
522 | } | 305 | } |
523 | 306 | ||
524 | /* | 307 | static int zlib_decompress(struct list_head *ws, unsigned char *data_in, |
525 | * a less complex decompression routine. Our compressed data fits in a | 308 | struct page *dest_page, |
526 | * single page, and we want to read a single page out of it. | 309 | unsigned long start_byte, |
527 | * start_byte tells us the offset into the compressed data we're interested in | 310 | size_t srclen, size_t destlen) |
528 | */ | ||
529 | int btrfs_zlib_decompress(unsigned char *data_in, | ||
530 | struct page *dest_page, | ||
531 | unsigned long start_byte, | ||
532 | size_t srclen, size_t destlen) | ||
533 | { | 311 | { |
312 | struct workspace *workspace = list_entry(ws, struct workspace, list); | ||
534 | int ret = 0; | 313 | int ret = 0; |
535 | int wbits = MAX_WBITS; | 314 | int wbits = MAX_WBITS; |
536 | struct workspace *workspace; | ||
537 | unsigned long bytes_left = destlen; | 315 | unsigned long bytes_left = destlen; |
538 | unsigned long total_out = 0; | 316 | unsigned long total_out = 0; |
539 | char *kaddr; | 317 | char *kaddr; |
540 | 318 | ||
541 | if (destlen > PAGE_CACHE_SIZE) | ||
542 | return -ENOMEM; | ||
543 | |||
544 | workspace = find_zlib_workspace(); | ||
545 | if (IS_ERR(workspace)) | ||
546 | return -ENOMEM; | ||
547 | |||
548 | workspace->inf_strm.next_in = data_in; | 319 | workspace->inf_strm.next_in = data_in; |
549 | workspace->inf_strm.avail_in = srclen; | 320 | workspace->inf_strm.avail_in = srclen; |
550 | workspace->inf_strm.total_in = 0; | 321 | workspace->inf_strm.total_in = 0; |
@@ -565,8 +336,7 @@ int btrfs_zlib_decompress(unsigned char *data_in, | |||
565 | 336 | ||
566 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { | 337 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { |
567 | printk(KERN_WARNING "inflateInit failed\n"); | 338 | printk(KERN_WARNING "inflateInit failed\n"); |
568 | ret = -1; | 339 | return -1; |
569 | goto out; | ||
570 | } | 340 | } |
571 | 341 | ||
572 | while (bytes_left > 0) { | 342 | while (bytes_left > 0) { |
@@ -616,12 +386,13 @@ next: | |||
616 | ret = 0; | 386 | ret = 0; |
617 | 387 | ||
618 | zlib_inflateEnd(&workspace->inf_strm); | 388 | zlib_inflateEnd(&workspace->inf_strm); |
619 | out: | ||
620 | free_workspace(workspace); | ||
621 | return ret; | 389 | return ret; |
622 | } | 390 | } |
623 | 391 | ||
624 | void btrfs_zlib_exit(void) | 392 | struct btrfs_compress_op btrfs_zlib_compress = { |
625 | { | 393 | .alloc_workspace = zlib_alloc_workspace, |
626 | free_workspaces(); | 394 | .free_workspace = zlib_free_workspace, |
627 | } | 395 | .compress_pages = zlib_compress_pages, |
396 | .decompress_biovec = zlib_decompress_biovec, | ||
397 | .decompress = zlib_decompress, | ||
398 | }; | ||
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 60d27bc9eb83..6b61ded701e1 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -1560,9 +1560,10 @@ retry_locked: | |||
1560 | /* NOTE: no side-effects allowed, until we take s_mutex */ | 1560 | /* NOTE: no side-effects allowed, until we take s_mutex */ |
1561 | 1561 | ||
1562 | revoking = cap->implemented & ~cap->issued; | 1562 | revoking = cap->implemented & ~cap->issued; |
1563 | if (revoking) | 1563 | dout(" mds%d cap %p issued %s implemented %s revoking %s\n", |
1564 | dout(" mds%d revoking %s\n", cap->mds, | 1564 | cap->mds, cap, ceph_cap_string(cap->issued), |
1565 | ceph_cap_string(revoking)); | 1565 | ceph_cap_string(cap->implemented), |
1566 | ceph_cap_string(revoking)); | ||
1566 | 1567 | ||
1567 | if (cap == ci->i_auth_cap && | 1568 | if (cap == ci->i_auth_cap && |
1568 | (cap->issued & CEPH_CAP_FILE_WR)) { | 1569 | (cap->issued & CEPH_CAP_FILE_WR)) { |
@@ -1658,6 +1659,8 @@ ack: | |||
1658 | 1659 | ||
1659 | if (cap == ci->i_auth_cap && ci->i_dirty_caps) | 1660 | if (cap == ci->i_auth_cap && ci->i_dirty_caps) |
1660 | flushing = __mark_caps_flushing(inode, session); | 1661 | flushing = __mark_caps_flushing(inode, session); |
1662 | else | ||
1663 | flushing = 0; | ||
1661 | 1664 | ||
1662 | mds = cap->mds; /* remember mds, so we don't repeat */ | 1665 | mds = cap->mds; /* remember mds, so we don't repeat */ |
1663 | sent++; | 1666 | sent++; |
@@ -1940,6 +1943,35 @@ void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc, | |||
1940 | } | 1943 | } |
1941 | } | 1944 | } |
1942 | 1945 | ||
1946 | static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc, | ||
1947 | struct ceph_mds_session *session, | ||
1948 | struct inode *inode) | ||
1949 | { | ||
1950 | struct ceph_inode_info *ci = ceph_inode(inode); | ||
1951 | struct ceph_cap *cap; | ||
1952 | int delayed = 0; | ||
1953 | |||
1954 | spin_lock(&inode->i_lock); | ||
1955 | cap = ci->i_auth_cap; | ||
1956 | dout("kick_flushing_inode_caps %p flushing %s flush_seq %lld\n", inode, | ||
1957 | ceph_cap_string(ci->i_flushing_caps), ci->i_cap_flush_seq); | ||
1958 | __ceph_flush_snaps(ci, &session, 1); | ||
1959 | if (ci->i_flushing_caps) { | ||
1960 | delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, | ||
1961 | __ceph_caps_used(ci), | ||
1962 | __ceph_caps_wanted(ci), | ||
1963 | cap->issued | cap->implemented, | ||
1964 | ci->i_flushing_caps, NULL); | ||
1965 | if (delayed) { | ||
1966 | spin_lock(&inode->i_lock); | ||
1967 | __cap_delay_requeue(mdsc, ci); | ||
1968 | spin_unlock(&inode->i_lock); | ||
1969 | } | ||
1970 | } else { | ||
1971 | spin_unlock(&inode->i_lock); | ||
1972 | } | ||
1973 | } | ||
1974 | |||
1943 | 1975 | ||
1944 | /* | 1976 | /* |
1945 | * Take references to capabilities we hold, so that we don't release | 1977 | * Take references to capabilities we hold, so that we don't release |
@@ -2687,7 +2719,7 @@ static void handle_cap_import(struct ceph_mds_client *mdsc, | |||
2687 | ceph_add_cap(inode, session, cap_id, -1, | 2719 | ceph_add_cap(inode, session, cap_id, -1, |
2688 | issued, wanted, seq, mseq, realmino, CEPH_CAP_FLAG_AUTH, | 2720 | issued, wanted, seq, mseq, realmino, CEPH_CAP_FLAG_AUTH, |
2689 | NULL /* no caps context */); | 2721 | NULL /* no caps context */); |
2690 | try_flush_caps(inode, session, NULL); | 2722 | kick_flushing_inode_caps(mdsc, session, inode); |
2691 | up_read(&mdsc->snap_rwsem); | 2723 | up_read(&mdsc->snap_rwsem); |
2692 | 2724 | ||
2693 | /* make sure we re-request max_size, if necessary */ | 2725 | /* make sure we re-request max_size, if necessary */ |
@@ -2785,8 +2817,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
2785 | case CEPH_CAP_OP_IMPORT: | 2817 | case CEPH_CAP_OP_IMPORT: |
2786 | handle_cap_import(mdsc, inode, h, session, | 2818 | handle_cap_import(mdsc, inode, h, session, |
2787 | snaptrace, snaptrace_len); | 2819 | snaptrace, snaptrace_len); |
2788 | ceph_check_caps(ceph_inode(inode), CHECK_CAPS_NODELAY, | 2820 | ceph_check_caps(ceph_inode(inode), 0, session); |
2789 | session); | ||
2790 | goto done_unlocked; | 2821 | goto done_unlocked; |
2791 | } | 2822 | } |
2792 | 2823 | ||
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index e835eff551e3..5625463aa479 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
@@ -710,10 +710,6 @@ static int fill_inode(struct inode *inode, | |||
710 | ci->i_ceph_flags |= CEPH_I_COMPLETE; | 710 | ci->i_ceph_flags |= CEPH_I_COMPLETE; |
711 | ci->i_max_offset = 2; | 711 | ci->i_max_offset = 2; |
712 | } | 712 | } |
713 | |||
714 | /* it may be better to set st_size in getattr instead? */ | ||
715 | if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), RBYTES)) | ||
716 | inode->i_size = ci->i_rbytes; | ||
717 | break; | 713 | break; |
718 | default: | 714 | default: |
719 | pr_err("fill_inode %llx.%llx BAD mode 0%o\n", | 715 | pr_err("fill_inode %llx.%llx BAD mode 0%o\n", |
@@ -1819,7 +1815,11 @@ int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry, | |||
1819 | else | 1815 | else |
1820 | stat->dev = 0; | 1816 | stat->dev = 0; |
1821 | if (S_ISDIR(inode->i_mode)) { | 1817 | if (S_ISDIR(inode->i_mode)) { |
1822 | stat->size = ci->i_rbytes; | 1818 | if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), |
1819 | RBYTES)) | ||
1820 | stat->size = ci->i_rbytes; | ||
1821 | else | ||
1822 | stat->size = ci->i_files + ci->i_subdirs; | ||
1823 | stat->blocks = 0; | 1823 | stat->blocks = 0; |
1824 | stat->blksize = 65536; | 1824 | stat->blksize = 65536; |
1825 | } | 1825 | } |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 1e30d194a8e3..a1ee8fa3a8e7 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -693,9 +693,11 @@ static int __choose_mds(struct ceph_mds_client *mdsc, | |||
693 | dout("choose_mds %p %llx.%llx " | 693 | dout("choose_mds %p %llx.%llx " |
694 | "frag %u mds%d (%d/%d)\n", | 694 | "frag %u mds%d (%d/%d)\n", |
695 | inode, ceph_vinop(inode), | 695 | inode, ceph_vinop(inode), |
696 | frag.frag, frag.mds, | 696 | frag.frag, mds, |
697 | (int)r, frag.ndist); | 697 | (int)r, frag.ndist); |
698 | return mds; | 698 | if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >= |
699 | CEPH_MDS_STATE_ACTIVE) | ||
700 | return mds; | ||
699 | } | 701 | } |
700 | 702 | ||
701 | /* since this file/dir wasn't known to be | 703 | /* since this file/dir wasn't known to be |
@@ -708,7 +710,9 @@ static int __choose_mds(struct ceph_mds_client *mdsc, | |||
708 | dout("choose_mds %p %llx.%llx " | 710 | dout("choose_mds %p %llx.%llx " |
709 | "frag %u mds%d (auth)\n", | 711 | "frag %u mds%d (auth)\n", |
710 | inode, ceph_vinop(inode), frag.frag, mds); | 712 | inode, ceph_vinop(inode), frag.frag, mds); |
711 | return mds; | 713 | if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >= |
714 | CEPH_MDS_STATE_ACTIVE) | ||
715 | return mds; | ||
712 | } | 716 | } |
713 | } | 717 | } |
714 | } | 718 | } |
diff --git a/fs/ceph/super.c b/fs/ceph/super.c index bf6f0f34082a..9c5085465a63 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c | |||
@@ -290,6 +290,8 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt, | |||
290 | 290 | ||
291 | fsopt->rsize = CEPH_MOUNT_RSIZE_DEFAULT; | 291 | fsopt->rsize = CEPH_MOUNT_RSIZE_DEFAULT; |
292 | fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); | 292 | fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); |
293 | fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT; | ||
294 | fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; | ||
293 | fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT; | 295 | fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT; |
294 | fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT; | 296 | fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT; |
295 | fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; | 297 | fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; |
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 6e12a6ba5f79..8c9eba6ef9df 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c | |||
@@ -219,6 +219,7 @@ static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci, | |||
219 | struct rb_node **p; | 219 | struct rb_node **p; |
220 | struct rb_node *parent = NULL; | 220 | struct rb_node *parent = NULL; |
221 | struct ceph_inode_xattr *xattr = NULL; | 221 | struct ceph_inode_xattr *xattr = NULL; |
222 | int name_len = strlen(name); | ||
222 | int c; | 223 | int c; |
223 | 224 | ||
224 | p = &ci->i_xattrs.index.rb_node; | 225 | p = &ci->i_xattrs.index.rb_node; |
@@ -226,6 +227,8 @@ static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci, | |||
226 | parent = *p; | 227 | parent = *p; |
227 | xattr = rb_entry(parent, struct ceph_inode_xattr, node); | 228 | xattr = rb_entry(parent, struct ceph_inode_xattr, node); |
228 | c = strncmp(name, xattr->name, xattr->name_len); | 229 | c = strncmp(name, xattr->name, xattr->name_len); |
230 | if (c == 0 && name_len > xattr->name_len) | ||
231 | c = 1; | ||
229 | if (c < 0) | 232 | if (c < 0) |
230 | p = &(*p)->rb_left; | 233 | p = &(*p)->rb_left; |
231 | else if (c > 0) | 234 | else if (c > 0) |
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index ee45648b0d1a..7cb0f7f847e4 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig | |||
@@ -3,6 +3,7 @@ config CIFS | |||
3 | depends on INET | 3 | depends on INET |
4 | select NLS | 4 | select NLS |
5 | select CRYPTO | 5 | select CRYPTO |
6 | select CRYPTO_MD4 | ||
6 | select CRYPTO_MD5 | 7 | select CRYPTO_MD5 |
7 | select CRYPTO_HMAC | 8 | select CRYPTO_HMAC |
8 | select CRYPTO_ARC4 | 9 | select CRYPTO_ARC4 |
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile index 43b19dd39191..d87558448e3d 100644 --- a/fs/cifs/Makefile +++ b/fs/cifs/Makefile | |||
@@ -5,7 +5,7 @@ obj-$(CONFIG_CIFS) += cifs.o | |||
5 | 5 | ||
6 | cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \ | 6 | cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \ |
7 | link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o \ | 7 | link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o \ |
8 | md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o \ | 8 | cifs_unicode.o nterr.o xattr.o cifsencrypt.o \ |
9 | readdir.o ioctl.o sess.o export.o | 9 | readdir.o ioctl.o sess.o export.o |
10 | 10 | ||
11 | cifs-$(CONFIG_CIFS_ACL) += cifsacl.o | 11 | cifs-$(CONFIG_CIFS_ACL) += cifsacl.o |
diff --git a/fs/cifs/README b/fs/cifs/README index 46af99ab3614..fe1683590828 100644 --- a/fs/cifs/README +++ b/fs/cifs/README | |||
@@ -452,6 +452,11 @@ A partial list of the supported mount options follows: | |||
452 | if oplock (caching token) is granted and held. Note that | 452 | if oplock (caching token) is granted and held. Note that |
453 | direct allows write operations larger than page size | 453 | direct allows write operations larger than page size |
454 | to be sent to the server. | 454 | to be sent to the server. |
455 | strictcache Use for switching on strict cache mode. In this mode the | ||
456 | client read from the cache all the time it has Oplock Level II, | ||
457 | otherwise - read from the server. All written data are stored | ||
458 | in the cache, but if the client doesn't have Exclusive Oplock, | ||
459 | it writes the data to the server. | ||
455 | acl Allow setfacl and getfacl to manage posix ACLs if server | 460 | acl Allow setfacl and getfacl to manage posix ACLs if server |
456 | supports them. (default) | 461 | supports them. (default) |
457 | noacl Do not allow setfacl and getfacl calls on this mount | 462 | noacl Do not allow setfacl and getfacl calls on this mount |
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index ede98300a8cd..65829d32128c 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c | |||
@@ -79,11 +79,11 @@ void cifs_dump_mids(struct TCP_Server_Info *server) | |||
79 | spin_lock(&GlobalMid_Lock); | 79 | spin_lock(&GlobalMid_Lock); |
80 | list_for_each(tmp, &server->pending_mid_q) { | 80 | list_for_each(tmp, &server->pending_mid_q) { |
81 | mid_entry = list_entry(tmp, struct mid_q_entry, qhead); | 81 | mid_entry = list_entry(tmp, struct mid_q_entry, qhead); |
82 | cERROR(1, "State: %d Cmd: %d Pid: %d Tsk: %p Mid %d", | 82 | cERROR(1, "State: %d Cmd: %d Pid: %d Cbdata: %p Mid %d", |
83 | mid_entry->midState, | 83 | mid_entry->midState, |
84 | (int)mid_entry->command, | 84 | (int)mid_entry->command, |
85 | mid_entry->pid, | 85 | mid_entry->pid, |
86 | mid_entry->tsk, | 86 | mid_entry->callback_data, |
87 | mid_entry->mid); | 87 | mid_entry->mid); |
88 | #ifdef CONFIG_CIFS_STATS2 | 88 | #ifdef CONFIG_CIFS_STATS2 |
89 | cERROR(1, "IsLarge: %d buf: %p time rcv: %ld now: %ld", | 89 | cERROR(1, "IsLarge: %d buf: %p time rcv: %ld now: %ld", |
@@ -218,11 +218,11 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) | |||
218 | mid_entry = list_entry(tmp3, struct mid_q_entry, | 218 | mid_entry = list_entry(tmp3, struct mid_q_entry, |
219 | qhead); | 219 | qhead); |
220 | seq_printf(m, "\tState: %d com: %d pid:" | 220 | seq_printf(m, "\tState: %d com: %d pid:" |
221 | " %d tsk: %p mid %d\n", | 221 | " %d cbdata: %p mid %d\n", |
222 | mid_entry->midState, | 222 | mid_entry->midState, |
223 | (int)mid_entry->command, | 223 | (int)mid_entry->command, |
224 | mid_entry->pid, | 224 | mid_entry->pid, |
225 | mid_entry->tsk, | 225 | mid_entry->callback_data, |
226 | mid_entry->mid); | 226 | mid_entry->mid); |
227 | } | 227 | } |
228 | spin_unlock(&GlobalMid_Lock); | 228 | spin_unlock(&GlobalMid_Lock); |
@@ -331,7 +331,7 @@ static int cifs_stats_proc_show(struct seq_file *m, void *v) | |||
331 | atomic_read(&totSmBufAllocCount)); | 331 | atomic_read(&totSmBufAllocCount)); |
332 | #endif /* CONFIG_CIFS_STATS2 */ | 332 | #endif /* CONFIG_CIFS_STATS2 */ |
333 | 333 | ||
334 | seq_printf(m, "Operations (MIDs): %d\n", midCount.counter); | 334 | seq_printf(m, "Operations (MIDs): %d\n", atomic_read(&midCount)); |
335 | seq_printf(m, | 335 | seq_printf(m, |
336 | "\n%d session %d share reconnects\n", | 336 | "\n%d session %d share reconnects\n", |
337 | tcpSesReconnectCount.counter, tconInfoReconnectCount.counter); | 337 | tcpSesReconnectCount.counter, tconInfoReconnectCount.counter); |
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index c68a056f27fd..0a265ad9e426 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c | |||
@@ -255,35 +255,6 @@ static struct vfsmount *cifs_dfs_do_refmount(struct cifs_sb_info *cifs_sb, | |||
255 | 255 | ||
256 | } | 256 | } |
257 | 257 | ||
258 | static int add_mount_helper(struct vfsmount *newmnt, struct nameidata *nd, | ||
259 | struct list_head *mntlist) | ||
260 | { | ||
261 | /* stolen from afs code */ | ||
262 | int err; | ||
263 | |||
264 | mntget(newmnt); | ||
265 | err = do_add_mount(newmnt, &nd->path, nd->path.mnt->mnt_flags | MNT_SHRINKABLE, mntlist); | ||
266 | switch (err) { | ||
267 | case 0: | ||
268 | path_put(&nd->path); | ||
269 | nd->path.mnt = newmnt; | ||
270 | nd->path.dentry = dget(newmnt->mnt_root); | ||
271 | schedule_delayed_work(&cifs_dfs_automount_task, | ||
272 | cifs_dfs_mountpoint_expiry_timeout); | ||
273 | break; | ||
274 | case -EBUSY: | ||
275 | /* someone else made a mount here whilst we were busy */ | ||
276 | while (d_mountpoint(nd->path.dentry) && | ||
277 | follow_down(&nd->path)) | ||
278 | ; | ||
279 | err = 0; | ||
280 | default: | ||
281 | mntput(newmnt); | ||
282 | break; | ||
283 | } | ||
284 | return err; | ||
285 | } | ||
286 | |||
287 | static void dump_referral(const struct dfs_info3_param *ref) | 258 | static void dump_referral(const struct dfs_info3_param *ref) |
288 | { | 259 | { |
289 | cFYI(1, "DFS: ref path: %s", ref->path_name); | 260 | cFYI(1, "DFS: ref path: %s", ref->path_name); |
@@ -293,27 +264,23 @@ static void dump_referral(const struct dfs_info3_param *ref) | |||
293 | ref->path_consumed); | 264 | ref->path_consumed); |
294 | } | 265 | } |
295 | 266 | ||
296 | 267 | /* | |
297 | static void* | 268 | * Create a vfsmount that we can automount |
298 | cifs_dfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd) | 269 | */ |
270 | static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt) | ||
299 | { | 271 | { |
300 | struct dfs_info3_param *referrals = NULL; | 272 | struct dfs_info3_param *referrals = NULL; |
301 | unsigned int num_referrals = 0; | 273 | unsigned int num_referrals = 0; |
302 | struct cifs_sb_info *cifs_sb; | 274 | struct cifs_sb_info *cifs_sb; |
303 | struct cifsSesInfo *ses; | 275 | struct cifsSesInfo *ses; |
304 | char *full_path = NULL; | 276 | char *full_path; |
305 | int xid, i; | 277 | int xid, i; |
306 | int rc = 0; | 278 | int rc; |
307 | struct vfsmount *mnt = ERR_PTR(-ENOENT); | 279 | struct vfsmount *mnt; |
308 | struct tcon_link *tlink; | 280 | struct tcon_link *tlink; |
309 | 281 | ||
310 | cFYI(1, "in %s", __func__); | 282 | cFYI(1, "in %s", __func__); |
311 | BUG_ON(IS_ROOT(dentry)); | 283 | BUG_ON(IS_ROOT(mntpt)); |
312 | |||
313 | xid = GetXid(); | ||
314 | |||
315 | dput(nd->path.dentry); | ||
316 | nd->path.dentry = dget(dentry); | ||
317 | 284 | ||
318 | /* | 285 | /* |
319 | * The MSDFS spec states that paths in DFS referral requests and | 286 | * The MSDFS spec states that paths in DFS referral requests and |
@@ -321,66 +288,83 @@ cifs_dfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd) | |||
321 | * the double backslashes usually used in the UNC. This function | 288 | * the double backslashes usually used in the UNC. This function |
322 | * gives us the latter, so we must adjust the result. | 289 | * gives us the latter, so we must adjust the result. |
323 | */ | 290 | */ |
324 | full_path = build_path_from_dentry(dentry); | 291 | mnt = ERR_PTR(-ENOMEM); |
325 | if (full_path == NULL) { | 292 | full_path = build_path_from_dentry(mntpt); |
326 | rc = -ENOMEM; | 293 | if (full_path == NULL) |
327 | goto out_err; | 294 | goto cdda_exit; |
328 | } | ||
329 | 295 | ||
330 | cifs_sb = CIFS_SB(dentry->d_inode->i_sb); | 296 | cifs_sb = CIFS_SB(mntpt->d_inode->i_sb); |
331 | tlink = cifs_sb_tlink(cifs_sb); | 297 | tlink = cifs_sb_tlink(cifs_sb); |
332 | if (IS_ERR(tlink)) { | 298 | if (IS_ERR(tlink)) { |
333 | rc = PTR_ERR(tlink); | 299 | mnt = ERR_CAST(tlink); |
334 | goto out_err; | 300 | goto free_full_path; |
335 | } | 301 | } |
336 | ses = tlink_tcon(tlink)->ses; | 302 | ses = tlink_tcon(tlink)->ses; |
337 | 303 | ||
304 | xid = GetXid(); | ||
338 | rc = get_dfs_path(xid, ses, full_path + 1, cifs_sb->local_nls, | 305 | rc = get_dfs_path(xid, ses, full_path + 1, cifs_sb->local_nls, |
339 | &num_referrals, &referrals, | 306 | &num_referrals, &referrals, |
340 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); | 307 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); |
308 | FreeXid(xid); | ||
341 | 309 | ||
342 | cifs_put_tlink(tlink); | 310 | cifs_put_tlink(tlink); |
343 | 311 | ||
312 | mnt = ERR_PTR(-ENOENT); | ||
344 | for (i = 0; i < num_referrals; i++) { | 313 | for (i = 0; i < num_referrals; i++) { |
345 | int len; | 314 | int len; |
346 | dump_referral(referrals+i); | 315 | dump_referral(referrals + i); |
347 | /* connect to a node */ | 316 | /* connect to a node */ |
348 | len = strlen(referrals[i].node_name); | 317 | len = strlen(referrals[i].node_name); |
349 | if (len < 2) { | 318 | if (len < 2) { |
350 | cERROR(1, "%s: Net Address path too short: %s", | 319 | cERROR(1, "%s: Net Address path too short: %s", |
351 | __func__, referrals[i].node_name); | 320 | __func__, referrals[i].node_name); |
352 | rc = -EINVAL; | 321 | mnt = ERR_PTR(-EINVAL); |
353 | goto out_err; | 322 | break; |
354 | } | 323 | } |
355 | mnt = cifs_dfs_do_refmount(cifs_sb, | 324 | mnt = cifs_dfs_do_refmount(cifs_sb, |
356 | full_path, referrals + i); | 325 | full_path, referrals + i); |
357 | cFYI(1, "%s: cifs_dfs_do_refmount:%s , mnt:%p", __func__, | 326 | cFYI(1, "%s: cifs_dfs_do_refmount:%s , mnt:%p", __func__, |
358 | referrals[i].node_name, mnt); | 327 | referrals[i].node_name, mnt); |
359 | |||
360 | /* complete mount procedure if we accured submount */ | ||
361 | if (!IS_ERR(mnt)) | 328 | if (!IS_ERR(mnt)) |
362 | break; | 329 | goto success; |
363 | } | 330 | } |
364 | 331 | ||
365 | /* we need it cause for() above could exit without valid submount */ | 332 | /* no valid submounts were found; return error from get_dfs_path() by |
366 | rc = PTR_ERR(mnt); | 333 | * preference */ |
367 | if (IS_ERR(mnt)) | 334 | if (rc != 0) |
368 | goto out_err; | 335 | mnt = ERR_PTR(rc); |
369 | 336 | ||
370 | rc = add_mount_helper(mnt, nd, &cifs_dfs_automount_list); | 337 | success: |
371 | |||
372 | out: | ||
373 | FreeXid(xid); | ||
374 | free_dfs_info_array(referrals, num_referrals); | 338 | free_dfs_info_array(referrals, num_referrals); |
339 | free_full_path: | ||
375 | kfree(full_path); | 340 | kfree(full_path); |
341 | cdda_exit: | ||
376 | cFYI(1, "leaving %s" , __func__); | 342 | cFYI(1, "leaving %s" , __func__); |
377 | return ERR_PTR(rc); | 343 | return mnt; |
378 | out_err: | 344 | } |
379 | path_put(&nd->path); | 345 | |
380 | goto out; | 346 | /* |
347 | * Attempt to automount the referral | ||
348 | */ | ||
349 | struct vfsmount *cifs_dfs_d_automount(struct path *path) | ||
350 | { | ||
351 | struct vfsmount *newmnt; | ||
352 | |||
353 | cFYI(1, "in %s", __func__); | ||
354 | |||
355 | newmnt = cifs_dfs_do_automount(path->dentry); | ||
356 | if (IS_ERR(newmnt)) { | ||
357 | cFYI(1, "leaving %s [automount failed]" , __func__); | ||
358 | return newmnt; | ||
359 | } | ||
360 | |||
361 | mntget(newmnt); /* prevent immediate expiration */ | ||
362 | mnt_set_expiry(newmnt, &cifs_dfs_automount_list); | ||
363 | schedule_delayed_work(&cifs_dfs_automount_task, | ||
364 | cifs_dfs_mountpoint_expiry_timeout); | ||
365 | cFYI(1, "leaving %s [ok]" , __func__); | ||
366 | return newmnt; | ||
381 | } | 367 | } |
382 | 368 | ||
383 | const struct inode_operations cifs_dfs_referral_inode_operations = { | 369 | const struct inode_operations cifs_dfs_referral_inode_operations = { |
384 | .follow_link = cifs_dfs_follow_mountpoint, | ||
385 | }; | 370 | }; |
386 | |||
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h index 7852cd677051..ac51cd2d33ae 100644 --- a/fs/cifs/cifs_fs_sb.h +++ b/fs/cifs/cifs_fs_sb.h | |||
@@ -40,6 +40,7 @@ | |||
40 | #define CIFS_MOUNT_FSCACHE 0x8000 /* local caching enabled */ | 40 | #define CIFS_MOUNT_FSCACHE 0x8000 /* local caching enabled */ |
41 | #define CIFS_MOUNT_MF_SYMLINKS 0x10000 /* Minshall+French Symlinks enabled */ | 41 | #define CIFS_MOUNT_MF_SYMLINKS 0x10000 /* Minshall+French Symlinks enabled */ |
42 | #define CIFS_MOUNT_MULTIUSER 0x20000 /* multiuser mount */ | 42 | #define CIFS_MOUNT_MULTIUSER 0x20000 /* multiuser mount */ |
43 | #define CIFS_MOUNT_STRICT_IO 0x40000 /* strict cache mode */ | ||
43 | 44 | ||
44 | struct cifs_sb_info { | 45 | struct cifs_sb_info { |
45 | struct rb_root tlink_tree; | 46 | struct rb_root tlink_tree; |
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index 430f510a1720..fc0fd4fde306 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c | |||
@@ -44,10 +44,14 @@ cifs_ucs2_bytes(const __le16 *from, int maxbytes, | |||
44 | int charlen, outlen = 0; | 44 | int charlen, outlen = 0; |
45 | int maxwords = maxbytes / 2; | 45 | int maxwords = maxbytes / 2; |
46 | char tmp[NLS_MAX_CHARSET_SIZE]; | 46 | char tmp[NLS_MAX_CHARSET_SIZE]; |
47 | __u16 ftmp; | ||
47 | 48 | ||
48 | for (i = 0; i < maxwords && from[i]; i++) { | 49 | for (i = 0; i < maxwords; i++) { |
49 | charlen = codepage->uni2char(le16_to_cpu(from[i]), tmp, | 50 | ftmp = get_unaligned_le16(&from[i]); |
50 | NLS_MAX_CHARSET_SIZE); | 51 | if (ftmp == 0) |
52 | break; | ||
53 | |||
54 | charlen = codepage->uni2char(ftmp, tmp, NLS_MAX_CHARSET_SIZE); | ||
51 | if (charlen > 0) | 55 | if (charlen > 0) |
52 | outlen += charlen; | 56 | outlen += charlen; |
53 | else | 57 | else |
@@ -58,9 +62,9 @@ cifs_ucs2_bytes(const __le16 *from, int maxbytes, | |||
58 | } | 62 | } |
59 | 63 | ||
60 | /* | 64 | /* |
61 | * cifs_mapchar - convert a little-endian char to proper char in codepage | 65 | * cifs_mapchar - convert a host-endian char to proper char in codepage |
62 | * @target - where converted character should be copied | 66 | * @target - where converted character should be copied |
63 | * @src_char - 2 byte little-endian source character | 67 | * @src_char - 2 byte host-endian source character |
64 | * @cp - codepage to which character should be converted | 68 | * @cp - codepage to which character should be converted |
65 | * @mapchar - should character be mapped according to mapchars mount option? | 69 | * @mapchar - should character be mapped according to mapchars mount option? |
66 | * | 70 | * |
@@ -69,7 +73,7 @@ cifs_ucs2_bytes(const __le16 *from, int maxbytes, | |||
69 | * enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE). | 73 | * enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE). |
70 | */ | 74 | */ |
71 | static int | 75 | static int |
72 | cifs_mapchar(char *target, const __le16 src_char, const struct nls_table *cp, | 76 | cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp, |
73 | bool mapchar) | 77 | bool mapchar) |
74 | { | 78 | { |
75 | int len = 1; | 79 | int len = 1; |
@@ -82,7 +86,7 @@ cifs_mapchar(char *target, const __le16 src_char, const struct nls_table *cp, | |||
82 | * build_path_from_dentry are modified, as they use slash as | 86 | * build_path_from_dentry are modified, as they use slash as |
83 | * separator. | 87 | * separator. |
84 | */ | 88 | */ |
85 | switch (le16_to_cpu(src_char)) { | 89 | switch (src_char) { |
86 | case UNI_COLON: | 90 | case UNI_COLON: |
87 | *target = ':'; | 91 | *target = ':'; |
88 | break; | 92 | break; |
@@ -109,8 +113,7 @@ out: | |||
109 | return len; | 113 | return len; |
110 | 114 | ||
111 | cp_convert: | 115 | cp_convert: |
112 | len = cp->uni2char(le16_to_cpu(src_char), target, | 116 | len = cp->uni2char(src_char, target, NLS_MAX_CHARSET_SIZE); |
113 | NLS_MAX_CHARSET_SIZE); | ||
114 | if (len <= 0) { | 117 | if (len <= 0) { |
115 | *target = '?'; | 118 | *target = '?'; |
116 | len = 1; | 119 | len = 1; |
@@ -149,6 +152,7 @@ cifs_from_ucs2(char *to, const __le16 *from, int tolen, int fromlen, | |||
149 | int nullsize = nls_nullsize(codepage); | 152 | int nullsize = nls_nullsize(codepage); |
150 | int fromwords = fromlen / 2; | 153 | int fromwords = fromlen / 2; |
151 | char tmp[NLS_MAX_CHARSET_SIZE]; | 154 | char tmp[NLS_MAX_CHARSET_SIZE]; |
155 | __u16 ftmp; | ||
152 | 156 | ||
153 | /* | 157 | /* |
154 | * because the chars can be of varying widths, we need to take care | 158 | * because the chars can be of varying widths, we need to take care |
@@ -158,19 +162,23 @@ cifs_from_ucs2(char *to, const __le16 *from, int tolen, int fromlen, | |||
158 | */ | 162 | */ |
159 | safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize); | 163 | safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize); |
160 | 164 | ||
161 | for (i = 0; i < fromwords && from[i]; i++) { | 165 | for (i = 0; i < fromwords; i++) { |
166 | ftmp = get_unaligned_le16(&from[i]); | ||
167 | if (ftmp == 0) | ||
168 | break; | ||
169 | |||
162 | /* | 170 | /* |
163 | * check to see if converting this character might make the | 171 | * check to see if converting this character might make the |
164 | * conversion bleed into the null terminator | 172 | * conversion bleed into the null terminator |
165 | */ | 173 | */ |
166 | if (outlen >= safelen) { | 174 | if (outlen >= safelen) { |
167 | charlen = cifs_mapchar(tmp, from[i], codepage, mapchar); | 175 | charlen = cifs_mapchar(tmp, ftmp, codepage, mapchar); |
168 | if ((outlen + charlen) > (tolen - nullsize)) | 176 | if ((outlen + charlen) > (tolen - nullsize)) |
169 | break; | 177 | break; |
170 | } | 178 | } |
171 | 179 | ||
172 | /* put converted char into 'to' buffer */ | 180 | /* put converted char into 'to' buffer */ |
173 | charlen = cifs_mapchar(&to[outlen], from[i], codepage, mapchar); | 181 | charlen = cifs_mapchar(&to[outlen], ftmp, codepage, mapchar); |
174 | outlen += charlen; | 182 | outlen += charlen; |
175 | } | 183 | } |
176 | 184 | ||
@@ -193,24 +201,21 @@ cifs_strtoUCS(__le16 *to, const char *from, int len, | |||
193 | { | 201 | { |
194 | int charlen; | 202 | int charlen; |
195 | int i; | 203 | int i; |
196 | wchar_t *wchar_to = (wchar_t *)to; /* needed to quiet sparse */ | 204 | wchar_t wchar_to; /* needed to quiet sparse */ |
197 | 205 | ||
198 | for (i = 0; len && *from; i++, from += charlen, len -= charlen) { | 206 | for (i = 0; len && *from; i++, from += charlen, len -= charlen) { |
199 | 207 | charlen = codepage->char2uni(from, len, &wchar_to); | |
200 | /* works for 2.4.0 kernel or later */ | ||
201 | charlen = codepage->char2uni(from, len, &wchar_to[i]); | ||
202 | if (charlen < 1) { | 208 | if (charlen < 1) { |
203 | cERROR(1, "strtoUCS: char2uni of %d returned %d", | 209 | cERROR(1, "strtoUCS: char2uni of 0x%x returned %d", |
204 | (int)*from, charlen); | 210 | *from, charlen); |
205 | /* A question mark */ | 211 | /* A question mark */ |
206 | to[i] = cpu_to_le16(0x003f); | 212 | wchar_to = 0x003f; |
207 | charlen = 1; | 213 | charlen = 1; |
208 | } else | 214 | } |
209 | to[i] = cpu_to_le16(wchar_to[i]); | 215 | put_unaligned_le16(wchar_to, &to[i]); |
210 | |||
211 | } | 216 | } |
212 | 217 | ||
213 | to[i] = 0; | 218 | put_unaligned_le16(0, &to[i]); |
214 | return i; | 219 | return i; |
215 | } | 220 | } |
216 | 221 | ||
@@ -252,3 +257,79 @@ cifs_strndup_from_ucs(const char *src, const int maxlen, const bool is_unicode, | |||
252 | return dst; | 257 | return dst; |
253 | } | 258 | } |
254 | 259 | ||
260 | /* | ||
261 | * Convert 16 bit Unicode pathname to wire format from string in current code | ||
262 | * page. Conversion may involve remapping up the six characters that are | ||
263 | * only legal in POSIX-like OS (if they are present in the string). Path | ||
264 | * names are little endian 16 bit Unicode on the wire | ||
265 | */ | ||
266 | int | ||
267 | cifsConvertToUCS(__le16 *target, const char *source, int maxlen, | ||
268 | const struct nls_table *cp, int mapChars) | ||
269 | { | ||
270 | int i, j, charlen; | ||
271 | int len_remaining = maxlen; | ||
272 | char src_char; | ||
273 | __u16 temp; | ||
274 | |||
275 | if (!mapChars) | ||
276 | return cifs_strtoUCS(target, source, PATH_MAX, cp); | ||
277 | |||
278 | for (i = 0, j = 0; i < maxlen; j++) { | ||
279 | src_char = source[i]; | ||
280 | switch (src_char) { | ||
281 | case 0: | ||
282 | put_unaligned_le16(0, &target[j]); | ||
283 | goto ctoUCS_out; | ||
284 | case ':': | ||
285 | temp = UNI_COLON; | ||
286 | break; | ||
287 | case '*': | ||
288 | temp = UNI_ASTERIK; | ||
289 | break; | ||
290 | case '?': | ||
291 | temp = UNI_QUESTION; | ||
292 | break; | ||
293 | case '<': | ||
294 | temp = UNI_LESSTHAN; | ||
295 | break; | ||
296 | case '>': | ||
297 | temp = UNI_GRTRTHAN; | ||
298 | break; | ||
299 | case '|': | ||
300 | temp = UNI_PIPE; | ||
301 | break; | ||
302 | /* | ||
303 | * FIXME: We can not handle remapping backslash (UNI_SLASH) | ||
304 | * until all the calls to build_path_from_dentry are modified, | ||
305 | * as they use backslash as separator. | ||
306 | */ | ||
307 | default: | ||
308 | charlen = cp->char2uni(source+i, len_remaining, | ||
309 | &temp); | ||
310 | /* | ||
311 | * if no match, use question mark, which at least in | ||
312 | * some cases serves as wild card | ||
313 | */ | ||
314 | if (charlen < 1) { | ||
315 | temp = 0x003f; | ||
316 | charlen = 1; | ||
317 | } | ||
318 | len_remaining -= charlen; | ||
319 | /* | ||
320 | * character may take more than one byte in the source | ||
321 | * string, but will take exactly two bytes in the | ||
322 | * target string | ||
323 | */ | ||
324 | i += charlen; | ||
325 | continue; | ||
326 | } | ||
327 | put_unaligned_le16(temp, &target[j]); | ||
328 | i++; /* move to next char in source string */ | ||
329 | len_remaining--; | ||
330 | } | ||
331 | |||
332 | ctoUCS_out: | ||
333 | return i; | ||
334 | } | ||
335 | |||
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index a437ec391a01..beeebf194234 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c | |||
@@ -41,9 +41,12 @@ static struct cifs_wksid wksidarr[NUM_WK_SIDS] = { | |||
41 | ; | 41 | ; |
42 | 42 | ||
43 | 43 | ||
44 | /* security id for everyone */ | 44 | /* security id for everyone/world system group */ |
45 | static const struct cifs_sid sid_everyone = { | 45 | static const struct cifs_sid sid_everyone = { |
46 | 1, 1, {0, 0, 0, 0, 0, 1}, {0} }; | 46 | 1, 1, {0, 0, 0, 0, 0, 1}, {0} }; |
47 | /* security id for Authenticated Users system group */ | ||
48 | static const struct cifs_sid sid_authusers = { | ||
49 | 1, 1, {0, 0, 0, 0, 0, 5}, {11} }; | ||
47 | /* group users */ | 50 | /* group users */ |
48 | static const struct cifs_sid sid_user = {1, 2 , {0, 0, 0, 0, 0, 5}, {} }; | 51 | static const struct cifs_sid sid_user = {1, 2 , {0, 0, 0, 0, 0, 5}, {} }; |
49 | 52 | ||
@@ -365,10 +368,14 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl, | |||
365 | if (num_aces > 0) { | 368 | if (num_aces > 0) { |
366 | umode_t user_mask = S_IRWXU; | 369 | umode_t user_mask = S_IRWXU; |
367 | umode_t group_mask = S_IRWXG; | 370 | umode_t group_mask = S_IRWXG; |
368 | umode_t other_mask = S_IRWXO; | 371 | umode_t other_mask = S_IRWXU | S_IRWXG | S_IRWXO; |
369 | 372 | ||
370 | ppace = kmalloc(num_aces * sizeof(struct cifs_ace *), | 373 | ppace = kmalloc(num_aces * sizeof(struct cifs_ace *), |
371 | GFP_KERNEL); | 374 | GFP_KERNEL); |
375 | if (!ppace) { | ||
376 | cERROR(1, "DACL memory allocation error"); | ||
377 | return; | ||
378 | } | ||
372 | 379 | ||
373 | for (i = 0; i < num_aces; ++i) { | 380 | for (i = 0; i < num_aces; ++i) { |
374 | ppace[i] = (struct cifs_ace *) (acl_base + acl_size); | 381 | ppace[i] = (struct cifs_ace *) (acl_base + acl_size); |
@@ -390,6 +397,12 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl, | |||
390 | ppace[i]->type, | 397 | ppace[i]->type, |
391 | &fattr->cf_mode, | 398 | &fattr->cf_mode, |
392 | &other_mask); | 399 | &other_mask); |
400 | if (compare_sids(&(ppace[i]->sid), &sid_authusers)) | ||
401 | access_flags_to_mode(ppace[i]->access_req, | ||
402 | ppace[i]->type, | ||
403 | &fattr->cf_mode, | ||
404 | &other_mask); | ||
405 | |||
393 | 406 | ||
394 | /* memcpy((void *)(&(cifscred->aces[i])), | 407 | /* memcpy((void *)(&(cifscred->aces[i])), |
395 | (void *)ppace[i], | 408 | (void *)ppace[i], |
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 66f3d50d0676..a51585f9852b 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c | |||
@@ -24,7 +24,6 @@ | |||
24 | #include "cifspdu.h" | 24 | #include "cifspdu.h" |
25 | #include "cifsglob.h" | 25 | #include "cifsglob.h" |
26 | #include "cifs_debug.h" | 26 | #include "cifs_debug.h" |
27 | #include "md5.h" | ||
28 | #include "cifs_unicode.h" | 27 | #include "cifs_unicode.h" |
29 | #include "cifsproto.h" | 28 | #include "cifsproto.h" |
30 | #include "ntlmssp.h" | 29 | #include "ntlmssp.h" |
@@ -37,11 +36,6 @@ | |||
37 | /* Note that the smb header signature field on input contains the | 36 | /* Note that the smb header signature field on input contains the |
38 | sequence number before this function is called */ | 37 | sequence number before this function is called */ |
39 | 38 | ||
40 | extern void mdfour(unsigned char *out, unsigned char *in, int n); | ||
41 | extern void E_md4hash(const unsigned char *passwd, unsigned char *p16); | ||
42 | extern void SMBencrypt(unsigned char *passwd, const unsigned char *c8, | ||
43 | unsigned char *p24); | ||
44 | |||
45 | static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu, | 39 | static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu, |
46 | struct TCP_Server_Info *server, char *signature) | 40 | struct TCP_Server_Info *server, char *signature) |
47 | { | 41 | { |
@@ -234,6 +228,7 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu, | |||
234 | /* first calculate 24 bytes ntlm response and then 16 byte session key */ | 228 | /* first calculate 24 bytes ntlm response and then 16 byte session key */ |
235 | int setup_ntlm_response(struct cifsSesInfo *ses) | 229 | int setup_ntlm_response(struct cifsSesInfo *ses) |
236 | { | 230 | { |
231 | int rc = 0; | ||
237 | unsigned int temp_len = CIFS_SESS_KEY_SIZE + CIFS_AUTH_RESP_SIZE; | 232 | unsigned int temp_len = CIFS_SESS_KEY_SIZE + CIFS_AUTH_RESP_SIZE; |
238 | char temp_key[CIFS_SESS_KEY_SIZE]; | 233 | char temp_key[CIFS_SESS_KEY_SIZE]; |
239 | 234 | ||
@@ -247,13 +242,26 @@ int setup_ntlm_response(struct cifsSesInfo *ses) | |||
247 | } | 242 | } |
248 | ses->auth_key.len = temp_len; | 243 | ses->auth_key.len = temp_len; |
249 | 244 | ||
250 | SMBNTencrypt(ses->password, ses->server->cryptkey, | 245 | rc = SMBNTencrypt(ses->password, ses->server->cryptkey, |
251 | ses->auth_key.response + CIFS_SESS_KEY_SIZE); | 246 | ses->auth_key.response + CIFS_SESS_KEY_SIZE); |
247 | if (rc) { | ||
248 | cFYI(1, "%s Can't generate NTLM response, error: %d", | ||
249 | __func__, rc); | ||
250 | return rc; | ||
251 | } | ||
252 | 252 | ||
253 | E_md4hash(ses->password, temp_key); | 253 | rc = E_md4hash(ses->password, temp_key); |
254 | mdfour(ses->auth_key.response, temp_key, CIFS_SESS_KEY_SIZE); | 254 | if (rc) { |
255 | cFYI(1, "%s Can't generate NT hash, error: %d", __func__, rc); | ||
256 | return rc; | ||
257 | } | ||
255 | 258 | ||
256 | return 0; | 259 | rc = mdfour(ses->auth_key.response, temp_key, CIFS_SESS_KEY_SIZE); |
260 | if (rc) | ||
261 | cFYI(1, "%s Can't generate NTLM session key, error: %d", | ||
262 | __func__, rc); | ||
263 | |||
264 | return rc; | ||
257 | } | 265 | } |
258 | 266 | ||
259 | #ifdef CONFIG_CIFS_WEAK_PW_HASH | 267 | #ifdef CONFIG_CIFS_WEAK_PW_HASH |
@@ -649,9 +657,10 @@ calc_seckey(struct cifsSesInfo *ses) | |||
649 | get_random_bytes(sec_key, CIFS_SESS_KEY_SIZE); | 657 | get_random_bytes(sec_key, CIFS_SESS_KEY_SIZE); |
650 | 658 | ||
651 | tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); | 659 | tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); |
652 | if (!tfm_arc4 || IS_ERR(tfm_arc4)) { | 660 | if (IS_ERR(tfm_arc4)) { |
661 | rc = PTR_ERR(tfm_arc4); | ||
653 | cERROR(1, "could not allocate crypto API arc4\n"); | 662 | cERROR(1, "could not allocate crypto API arc4\n"); |
654 | return PTR_ERR(tfm_arc4); | 663 | return rc; |
655 | } | 664 | } |
656 | 665 | ||
657 | desc.tfm = tfm_arc4; | 666 | desc.tfm = tfm_arc4; |
@@ -700,14 +709,13 @@ cifs_crypto_shash_allocate(struct TCP_Server_Info *server) | |||
700 | unsigned int size; | 709 | unsigned int size; |
701 | 710 | ||
702 | server->secmech.hmacmd5 = crypto_alloc_shash("hmac(md5)", 0, 0); | 711 | server->secmech.hmacmd5 = crypto_alloc_shash("hmac(md5)", 0, 0); |
703 | if (!server->secmech.hmacmd5 || | 712 | if (IS_ERR(server->secmech.hmacmd5)) { |
704 | IS_ERR(server->secmech.hmacmd5)) { | ||
705 | cERROR(1, "could not allocate crypto hmacmd5\n"); | 713 | cERROR(1, "could not allocate crypto hmacmd5\n"); |
706 | return PTR_ERR(server->secmech.hmacmd5); | 714 | return PTR_ERR(server->secmech.hmacmd5); |
707 | } | 715 | } |
708 | 716 | ||
709 | server->secmech.md5 = crypto_alloc_shash("md5", 0, 0); | 717 | server->secmech.md5 = crypto_alloc_shash("md5", 0, 0); |
710 | if (!server->secmech.md5 || IS_ERR(server->secmech.md5)) { | 718 | if (IS_ERR(server->secmech.md5)) { |
711 | cERROR(1, "could not allocate crypto md5\n"); | 719 | cERROR(1, "could not allocate crypto md5\n"); |
712 | rc = PTR_ERR(server->secmech.md5); | 720 | rc = PTR_ERR(server->secmech.md5); |
713 | goto crypto_allocate_md5_fail; | 721 | goto crypto_allocate_md5_fail; |
diff --git a/fs/cifs/cifsencrypt.h b/fs/cifs/cifsencrypt.h deleted file mode 100644 index 15d2ec006474..000000000000 --- a/fs/cifs/cifsencrypt.h +++ /dev/null | |||
@@ -1,33 +0,0 @@ | |||
1 | /* | ||
2 | * fs/cifs/cifsencrypt.h | ||
3 | * | ||
4 | * Copyright (c) International Business Machines Corp., 2005 | ||
5 | * Author(s): Steve French (sfrench@us.ibm.com) | ||
6 | * | ||
7 | * Externs for misc. small encryption routines | ||
8 | * so we do not have to put them in cifsproto.h | ||
9 | * | ||
10 | * This library is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU Lesser General Public License as published | ||
12 | * by the Free Software Foundation; either version 2.1 of the License, or | ||
13 | * (at your option) any later version. | ||
14 | * | ||
15 | * This library is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | ||
18 | * the GNU Lesser General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU Lesser General Public License | ||
21 | * along with this library; if not, write to the Free Software | ||
22 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
23 | */ | ||
24 | |||
25 | /* md4.c */ | ||
26 | extern void mdfour(unsigned char *out, unsigned char *in, int n); | ||
27 | /* smbdes.c */ | ||
28 | extern void E_P16(unsigned char *p14, unsigned char *p16); | ||
29 | extern void E_P24(unsigned char *p21, const unsigned char *c8, | ||
30 | unsigned char *p24); | ||
31 | |||
32 | |||
33 | |||
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index d9f652a522a6..f2970136d17d 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c | |||
@@ -77,7 +77,11 @@ unsigned int cifs_max_pending = CIFS_MAX_REQ; | |||
77 | module_param(cifs_max_pending, int, 0); | 77 | module_param(cifs_max_pending, int, 0); |
78 | MODULE_PARM_DESC(cifs_max_pending, "Simultaneous requests to server. " | 78 | MODULE_PARM_DESC(cifs_max_pending, "Simultaneous requests to server. " |
79 | "Default: 50 Range: 2 to 256"); | 79 | "Default: 50 Range: 2 to 256"); |
80 | 80 | unsigned short echo_retries = 5; | |
81 | module_param(echo_retries, ushort, 0644); | ||
82 | MODULE_PARM_DESC(echo_retries, "Number of echo attempts before giving up and " | ||
83 | "reconnecting server. Default: 5. 0 means " | ||
84 | "never reconnect."); | ||
81 | extern mempool_t *cifs_sm_req_poolp; | 85 | extern mempool_t *cifs_sm_req_poolp; |
82 | extern mempool_t *cifs_req_poolp; | 86 | extern mempool_t *cifs_req_poolp; |
83 | extern mempool_t *cifs_mid_poolp; | 87 | extern mempool_t *cifs_mid_poolp; |
@@ -596,10 +600,17 @@ static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
596 | { | 600 | { |
597 | struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; | 601 | struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; |
598 | ssize_t written; | 602 | ssize_t written; |
603 | int rc; | ||
599 | 604 | ||
600 | written = generic_file_aio_write(iocb, iov, nr_segs, pos); | 605 | written = generic_file_aio_write(iocb, iov, nr_segs, pos); |
601 | if (!CIFS_I(inode)->clientCanCacheAll) | 606 | |
602 | filemap_fdatawrite(inode->i_mapping); | 607 | if (CIFS_I(inode)->clientCanCacheAll) |
608 | return written; | ||
609 | |||
610 | rc = filemap_fdatawrite(inode->i_mapping); | ||
611 | if (rc) | ||
612 | cFYI(1, "cifs_file_aio_write: %d rc on %p inode", rc, inode); | ||
613 | |||
603 | return written; | 614 | return written; |
604 | } | 615 | } |
605 | 616 | ||
@@ -729,6 +740,25 @@ const struct file_operations cifs_file_ops = { | |||
729 | .setlease = cifs_setlease, | 740 | .setlease = cifs_setlease, |
730 | }; | 741 | }; |
731 | 742 | ||
743 | const struct file_operations cifs_file_strict_ops = { | ||
744 | .read = do_sync_read, | ||
745 | .write = do_sync_write, | ||
746 | .aio_read = cifs_strict_readv, | ||
747 | .aio_write = cifs_strict_writev, | ||
748 | .open = cifs_open, | ||
749 | .release = cifs_close, | ||
750 | .lock = cifs_lock, | ||
751 | .fsync = cifs_strict_fsync, | ||
752 | .flush = cifs_flush, | ||
753 | .mmap = cifs_file_strict_mmap, | ||
754 | .splice_read = generic_file_splice_read, | ||
755 | .llseek = cifs_llseek, | ||
756 | #ifdef CONFIG_CIFS_POSIX | ||
757 | .unlocked_ioctl = cifs_ioctl, | ||
758 | #endif /* CONFIG_CIFS_POSIX */ | ||
759 | .setlease = cifs_setlease, | ||
760 | }; | ||
761 | |||
732 | const struct file_operations cifs_file_direct_ops = { | 762 | const struct file_operations cifs_file_direct_ops = { |
733 | /* no aio, no readv - | 763 | /* no aio, no readv - |
734 | BB reevaluate whether they can be done with directio, no cache */ | 764 | BB reevaluate whether they can be done with directio, no cache */ |
@@ -747,6 +777,7 @@ const struct file_operations cifs_file_direct_ops = { | |||
747 | .llseek = cifs_llseek, | 777 | .llseek = cifs_llseek, |
748 | .setlease = cifs_setlease, | 778 | .setlease = cifs_setlease, |
749 | }; | 779 | }; |
780 | |||
750 | const struct file_operations cifs_file_nobrl_ops = { | 781 | const struct file_operations cifs_file_nobrl_ops = { |
751 | .read = do_sync_read, | 782 | .read = do_sync_read, |
752 | .write = do_sync_write, | 783 | .write = do_sync_write, |
@@ -765,6 +796,24 @@ const struct file_operations cifs_file_nobrl_ops = { | |||
765 | .setlease = cifs_setlease, | 796 | .setlease = cifs_setlease, |
766 | }; | 797 | }; |
767 | 798 | ||
799 | const struct file_operations cifs_file_strict_nobrl_ops = { | ||
800 | .read = do_sync_read, | ||
801 | .write = do_sync_write, | ||
802 | .aio_read = cifs_strict_readv, | ||
803 | .aio_write = cifs_strict_writev, | ||
804 | .open = cifs_open, | ||
805 | .release = cifs_close, | ||
806 | .fsync = cifs_strict_fsync, | ||
807 | .flush = cifs_flush, | ||
808 | .mmap = cifs_file_strict_mmap, | ||
809 | .splice_read = generic_file_splice_read, | ||
810 | .llseek = cifs_llseek, | ||
811 | #ifdef CONFIG_CIFS_POSIX | ||
812 | .unlocked_ioctl = cifs_ioctl, | ||
813 | #endif /* CONFIG_CIFS_POSIX */ | ||
814 | .setlease = cifs_setlease, | ||
815 | }; | ||
816 | |||
768 | const struct file_operations cifs_file_direct_nobrl_ops = { | 817 | const struct file_operations cifs_file_direct_nobrl_ops = { |
769 | /* no mmap, no aio, no readv - | 818 | /* no mmap, no aio, no readv - |
770 | BB reevaluate whether they can be done with directio, no cache */ | 819 | BB reevaluate whether they can be done with directio, no cache */ |
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 897b2b2b28b5..4a3330235d55 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h | |||
@@ -61,6 +61,7 @@ extern int cifs_rename(struct inode *, struct dentry *, struct inode *, | |||
61 | struct dentry *); | 61 | struct dentry *); |
62 | extern int cifs_revalidate_file(struct file *filp); | 62 | extern int cifs_revalidate_file(struct file *filp); |
63 | extern int cifs_revalidate_dentry(struct dentry *); | 63 | extern int cifs_revalidate_dentry(struct dentry *); |
64 | extern void cifs_invalidate_mapping(struct inode *inode); | ||
64 | extern int cifs_getattr(struct vfsmount *, struct dentry *, struct kstat *); | 65 | extern int cifs_getattr(struct vfsmount *, struct dentry *, struct kstat *); |
65 | extern int cifs_setattr(struct dentry *, struct iattr *); | 66 | extern int cifs_setattr(struct dentry *, struct iattr *); |
66 | 67 | ||
@@ -72,19 +73,27 @@ extern const struct inode_operations cifs_dfs_referral_inode_operations; | |||
72 | /* Functions related to files and directories */ | 73 | /* Functions related to files and directories */ |
73 | extern const struct file_operations cifs_file_ops; | 74 | extern const struct file_operations cifs_file_ops; |
74 | extern const struct file_operations cifs_file_direct_ops; /* if directio mnt */ | 75 | extern const struct file_operations cifs_file_direct_ops; /* if directio mnt */ |
75 | extern const struct file_operations cifs_file_nobrl_ops; | 76 | extern const struct file_operations cifs_file_strict_ops; /* if strictio mnt */ |
76 | extern const struct file_operations cifs_file_direct_nobrl_ops; /* no brlocks */ | 77 | extern const struct file_operations cifs_file_nobrl_ops; /* no brlocks */ |
78 | extern const struct file_operations cifs_file_direct_nobrl_ops; | ||
79 | extern const struct file_operations cifs_file_strict_nobrl_ops; | ||
77 | extern int cifs_open(struct inode *inode, struct file *file); | 80 | extern int cifs_open(struct inode *inode, struct file *file); |
78 | extern int cifs_close(struct inode *inode, struct file *file); | 81 | extern int cifs_close(struct inode *inode, struct file *file); |
79 | extern int cifs_closedir(struct inode *inode, struct file *file); | 82 | extern int cifs_closedir(struct inode *inode, struct file *file); |
80 | extern ssize_t cifs_user_read(struct file *file, char __user *read_data, | 83 | extern ssize_t cifs_user_read(struct file *file, char __user *read_data, |
81 | size_t read_size, loff_t *poffset); | 84 | size_t read_size, loff_t *poffset); |
85 | extern ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov, | ||
86 | unsigned long nr_segs, loff_t pos); | ||
82 | extern ssize_t cifs_user_write(struct file *file, const char __user *write_data, | 87 | extern ssize_t cifs_user_write(struct file *file, const char __user *write_data, |
83 | size_t write_size, loff_t *poffset); | 88 | size_t write_size, loff_t *poffset); |
89 | extern ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, | ||
90 | unsigned long nr_segs, loff_t pos); | ||
84 | extern int cifs_lock(struct file *, int, struct file_lock *); | 91 | extern int cifs_lock(struct file *, int, struct file_lock *); |
85 | extern int cifs_fsync(struct file *, int); | 92 | extern int cifs_fsync(struct file *, int); |
93 | extern int cifs_strict_fsync(struct file *, int); | ||
86 | extern int cifs_flush(struct file *, fl_owner_t id); | 94 | extern int cifs_flush(struct file *, fl_owner_t id); |
87 | extern int cifs_file_mmap(struct file * , struct vm_area_struct *); | 95 | extern int cifs_file_mmap(struct file * , struct vm_area_struct *); |
96 | extern int cifs_file_strict_mmap(struct file * , struct vm_area_struct *); | ||
88 | extern const struct file_operations cifs_dir_ops; | 97 | extern const struct file_operations cifs_dir_ops; |
89 | extern int cifs_dir_open(struct inode *inode, struct file *file); | 98 | extern int cifs_dir_open(struct inode *inode, struct file *file); |
90 | extern int cifs_readdir(struct file *file, void *direntry, filldir_t filldir); | 99 | extern int cifs_readdir(struct file *file, void *direntry, filldir_t filldir); |
@@ -93,6 +102,12 @@ extern int cifs_readdir(struct file *file, void *direntry, filldir_t filldir); | |||
93 | extern const struct dentry_operations cifs_dentry_ops; | 102 | extern const struct dentry_operations cifs_dentry_ops; |
94 | extern const struct dentry_operations cifs_ci_dentry_ops; | 103 | extern const struct dentry_operations cifs_ci_dentry_ops; |
95 | 104 | ||
105 | #ifdef CONFIG_CIFS_DFS_UPCALL | ||
106 | extern struct vfsmount *cifs_dfs_d_automount(struct path *path); | ||
107 | #else | ||
108 | #define cifs_dfs_d_automount NULL | ||
109 | #endif | ||
110 | |||
96 | /* Functions related to symlinks */ | 111 | /* Functions related to symlinks */ |
97 | extern void *cifs_follow_link(struct dentry *direntry, struct nameidata *nd); | 112 | extern void *cifs_follow_link(struct dentry *direntry, struct nameidata *nd); |
98 | extern void cifs_put_link(struct dentry *direntry, | 113 | extern void cifs_put_link(struct dentry *direntry, |
@@ -112,5 +127,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); | |||
112 | extern const struct export_operations cifs_export_ops; | 127 | extern const struct export_operations cifs_export_ops; |
113 | #endif /* EXPERIMENTAL */ | 128 | #endif /* EXPERIMENTAL */ |
114 | 129 | ||
115 | #define CIFS_VERSION "1.68" | 130 | #define CIFS_VERSION "1.70" |
116 | #endif /* _CIFSFS_H */ | 131 | #endif /* _CIFSFS_H */ |
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 606ca8bb7102..17afb0fbcaed 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h | |||
@@ -161,46 +161,41 @@ struct TCP_Server_Info { | |||
161 | int srv_count; /* reference counter */ | 161 | int srv_count; /* reference counter */ |
162 | /* 15 character server name + 0x20 16th byte indicating type = srv */ | 162 | /* 15 character server name + 0x20 16th byte indicating type = srv */ |
163 | char server_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL]; | 163 | char server_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL]; |
164 | enum statusEnum tcpStatus; /* what we think the status is */ | ||
164 | char *hostname; /* hostname portion of UNC string */ | 165 | char *hostname; /* hostname portion of UNC string */ |
165 | struct socket *ssocket; | 166 | struct socket *ssocket; |
166 | struct sockaddr_storage dstaddr; | 167 | struct sockaddr_storage dstaddr; |
167 | struct sockaddr_storage srcaddr; /* locally bind to this IP */ | 168 | struct sockaddr_storage srcaddr; /* locally bind to this IP */ |
169 | #ifdef CONFIG_NET_NS | ||
170 | struct net *net; | ||
171 | #endif | ||
168 | wait_queue_head_t response_q; | 172 | wait_queue_head_t response_q; |
169 | wait_queue_head_t request_q; /* if more than maxmpx to srvr must block*/ | 173 | wait_queue_head_t request_q; /* if more than maxmpx to srvr must block*/ |
170 | struct list_head pending_mid_q; | 174 | struct list_head pending_mid_q; |
171 | void *Server_NlsInfo; /* BB - placeholder for future NLS info */ | ||
172 | unsigned short server_codepage; /* codepage for the server */ | ||
173 | enum protocolEnum protocolType; | ||
174 | char versionMajor; | ||
175 | char versionMinor; | ||
176 | bool svlocal:1; /* local server or remote */ | ||
177 | bool noblocksnd; /* use blocking sendmsg */ | 175 | bool noblocksnd; /* use blocking sendmsg */ |
178 | bool noautotune; /* do not autotune send buf sizes */ | 176 | bool noautotune; /* do not autotune send buf sizes */ |
179 | bool tcp_nodelay; | 177 | bool tcp_nodelay; |
180 | atomic_t inFlight; /* number of requests on the wire to server */ | 178 | atomic_t inFlight; /* number of requests on the wire to server */ |
181 | #ifdef CONFIG_CIFS_STATS2 | ||
182 | atomic_t inSend; /* requests trying to send */ | ||
183 | atomic_t num_waiters; /* blocked waiting to get in sendrecv */ | ||
184 | #endif | ||
185 | enum statusEnum tcpStatus; /* what we think the status is */ | ||
186 | struct mutex srv_mutex; | 179 | struct mutex srv_mutex; |
187 | struct task_struct *tsk; | 180 | struct task_struct *tsk; |
188 | char server_GUID[16]; | 181 | char server_GUID[16]; |
189 | char secMode; | 182 | char secMode; |
183 | bool session_estab; /* mark when very first sess is established */ | ||
184 | u16 dialect; /* dialect index that server chose */ | ||
190 | enum securityEnum secType; | 185 | enum securityEnum secType; |
191 | unsigned int maxReq; /* Clients should submit no more */ | 186 | unsigned int maxReq; /* Clients should submit no more */ |
192 | /* than maxReq distinct unanswered SMBs to the server when using */ | 187 | /* than maxReq distinct unanswered SMBs to the server when using */ |
193 | /* multiplexed reads or writes */ | 188 | /* multiplexed reads or writes */ |
194 | unsigned int maxBuf; /* maxBuf specifies the maximum */ | 189 | unsigned int maxBuf; /* maxBuf specifies the maximum */ |
195 | /* message size the server can send or receive for non-raw SMBs */ | 190 | /* message size the server can send or receive for non-raw SMBs */ |
191 | /* maxBuf is returned by SMB NegotiateProtocol so maxBuf is only 0 */ | ||
192 | /* when socket is setup (and during reconnect) before NegProt sent */ | ||
196 | unsigned int max_rw; /* maxRw specifies the maximum */ | 193 | unsigned int max_rw; /* maxRw specifies the maximum */ |
197 | /* message size the server can send or receive for */ | 194 | /* message size the server can send or receive for */ |
198 | /* SMB_COM_WRITE_RAW or SMB_COM_READ_RAW. */ | 195 | /* SMB_COM_WRITE_RAW or SMB_COM_READ_RAW. */ |
199 | unsigned int max_vcs; /* maximum number of smb sessions, at least | 196 | unsigned int max_vcs; /* maximum number of smb sessions, at least |
200 | those that can be specified uniquely with | 197 | those that can be specified uniquely with |
201 | vcnumbers */ | 198 | vcnumbers */ |
202 | char sessid[4]; /* unique token id for this session */ | ||
203 | /* (returned on Negotiate */ | ||
204 | int capabilities; /* allow selective disabling of caps by smb sess */ | 199 | int capabilities; /* allow selective disabling of caps by smb sess */ |
205 | int timeAdj; /* Adjust for difference in server time zone in sec */ | 200 | int timeAdj; /* Adjust for difference in server time zone in sec */ |
206 | __u16 CurrentMid; /* multiplex id - rotating counter */ | 201 | __u16 CurrentMid; /* multiplex id - rotating counter */ |
@@ -210,20 +205,53 @@ struct TCP_Server_Info { | |||
210 | __u32 sequence_number; /* for signing, protected by srv_mutex */ | 205 | __u32 sequence_number; /* for signing, protected by srv_mutex */ |
211 | struct session_key session_key; | 206 | struct session_key session_key; |
212 | unsigned long lstrp; /* when we got last response from this server */ | 207 | unsigned long lstrp; /* when we got last response from this server */ |
213 | u16 dialect; /* dialect index that server chose */ | ||
214 | struct cifs_secmech secmech; /* crypto sec mech functs, descriptors */ | 208 | struct cifs_secmech secmech; /* crypto sec mech functs, descriptors */ |
215 | /* extended security flavors that server supports */ | 209 | /* extended security flavors that server supports */ |
210 | bool sec_ntlmssp; /* supports NTLMSSP */ | ||
211 | bool sec_kerberosu2u; /* supports U2U Kerberos */ | ||
216 | bool sec_kerberos; /* supports plain Kerberos */ | 212 | bool sec_kerberos; /* supports plain Kerberos */ |
217 | bool sec_mskerberos; /* supports legacy MS Kerberos */ | 213 | bool sec_mskerberos; /* supports legacy MS Kerberos */ |
218 | bool sec_kerberosu2u; /* supports U2U Kerberos */ | 214 | struct delayed_work echo; /* echo ping workqueue job */ |
219 | bool sec_ntlmssp; /* supports NTLMSSP */ | ||
220 | bool session_estab; /* mark when very first sess is established */ | ||
221 | #ifdef CONFIG_CIFS_FSCACHE | 215 | #ifdef CONFIG_CIFS_FSCACHE |
222 | struct fscache_cookie *fscache; /* client index cache cookie */ | 216 | struct fscache_cookie *fscache; /* client index cache cookie */ |
223 | #endif | 217 | #endif |
218 | #ifdef CONFIG_CIFS_STATS2 | ||
219 | atomic_t inSend; /* requests trying to send */ | ||
220 | atomic_t num_waiters; /* blocked waiting to get in sendrecv */ | ||
221 | #endif | ||
224 | }; | 222 | }; |
225 | 223 | ||
226 | /* | 224 | /* |
225 | * Macros to allow the TCP_Server_Info->net field and related code to drop out | ||
226 | * when CONFIG_NET_NS isn't set. | ||
227 | */ | ||
228 | |||
229 | #ifdef CONFIG_NET_NS | ||
230 | |||
231 | static inline struct net *cifs_net_ns(struct TCP_Server_Info *srv) | ||
232 | { | ||
233 | return srv->net; | ||
234 | } | ||
235 | |||
236 | static inline void cifs_set_net_ns(struct TCP_Server_Info *srv, struct net *net) | ||
237 | { | ||
238 | srv->net = net; | ||
239 | } | ||
240 | |||
241 | #else | ||
242 | |||
243 | static inline struct net *cifs_net_ns(struct TCP_Server_Info *srv) | ||
244 | { | ||
245 | return &init_net; | ||
246 | } | ||
247 | |||
248 | static inline void cifs_set_net_ns(struct TCP_Server_Info *srv, struct net *net) | ||
249 | { | ||
250 | } | ||
251 | |||
252 | #endif | ||
253 | |||
254 | /* | ||
227 | * Session structure. One of these for each uid session with a particular host | 255 | * Session structure. One of these for each uid session with a particular host |
228 | */ | 256 | */ |
229 | struct cifsSesInfo { | 257 | struct cifsSesInfo { |
@@ -446,11 +474,11 @@ struct cifsInodeInfo { | |||
446 | /* BB add in lists for dirty pages i.e. write caching info for oplock */ | 474 | /* BB add in lists for dirty pages i.e. write caching info for oplock */ |
447 | struct list_head openFileList; | 475 | struct list_head openFileList; |
448 | __u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */ | 476 | __u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */ |
449 | unsigned long time; /* jiffies of last update/check of inode */ | 477 | bool clientCanCacheRead; /* read oplock */ |
450 | bool clientCanCacheRead:1; /* read oplock */ | 478 | bool clientCanCacheAll; /* read and writebehind oplock */ |
451 | bool clientCanCacheAll:1; /* read and writebehind oplock */ | 479 | bool delete_pending; /* DELETE_ON_CLOSE is set */ |
452 | bool delete_pending:1; /* DELETE_ON_CLOSE is set */ | 480 | bool invalid_mapping; /* pagecache is invalid */ |
453 | bool invalid_mapping:1; /* pagecache is invalid */ | 481 | unsigned long time; /* jiffies of last update of inode */ |
454 | u64 server_eof; /* current file size on server */ | 482 | u64 server_eof; /* current file size on server */ |
455 | u64 uniqueid; /* server inode number */ | 483 | u64 uniqueid; /* server inode number */ |
456 | u64 createtime; /* creation time on server */ | 484 | u64 createtime; /* creation time on server */ |
@@ -508,6 +536,18 @@ static inline void cifs_stats_bytes_read(struct cifsTconInfo *tcon, | |||
508 | 536 | ||
509 | #endif | 537 | #endif |
510 | 538 | ||
539 | struct mid_q_entry; | ||
540 | |||
541 | /* | ||
542 | * This is the prototype for the mid callback function. When creating one, | ||
543 | * take special care to avoid deadlocks. Things to bear in mind: | ||
544 | * | ||
545 | * - it will be called by cifsd | ||
546 | * - the GlobalMid_Lock will be held | ||
547 | * - the mid will be removed from the pending_mid_q list | ||
548 | */ | ||
549 | typedef void (mid_callback_t)(struct mid_q_entry *mid); | ||
550 | |||
511 | /* one of these for every pending CIFS request to the server */ | 551 | /* one of these for every pending CIFS request to the server */ |
512 | struct mid_q_entry { | 552 | struct mid_q_entry { |
513 | struct list_head qhead; /* mids waiting on reply from this server */ | 553 | struct list_head qhead; /* mids waiting on reply from this server */ |
@@ -519,7 +559,8 @@ struct mid_q_entry { | |||
519 | unsigned long when_sent; /* time when smb send finished */ | 559 | unsigned long when_sent; /* time when smb send finished */ |
520 | unsigned long when_received; /* when demux complete (taken off wire) */ | 560 | unsigned long when_received; /* when demux complete (taken off wire) */ |
521 | #endif | 561 | #endif |
522 | struct task_struct *tsk; /* task waiting for response */ | 562 | mid_callback_t *callback; /* call completion callback */ |
563 | void *callback_data; /* general purpose pointer for callback */ | ||
523 | struct smb_hdr *resp_buf; /* response buffer */ | 564 | struct smb_hdr *resp_buf; /* response buffer */ |
524 | int midState; /* wish this were enum but can not pass to wait_event */ | 565 | int midState; /* wish this were enum but can not pass to wait_event */ |
525 | __u8 command; /* smb command code */ | 566 | __u8 command; /* smb command code */ |
@@ -613,7 +654,7 @@ static inline void free_dfs_info_array(struct dfs_info3_param *param, | |||
613 | #define MID_REQUEST_SUBMITTED 2 | 654 | #define MID_REQUEST_SUBMITTED 2 |
614 | #define MID_RESPONSE_RECEIVED 4 | 655 | #define MID_RESPONSE_RECEIVED 4 |
615 | #define MID_RETRY_NEEDED 8 /* session closed while this request out */ | 656 | #define MID_RETRY_NEEDED 8 /* session closed while this request out */ |
616 | #define MID_NO_RESP_NEEDED 0x10 | 657 | #define MID_RESPONSE_MALFORMED 0x10 |
617 | 658 | ||
618 | /* Types of response buffer returned from SendReceive2 */ | 659 | /* Types of response buffer returned from SendReceive2 */ |
619 | #define CIFS_NO_BUFFER 0 /* Response buffer not returned */ | 660 | #define CIFS_NO_BUFFER 0 /* Response buffer not returned */ |
@@ -622,12 +663,9 @@ static inline void free_dfs_info_array(struct dfs_info3_param *param, | |||
622 | #define CIFS_IOVEC 4 /* array of response buffers */ | 663 | #define CIFS_IOVEC 4 /* array of response buffers */ |
623 | 664 | ||
624 | /* Type of Request to SendReceive2 */ | 665 | /* Type of Request to SendReceive2 */ |
625 | #define CIFS_STD_OP 0 /* normal request timeout */ | 666 | #define CIFS_BLOCKING_OP 1 /* operation can block */ |
626 | #define CIFS_LONG_OP 1 /* long op (up to 45 sec, oplock time) */ | 667 | #define CIFS_ASYNC_OP 2 /* do not wait for response */ |
627 | #define CIFS_VLONG_OP 2 /* sloow op - can take up to 180 seconds */ | 668 | #define CIFS_TIMEOUT_MASK 0x003 /* only one of above set in req */ |
628 | #define CIFS_BLOCKING_OP 4 /* operation can block */ | ||
629 | #define CIFS_ASYNC_OP 8 /* do not wait for response */ | ||
630 | #define CIFS_TIMEOUT_MASK 0x00F /* only one of 5 above set in req */ | ||
631 | #define CIFS_LOG_ERROR 0x010 /* log NT STATUS if non-zero */ | 669 | #define CIFS_LOG_ERROR 0x010 /* log NT STATUS if non-zero */ |
632 | #define CIFS_LARGE_BUF_OP 0x020 /* large request buffer */ | 670 | #define CIFS_LARGE_BUF_OP 0x020 /* large request buffer */ |
633 | #define CIFS_NO_RESP 0x040 /* no response buffer required */ | 671 | #define CIFS_NO_RESP 0x040 /* no response buffer required */ |
@@ -790,6 +828,9 @@ GLOBAL_EXTERN unsigned int cifs_min_rcv; /* min size of big ntwrk buf pool */ | |||
790 | GLOBAL_EXTERN unsigned int cifs_min_small; /* min size of small buf pool */ | 828 | GLOBAL_EXTERN unsigned int cifs_min_small; /* min size of small buf pool */ |
791 | GLOBAL_EXTERN unsigned int cifs_max_pending; /* MAX requests at once to server*/ | 829 | GLOBAL_EXTERN unsigned int cifs_max_pending; /* MAX requests at once to server*/ |
792 | 830 | ||
831 | /* reconnect after this many failed echo attempts */ | ||
832 | GLOBAL_EXTERN unsigned short echo_retries; | ||
833 | |||
793 | void cifs_oplock_break(struct work_struct *work); | 834 | void cifs_oplock_break(struct work_struct *work); |
794 | void cifs_oplock_break_get(struct cifsFileInfo *cfile); | 835 | void cifs_oplock_break_get(struct cifsFileInfo *cfile); |
795 | void cifs_oplock_break_put(struct cifsFileInfo *cfile); | 836 | void cifs_oplock_break_put(struct cifsFileInfo *cfile); |
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index de36b09763a8..b5c8cc5d7a7f 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h | |||
@@ -23,6 +23,7 @@ | |||
23 | #define _CIFSPDU_H | 23 | #define _CIFSPDU_H |
24 | 24 | ||
25 | #include <net/sock.h> | 25 | #include <net/sock.h> |
26 | #include <asm/unaligned.h> | ||
26 | #include "smbfsctl.h" | 27 | #include "smbfsctl.h" |
27 | 28 | ||
28 | #ifdef CONFIG_CIFS_WEAK_PW_HASH | 29 | #ifdef CONFIG_CIFS_WEAK_PW_HASH |
@@ -50,6 +51,7 @@ | |||
50 | #define SMB_COM_SETATTR 0x09 /* trivial response */ | 51 | #define SMB_COM_SETATTR 0x09 /* trivial response */ |
51 | #define SMB_COM_LOCKING_ANDX 0x24 /* trivial response */ | 52 | #define SMB_COM_LOCKING_ANDX 0x24 /* trivial response */ |
52 | #define SMB_COM_COPY 0x29 /* trivial rsp, fail filename ignrd*/ | 53 | #define SMB_COM_COPY 0x29 /* trivial rsp, fail filename ignrd*/ |
54 | #define SMB_COM_ECHO 0x2B /* echo request */ | ||
53 | #define SMB_COM_OPEN_ANDX 0x2D /* Legacy open for old servers */ | 55 | #define SMB_COM_OPEN_ANDX 0x2D /* Legacy open for old servers */ |
54 | #define SMB_COM_READ_ANDX 0x2E | 56 | #define SMB_COM_READ_ANDX 0x2E |
55 | #define SMB_COM_WRITE_ANDX 0x2F | 57 | #define SMB_COM_WRITE_ANDX 0x2F |
@@ -425,11 +427,49 @@ struct smb_hdr { | |||
425 | __u16 Mid; | 427 | __u16 Mid; |
426 | __u8 WordCount; | 428 | __u8 WordCount; |
427 | } __attribute__((packed)); | 429 | } __attribute__((packed)); |
428 | /* given a pointer to an smb_hdr retrieve the value of byte count */ | 430 | |
429 | #define BCC(smb_var) (*(__u16 *)((char *)(smb_var) + sizeof(struct smb_hdr) + (2 * (smb_var)->WordCount))) | 431 | /* given a pointer to an smb_hdr retrieve a char pointer to the byte count */ |
430 | #define BCC_LE(smb_var) (*(__le16 *)((char *)(smb_var) + sizeof(struct smb_hdr) + (2 * (smb_var)->WordCount))) | 432 | #define BCC(smb_var) ((unsigned char *)(smb_var) + sizeof(struct smb_hdr) + \ |
433 | (2 * (smb_var)->WordCount)) | ||
434 | |||
431 | /* given a pointer to an smb_hdr retrieve the pointer to the byte area */ | 435 | /* given a pointer to an smb_hdr retrieve the pointer to the byte area */ |
432 | #define pByteArea(smb_var) ((unsigned char *)(smb_var) + sizeof(struct smb_hdr) + (2 * (smb_var)->WordCount) + 2) | 436 | #define pByteArea(smb_var) (BCC(smb_var) + 2) |
437 | |||
438 | /* get the converted ByteCount for a SMB packet and return it */ | ||
439 | static inline __u16 | ||
440 | get_bcc(struct smb_hdr *hdr) | ||
441 | { | ||
442 | __u16 *bc_ptr = (__u16 *)BCC(hdr); | ||
443 | |||
444 | return get_unaligned(bc_ptr); | ||
445 | } | ||
446 | |||
447 | /* get the unconverted ByteCount for a SMB packet and return it */ | ||
448 | static inline __u16 | ||
449 | get_bcc_le(struct smb_hdr *hdr) | ||
450 | { | ||
451 | __le16 *bc_ptr = (__le16 *)BCC(hdr); | ||
452 | |||
453 | return get_unaligned_le16(bc_ptr); | ||
454 | } | ||
455 | |||
456 | /* set the ByteCount for a SMB packet in host-byte order */ | ||
457 | static inline void | ||
458 | put_bcc(__u16 count, struct smb_hdr *hdr) | ||
459 | { | ||
460 | __u16 *bc_ptr = (__u16 *)BCC(hdr); | ||
461 | |||
462 | put_unaligned(count, bc_ptr); | ||
463 | } | ||
464 | |||
465 | /* set the ByteCount for a SMB packet in little-endian */ | ||
466 | static inline void | ||
467 | put_bcc_le(__u16 count, struct smb_hdr *hdr) | ||
468 | { | ||
469 | __le16 *bc_ptr = (__le16 *)BCC(hdr); | ||
470 | |||
471 | put_unaligned_le16(count, bc_ptr); | ||
472 | } | ||
433 | 473 | ||
434 | /* | 474 | /* |
435 | * Computer Name Length (since Netbios name was length 16 with last byte 0x20) | 475 | * Computer Name Length (since Netbios name was length 16 with last byte 0x20) |
@@ -760,6 +800,20 @@ typedef struct smb_com_tconx_rsp_ext { | |||
760 | * | 800 | * |
761 | */ | 801 | */ |
762 | 802 | ||
803 | typedef struct smb_com_echo_req { | ||
804 | struct smb_hdr hdr; | ||
805 | __le16 EchoCount; | ||
806 | __le16 ByteCount; | ||
807 | char Data[1]; | ||
808 | } __attribute__((packed)) ECHO_REQ; | ||
809 | |||
810 | typedef struct smb_com_echo_rsp { | ||
811 | struct smb_hdr hdr; | ||
812 | __le16 SequenceNumber; | ||
813 | __le16 ByteCount; | ||
814 | char Data[1]; | ||
815 | } __attribute__((packed)) ECHO_RSP; | ||
816 | |||
763 | typedef struct smb_com_logoff_andx_req { | 817 | typedef struct smb_com_logoff_andx_req { |
764 | struct smb_hdr hdr; /* wct = 2 */ | 818 | struct smb_hdr hdr; /* wct = 2 */ |
765 | __u8 AndXCommand; | 819 | __u8 AndXCommand; |
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index e6d1481b16c1..8096f27ad9a8 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h | |||
@@ -61,6 +61,12 @@ extern char *cifs_compose_mount_options(const char *sb_mountdata, | |||
61 | const char *fullpath, const struct dfs_info3_param *ref, | 61 | const char *fullpath, const struct dfs_info3_param *ref, |
62 | char **devname); | 62 | char **devname); |
63 | /* extern void renew_parental_timestamps(struct dentry *direntry);*/ | 63 | /* extern void renew_parental_timestamps(struct dentry *direntry);*/ |
64 | extern struct mid_q_entry *AllocMidQEntry(const struct smb_hdr *smb_buffer, | ||
65 | struct TCP_Server_Info *server); | ||
66 | extern void DeleteMidQEntry(struct mid_q_entry *midEntry); | ||
67 | extern int cifs_call_async(struct TCP_Server_Info *server, | ||
68 | struct smb_hdr *in_buf, mid_callback_t *callback, | ||
69 | void *cbdata); | ||
64 | extern int SendReceive(const unsigned int /* xid */ , struct cifsSesInfo *, | 70 | extern int SendReceive(const unsigned int /* xid */ , struct cifsSesInfo *, |
65 | struct smb_hdr * /* input */ , | 71 | struct smb_hdr * /* input */ , |
66 | struct smb_hdr * /* out */ , | 72 | struct smb_hdr * /* out */ , |
@@ -79,6 +85,8 @@ extern int checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length); | |||
79 | extern bool is_valid_oplock_break(struct smb_hdr *smb, | 85 | extern bool is_valid_oplock_break(struct smb_hdr *smb, |
80 | struct TCP_Server_Info *); | 86 | struct TCP_Server_Info *); |
81 | extern bool is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof); | 87 | extern bool is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof); |
88 | extern void cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset, | ||
89 | unsigned int bytes_written); | ||
82 | extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, bool); | 90 | extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, bool); |
83 | extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool); | 91 | extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool); |
84 | extern unsigned int smbCalcSize(struct smb_hdr *ptr); | 92 | extern unsigned int smbCalcSize(struct smb_hdr *ptr); |
@@ -347,12 +355,13 @@ extern int CIFSSMBLock(const int xid, struct cifsTconInfo *tcon, | |||
347 | const __u16 netfid, const __u64 len, | 355 | const __u16 netfid, const __u64 len, |
348 | const __u64 offset, const __u32 numUnlock, | 356 | const __u64 offset, const __u32 numUnlock, |
349 | const __u32 numLock, const __u8 lockType, | 357 | const __u32 numLock, const __u8 lockType, |
350 | const bool waitFlag); | 358 | const bool waitFlag, const __u8 oplock_level); |
351 | extern int CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon, | 359 | extern int CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon, |
352 | const __u16 smb_file_id, const int get_flag, | 360 | const __u16 smb_file_id, const int get_flag, |
353 | const __u64 len, struct file_lock *, | 361 | const __u64 len, struct file_lock *, |
354 | const __u16 lock_type, const bool waitFlag); | 362 | const __u16 lock_type, const bool waitFlag); |
355 | extern int CIFSSMBTDis(const int xid, struct cifsTconInfo *tcon); | 363 | extern int CIFSSMBTDis(const int xid, struct cifsTconInfo *tcon); |
364 | extern int CIFSSMBEcho(struct TCP_Server_Info *server); | ||
356 | extern int CIFSSMBLogoff(const int xid, struct cifsSesInfo *ses); | 365 | extern int CIFSSMBLogoff(const int xid, struct cifsSesInfo *ses); |
357 | 366 | ||
358 | extern struct cifsSesInfo *sesInfoAlloc(void); | 367 | extern struct cifsSesInfo *sesInfoAlloc(void); |
@@ -366,7 +375,7 @@ extern int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *, | |||
366 | extern int cifs_verify_signature(struct smb_hdr *, | 375 | extern int cifs_verify_signature(struct smb_hdr *, |
367 | struct TCP_Server_Info *server, | 376 | struct TCP_Server_Info *server, |
368 | __u32 expected_sequence_number); | 377 | __u32 expected_sequence_number); |
369 | extern void SMBNTencrypt(unsigned char *, unsigned char *, unsigned char *); | 378 | extern int SMBNTencrypt(unsigned char *, unsigned char *, unsigned char *); |
370 | extern int setup_ntlm_response(struct cifsSesInfo *); | 379 | extern int setup_ntlm_response(struct cifsSesInfo *); |
371 | extern int setup_ntlmv2_rsp(struct cifsSesInfo *, const struct nls_table *); | 380 | extern int setup_ntlmv2_rsp(struct cifsSesInfo *, const struct nls_table *); |
372 | extern int cifs_crypto_shash_allocate(struct TCP_Server_Info *); | 381 | extern int cifs_crypto_shash_allocate(struct TCP_Server_Info *); |
@@ -416,4 +425,11 @@ extern bool CIFSCouldBeMFSymlink(const struct cifs_fattr *fattr); | |||
416 | extern int CIFSCheckMFSymlink(struct cifs_fattr *fattr, | 425 | extern int CIFSCheckMFSymlink(struct cifs_fattr *fattr, |
417 | const unsigned char *path, | 426 | const unsigned char *path, |
418 | struct cifs_sb_info *cifs_sb, int xid); | 427 | struct cifs_sb_info *cifs_sb, int xid); |
428 | extern int mdfour(unsigned char *, unsigned char *, int); | ||
429 | extern int E_md4hash(const unsigned char *passwd, unsigned char *p16); | ||
430 | extern void SMBencrypt(unsigned char *passwd, const unsigned char *c8, | ||
431 | unsigned char *p24); | ||
432 | extern void E_P16(unsigned char *p14, unsigned char *p16); | ||
433 | extern void E_P24(unsigned char *p21, const unsigned char *c8, | ||
434 | unsigned char *p24); | ||
419 | #endif /* _CIFSPROTO_H */ | 435 | #endif /* _CIFSPROTO_H */ |
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 2f6795e524d3..904aa47e3515 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c | |||
@@ -136,9 +136,6 @@ cifs_reconnect_tcon(struct cifsTconInfo *tcon, int smb_command) | |||
136 | } | 136 | } |
137 | } | 137 | } |
138 | 138 | ||
139 | if (ses->status == CifsExiting) | ||
140 | return -EIO; | ||
141 | |||
142 | /* | 139 | /* |
143 | * Give demultiplex thread up to 10 seconds to reconnect, should be | 140 | * Give demultiplex thread up to 10 seconds to reconnect, should be |
144 | * greater than cifs socket timeout which is 7 seconds | 141 | * greater than cifs socket timeout which is 7 seconds |
@@ -156,7 +153,7 @@ cifs_reconnect_tcon(struct cifsTconInfo *tcon, int smb_command) | |||
156 | * retrying until process is killed or server comes | 153 | * retrying until process is killed or server comes |
157 | * back on-line | 154 | * back on-line |
158 | */ | 155 | */ |
159 | if (!tcon->retry || ses->status == CifsExiting) { | 156 | if (!tcon->retry) { |
160 | cFYI(1, "gave up waiting on reconnect in smb_init"); | 157 | cFYI(1, "gave up waiting on reconnect in smb_init"); |
161 | return -EHOSTDOWN; | 158 | return -EHOSTDOWN; |
162 | } | 159 | } |
@@ -331,37 +328,35 @@ smb_init_no_reconnect(int smb_command, int wct, struct cifsTconInfo *tcon, | |||
331 | 328 | ||
332 | static int validate_t2(struct smb_t2_rsp *pSMB) | 329 | static int validate_t2(struct smb_t2_rsp *pSMB) |
333 | { | 330 | { |
334 | int rc = -EINVAL; | 331 | unsigned int total_size; |
335 | int total_size; | 332 | |
336 | char *pBCC; | 333 | /* check for plausible wct */ |
334 | if (pSMB->hdr.WordCount < 10) | ||
335 | goto vt2_err; | ||
337 | 336 | ||
338 | /* check for plausible wct, bcc and t2 data and parm sizes */ | ||
339 | /* check for parm and data offset going beyond end of smb */ | 337 | /* check for parm and data offset going beyond end of smb */ |
340 | if (pSMB->hdr.WordCount >= 10) { | 338 | if (get_unaligned_le16(&pSMB->t2_rsp.ParameterOffset) > 1024 || |
341 | if ((le16_to_cpu(pSMB->t2_rsp.ParameterOffset) <= 1024) && | 339 | get_unaligned_le16(&pSMB->t2_rsp.DataOffset) > 1024) |
342 | (le16_to_cpu(pSMB->t2_rsp.DataOffset) <= 1024)) { | 340 | goto vt2_err; |
343 | /* check that bcc is at least as big as parms + data */ | 341 | |
344 | /* check that bcc is less than negotiated smb buffer */ | 342 | /* check that bcc is at least as big as parms + data */ |
345 | total_size = le16_to_cpu(pSMB->t2_rsp.ParameterCount); | 343 | /* check that bcc is less than negotiated smb buffer */ |
346 | if (total_size < 512) { | 344 | total_size = get_unaligned_le16(&pSMB->t2_rsp.ParameterCount); |
347 | total_size += | 345 | if (total_size >= 512) |
348 | le16_to_cpu(pSMB->t2_rsp.DataCount); | 346 | goto vt2_err; |
349 | /* BCC le converted in SendReceive */ | 347 | |
350 | pBCC = (pSMB->hdr.WordCount * 2) + | 348 | total_size += get_unaligned_le16(&pSMB->t2_rsp.DataCount); |
351 | sizeof(struct smb_hdr) + | 349 | if (total_size > get_bcc(&pSMB->hdr) || |
352 | (char *)pSMB; | 350 | total_size >= CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) |
353 | if ((total_size <= (*(u16 *)pBCC)) && | 351 | goto vt2_err; |
354 | (total_size < | 352 | |
355 | CIFSMaxBufSize+MAX_CIFS_HDR_SIZE)) { | 353 | return 0; |
356 | return 0; | 354 | vt2_err: |
357 | } | ||
358 | } | ||
359 | } | ||
360 | } | ||
361 | cifs_dump_mem("Invalid transact2 SMB: ", (char *)pSMB, | 355 | cifs_dump_mem("Invalid transact2 SMB: ", (char *)pSMB, |
362 | sizeof(struct smb_t2_rsp) + 16); | 356 | sizeof(struct smb_t2_rsp) + 16); |
363 | return rc; | 357 | return -EINVAL; |
364 | } | 358 | } |
359 | |||
365 | int | 360 | int |
366 | CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) | 361 | CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) |
367 | { | 362 | { |
@@ -452,7 +447,6 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) | |||
452 | server->maxBuf = min((__u32)le16_to_cpu(rsp->MaxBufSize), | 447 | server->maxBuf = min((__u32)le16_to_cpu(rsp->MaxBufSize), |
453 | (__u32)CIFSMaxBufSize + MAX_CIFS_HDR_SIZE); | 448 | (__u32)CIFSMaxBufSize + MAX_CIFS_HDR_SIZE); |
454 | server->max_vcs = le16_to_cpu(rsp->MaxNumberVcs); | 449 | server->max_vcs = le16_to_cpu(rsp->MaxNumberVcs); |
455 | GETU32(server->sessid) = le32_to_cpu(rsp->SessionKey); | ||
456 | /* even though we do not use raw we might as well set this | 450 | /* even though we do not use raw we might as well set this |
457 | accurately, in case we ever find a need for it */ | 451 | accurately, in case we ever find a need for it */ |
458 | if ((le16_to_cpu(rsp->RawMode) & RAW_ENABLE) == RAW_ENABLE) { | 452 | if ((le16_to_cpu(rsp->RawMode) & RAW_ENABLE) == RAW_ENABLE) { |
@@ -566,7 +560,6 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) | |||
566 | (__u32) CIFSMaxBufSize + MAX_CIFS_HDR_SIZE); | 560 | (__u32) CIFSMaxBufSize + MAX_CIFS_HDR_SIZE); |
567 | server->max_rw = le32_to_cpu(pSMBr->MaxRawSize); | 561 | server->max_rw = le32_to_cpu(pSMBr->MaxRawSize); |
568 | cFYI(DBG2, "Max buf = %d", ses->server->maxBuf); | 562 | cFYI(DBG2, "Max buf = %d", ses->server->maxBuf); |
569 | GETU32(ses->server->sessid) = le32_to_cpu(pSMBr->SessionKey); | ||
570 | server->capabilities = le32_to_cpu(pSMBr->Capabilities); | 563 | server->capabilities = le32_to_cpu(pSMBr->Capabilities); |
571 | server->timeAdj = (int)(__s16)le16_to_cpu(pSMBr->ServerTimeZone); | 564 | server->timeAdj = (int)(__s16)le16_to_cpu(pSMBr->ServerTimeZone); |
572 | server->timeAdj *= 60; | 565 | server->timeAdj *= 60; |
@@ -706,6 +699,53 @@ CIFSSMBTDis(const int xid, struct cifsTconInfo *tcon) | |||
706 | return rc; | 699 | return rc; |
707 | } | 700 | } |
708 | 701 | ||
702 | /* | ||
703 | * This is a no-op for now. We're not really interested in the reply, but | ||
704 | * rather in the fact that the server sent one and that server->lstrp | ||
705 | * gets updated. | ||
706 | * | ||
707 | * FIXME: maybe we should consider checking that the reply matches request? | ||
708 | */ | ||
709 | static void | ||
710 | cifs_echo_callback(struct mid_q_entry *mid) | ||
711 | { | ||
712 | struct TCP_Server_Info *server = mid->callback_data; | ||
713 | |||
714 | DeleteMidQEntry(mid); | ||
715 | atomic_dec(&server->inFlight); | ||
716 | wake_up(&server->request_q); | ||
717 | } | ||
718 | |||
719 | int | ||
720 | CIFSSMBEcho(struct TCP_Server_Info *server) | ||
721 | { | ||
722 | ECHO_REQ *smb; | ||
723 | int rc = 0; | ||
724 | |||
725 | cFYI(1, "In echo request"); | ||
726 | |||
727 | rc = small_smb_init(SMB_COM_ECHO, 0, NULL, (void **)&smb); | ||
728 | if (rc) | ||
729 | return rc; | ||
730 | |||
731 | /* set up echo request */ | ||
732 | smb->hdr.Tid = cpu_to_le16(0xffff); | ||
733 | smb->hdr.WordCount = 1; | ||
734 | put_unaligned_le16(1, &smb->EchoCount); | ||
735 | put_bcc_le(1, &smb->hdr); | ||
736 | smb->Data[0] = 'a'; | ||
737 | smb->hdr.smb_buf_length += 3; | ||
738 | |||
739 | rc = cifs_call_async(server, (struct smb_hdr *)smb, | ||
740 | cifs_echo_callback, server); | ||
741 | if (rc) | ||
742 | cFYI(1, "Echo request failed: %d", rc); | ||
743 | |||
744 | cifs_small_buf_release(smb); | ||
745 | |||
746 | return rc; | ||
747 | } | ||
748 | |||
709 | int | 749 | int |
710 | CIFSSMBLogoff(const int xid, struct cifsSesInfo *ses) | 750 | CIFSSMBLogoff(const int xid, struct cifsSesInfo *ses) |
711 | { | 751 | { |
@@ -1193,7 +1233,7 @@ OldOpenRetry: | |||
1193 | pSMB->ByteCount = cpu_to_le16(count); | 1233 | pSMB->ByteCount = cpu_to_le16(count); |
1194 | /* long_op set to 1 to allow for oplock break timeouts */ | 1234 | /* long_op set to 1 to allow for oplock break timeouts */ |
1195 | rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, | 1235 | rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, |
1196 | (struct smb_hdr *)pSMBr, &bytes_returned, CIFS_LONG_OP); | 1236 | (struct smb_hdr *)pSMBr, &bytes_returned, 0); |
1197 | cifs_stats_inc(&tcon->num_opens); | 1237 | cifs_stats_inc(&tcon->num_opens); |
1198 | if (rc) { | 1238 | if (rc) { |
1199 | cFYI(1, "Error in Open = %d", rc); | 1239 | cFYI(1, "Error in Open = %d", rc); |
@@ -1306,7 +1346,7 @@ openRetry: | |||
1306 | pSMB->ByteCount = cpu_to_le16(count); | 1346 | pSMB->ByteCount = cpu_to_le16(count); |
1307 | /* long_op set to 1 to allow for oplock break timeouts */ | 1347 | /* long_op set to 1 to allow for oplock break timeouts */ |
1308 | rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, | 1348 | rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, |
1309 | (struct smb_hdr *)pSMBr, &bytes_returned, CIFS_LONG_OP); | 1349 | (struct smb_hdr *)pSMBr, &bytes_returned, 0); |
1310 | cifs_stats_inc(&tcon->num_opens); | 1350 | cifs_stats_inc(&tcon->num_opens); |
1311 | if (rc) { | 1351 | if (rc) { |
1312 | cFYI(1, "Error in Open = %d", rc); | 1352 | cFYI(1, "Error in Open = %d", rc); |
@@ -1388,7 +1428,7 @@ CIFSSMBRead(const int xid, struct cifsTconInfo *tcon, const int netfid, | |||
1388 | iov[0].iov_base = (char *)pSMB; | 1428 | iov[0].iov_base = (char *)pSMB; |
1389 | iov[0].iov_len = pSMB->hdr.smb_buf_length + 4; | 1429 | iov[0].iov_len = pSMB->hdr.smb_buf_length + 4; |
1390 | rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovecs */, | 1430 | rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovecs */, |
1391 | &resp_buf_type, CIFS_STD_OP | CIFS_LOG_ERROR); | 1431 | &resp_buf_type, CIFS_LOG_ERROR); |
1392 | cifs_stats_inc(&tcon->num_reads); | 1432 | cifs_stats_inc(&tcon->num_reads); |
1393 | pSMBr = (READ_RSP *)iov[0].iov_base; | 1433 | pSMBr = (READ_RSP *)iov[0].iov_base; |
1394 | if (rc) { | 1434 | if (rc) { |
@@ -1663,7 +1703,8 @@ int | |||
1663 | CIFSSMBLock(const int xid, struct cifsTconInfo *tcon, | 1703 | CIFSSMBLock(const int xid, struct cifsTconInfo *tcon, |
1664 | const __u16 smb_file_id, const __u64 len, | 1704 | const __u16 smb_file_id, const __u64 len, |
1665 | const __u64 offset, const __u32 numUnlock, | 1705 | const __u64 offset, const __u32 numUnlock, |
1666 | const __u32 numLock, const __u8 lockType, const bool waitFlag) | 1706 | const __u32 numLock, const __u8 lockType, |
1707 | const bool waitFlag, const __u8 oplock_level) | ||
1667 | { | 1708 | { |
1668 | int rc = 0; | 1709 | int rc = 0; |
1669 | LOCK_REQ *pSMB = NULL; | 1710 | LOCK_REQ *pSMB = NULL; |
@@ -1691,6 +1732,7 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon, | |||
1691 | pSMB->NumberOfLocks = cpu_to_le16(numLock); | 1732 | pSMB->NumberOfLocks = cpu_to_le16(numLock); |
1692 | pSMB->NumberOfUnlocks = cpu_to_le16(numUnlock); | 1733 | pSMB->NumberOfUnlocks = cpu_to_le16(numUnlock); |
1693 | pSMB->LockType = lockType; | 1734 | pSMB->LockType = lockType; |
1735 | pSMB->OplockLevel = oplock_level; | ||
1694 | pSMB->AndXCommand = 0xFF; /* none */ | 1736 | pSMB->AndXCommand = 0xFF; /* none */ |
1695 | pSMB->Fid = smb_file_id; /* netfid stays le */ | 1737 | pSMB->Fid = smb_file_id; /* netfid stays le */ |
1696 | 1738 | ||
@@ -3087,7 +3129,7 @@ CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid, | |||
3087 | iov[0].iov_len = pSMB->hdr.smb_buf_length + 4; | 3129 | iov[0].iov_len = pSMB->hdr.smb_buf_length + 4; |
3088 | 3130 | ||
3089 | rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovec */, &buf_type, | 3131 | rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovec */, &buf_type, |
3090 | CIFS_STD_OP); | 3132 | 0); |
3091 | cifs_stats_inc(&tcon->num_acl_get); | 3133 | cifs_stats_inc(&tcon->num_acl_get); |
3092 | if (rc) { | 3134 | if (rc) { |
3093 | cFYI(1, "Send error in QuerySecDesc = %d", rc); | 3135 | cFYI(1, "Send error in QuerySecDesc = %d", rc); |
@@ -4869,7 +4911,6 @@ CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size, | |||
4869 | __u16 fid, __u32 pid_of_opener, bool SetAllocation) | 4911 | __u16 fid, __u32 pid_of_opener, bool SetAllocation) |
4870 | { | 4912 | { |
4871 | struct smb_com_transaction2_sfi_req *pSMB = NULL; | 4913 | struct smb_com_transaction2_sfi_req *pSMB = NULL; |
4872 | char *data_offset; | ||
4873 | struct file_end_of_file_info *parm_data; | 4914 | struct file_end_of_file_info *parm_data; |
4874 | int rc = 0; | 4915 | int rc = 0; |
4875 | __u16 params, param_offset, offset, byte_count, count; | 4916 | __u16 params, param_offset, offset, byte_count, count; |
@@ -4893,8 +4934,6 @@ CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size, | |||
4893 | param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid) - 4; | 4934 | param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid) - 4; |
4894 | offset = param_offset + params; | 4935 | offset = param_offset + params; |
4895 | 4936 | ||
4896 | data_offset = (char *) (&pSMB->hdr.Protocol) + offset; | ||
4897 | |||
4898 | count = sizeof(struct file_end_of_file_info); | 4937 | count = sizeof(struct file_end_of_file_info); |
4899 | pSMB->MaxParameterCount = cpu_to_le16(2); | 4938 | pSMB->MaxParameterCount = cpu_to_le16(2); |
4900 | /* BB find exact max SMB PDU from sess structure BB */ | 4939 | /* BB find exact max SMB PDU from sess structure BB */ |
@@ -5562,7 +5601,7 @@ QAllEAsRetry: | |||
5562 | } | 5601 | } |
5563 | 5602 | ||
5564 | /* make sure list_len doesn't go past end of SMB */ | 5603 | /* make sure list_len doesn't go past end of SMB */ |
5565 | end_of_smb = (char *)pByteArea(&pSMBr->hdr) + BCC(&pSMBr->hdr); | 5604 | end_of_smb = (char *)pByteArea(&pSMBr->hdr) + get_bcc(&pSMBr->hdr); |
5566 | if ((char *)ea_response_data + list_len > end_of_smb) { | 5605 | if ((char *)ea_response_data + list_len > end_of_smb) { |
5567 | cFYI(1, "EA list appears to go beyond SMB"); | 5606 | cFYI(1, "EA list appears to go beyond SMB"); |
5568 | rc = -EIO; | 5607 | rc = -EIO; |
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index a65d311d163a..8d6c17ab593d 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c | |||
@@ -52,8 +52,8 @@ | |||
52 | #define CIFS_PORT 445 | 52 | #define CIFS_PORT 445 |
53 | #define RFC1001_PORT 139 | 53 | #define RFC1001_PORT 139 |
54 | 54 | ||
55 | extern void SMBNTencrypt(unsigned char *passwd, unsigned char *c8, | 55 | /* SMB echo "timeout" -- FIXME: tunable? */ |
56 | unsigned char *p24); | 56 | #define SMB_ECHO_INTERVAL (60 * HZ) |
57 | 57 | ||
58 | extern mempool_t *cifs_req_poolp; | 58 | extern mempool_t *cifs_req_poolp; |
59 | 59 | ||
@@ -84,6 +84,7 @@ struct smb_vol { | |||
84 | bool no_xattr:1; /* set if xattr (EA) support should be disabled*/ | 84 | bool no_xattr:1; /* set if xattr (EA) support should be disabled*/ |
85 | bool server_ino:1; /* use inode numbers from server ie UniqueId */ | 85 | bool server_ino:1; /* use inode numbers from server ie UniqueId */ |
86 | bool direct_io:1; | 86 | bool direct_io:1; |
87 | bool strict_io:1; /* strict cache behavior */ | ||
87 | bool remap:1; /* set to remap seven reserved chars in filenames */ | 88 | bool remap:1; /* set to remap seven reserved chars in filenames */ |
88 | bool posix_paths:1; /* unset to not ask for posix pathnames. */ | 89 | bool posix_paths:1; /* unset to not ask for posix pathnames. */ |
89 | bool no_linux_ext:1; | 90 | bool no_linux_ext:1; |
@@ -152,6 +153,7 @@ cifs_reconnect(struct TCP_Server_Info *server) | |||
152 | 153 | ||
153 | /* before reconnecting the tcp session, mark the smb session (uid) | 154 | /* before reconnecting the tcp session, mark the smb session (uid) |
154 | and the tid bad so they are not used until reconnected */ | 155 | and the tid bad so they are not used until reconnected */ |
156 | cFYI(1, "%s: marking sessions and tcons for reconnect", __func__); | ||
155 | spin_lock(&cifs_tcp_ses_lock); | 157 | spin_lock(&cifs_tcp_ses_lock); |
156 | list_for_each(tmp, &server->smb_ses_list) { | 158 | list_for_each(tmp, &server->smb_ses_list) { |
157 | ses = list_entry(tmp, struct cifsSesInfo, smb_ses_list); | 159 | ses = list_entry(tmp, struct cifsSesInfo, smb_ses_list); |
@@ -163,7 +165,9 @@ cifs_reconnect(struct TCP_Server_Info *server) | |||
163 | } | 165 | } |
164 | } | 166 | } |
165 | spin_unlock(&cifs_tcp_ses_lock); | 167 | spin_unlock(&cifs_tcp_ses_lock); |
168 | |||
166 | /* do not want to be sending data on a socket we are freeing */ | 169 | /* do not want to be sending data on a socket we are freeing */ |
170 | cFYI(1, "%s: tearing down socket", __func__); | ||
167 | mutex_lock(&server->srv_mutex); | 171 | mutex_lock(&server->srv_mutex); |
168 | if (server->ssocket) { | 172 | if (server->ssocket) { |
169 | cFYI(1, "State: 0x%x Flags: 0x%lx", server->ssocket->state, | 173 | cFYI(1, "State: 0x%x Flags: 0x%lx", server->ssocket->state, |
@@ -180,22 +184,20 @@ cifs_reconnect(struct TCP_Server_Info *server) | |||
180 | kfree(server->session_key.response); | 184 | kfree(server->session_key.response); |
181 | server->session_key.response = NULL; | 185 | server->session_key.response = NULL; |
182 | server->session_key.len = 0; | 186 | server->session_key.len = 0; |
187 | server->lstrp = jiffies; | ||
188 | mutex_unlock(&server->srv_mutex); | ||
183 | 189 | ||
190 | /* mark submitted MIDs for retry and issue callback */ | ||
191 | cFYI(1, "%s: issuing mid callbacks", __func__); | ||
184 | spin_lock(&GlobalMid_Lock); | 192 | spin_lock(&GlobalMid_Lock); |
185 | list_for_each(tmp, &server->pending_mid_q) { | 193 | list_for_each_safe(tmp, tmp2, &server->pending_mid_q) { |
186 | mid_entry = list_entry(tmp, struct | 194 | mid_entry = list_entry(tmp, struct mid_q_entry, qhead); |
187 | mid_q_entry, | 195 | if (mid_entry->midState == MID_REQUEST_SUBMITTED) |
188 | qhead); | ||
189 | if (mid_entry->midState == MID_REQUEST_SUBMITTED) { | ||
190 | /* Mark other intransit requests as needing | ||
191 | retry so we do not immediately mark the | ||
192 | session bad again (ie after we reconnect | ||
193 | below) as they timeout too */ | ||
194 | mid_entry->midState = MID_RETRY_NEEDED; | 196 | mid_entry->midState = MID_RETRY_NEEDED; |
195 | } | 197 | list_del_init(&mid_entry->qhead); |
198 | mid_entry->callback(mid_entry); | ||
196 | } | 199 | } |
197 | spin_unlock(&GlobalMid_Lock); | 200 | spin_unlock(&GlobalMid_Lock); |
198 | mutex_unlock(&server->srv_mutex); | ||
199 | 201 | ||
200 | while ((server->tcpStatus != CifsExiting) && | 202 | while ((server->tcpStatus != CifsExiting) && |
201 | (server->tcpStatus != CifsGood)) { | 203 | (server->tcpStatus != CifsGood)) { |
@@ -212,10 +214,9 @@ cifs_reconnect(struct TCP_Server_Info *server) | |||
212 | if (server->tcpStatus != CifsExiting) | 214 | if (server->tcpStatus != CifsExiting) |
213 | server->tcpStatus = CifsGood; | 215 | server->tcpStatus = CifsGood; |
214 | spin_unlock(&GlobalMid_Lock); | 216 | spin_unlock(&GlobalMid_Lock); |
215 | /* atomic_set(&server->inFlight,0);*/ | ||
216 | wake_up(&server->response_q); | ||
217 | } | 217 | } |
218 | } | 218 | } |
219 | |||
219 | return rc; | 220 | return rc; |
220 | } | 221 | } |
221 | 222 | ||
@@ -229,9 +230,8 @@ cifs_reconnect(struct TCP_Server_Info *server) | |||
229 | static int check2ndT2(struct smb_hdr *pSMB, unsigned int maxBufSize) | 230 | static int check2ndT2(struct smb_hdr *pSMB, unsigned int maxBufSize) |
230 | { | 231 | { |
231 | struct smb_t2_rsp *pSMBt; | 232 | struct smb_t2_rsp *pSMBt; |
232 | int total_data_size; | ||
233 | int data_in_this_rsp; | ||
234 | int remaining; | 233 | int remaining; |
234 | __u16 total_data_size, data_in_this_rsp; | ||
235 | 235 | ||
236 | if (pSMB->Command != SMB_COM_TRANSACTION2) | 236 | if (pSMB->Command != SMB_COM_TRANSACTION2) |
237 | return 0; | 237 | return 0; |
@@ -245,8 +245,8 @@ static int check2ndT2(struct smb_hdr *pSMB, unsigned int maxBufSize) | |||
245 | 245 | ||
246 | pSMBt = (struct smb_t2_rsp *)pSMB; | 246 | pSMBt = (struct smb_t2_rsp *)pSMB; |
247 | 247 | ||
248 | total_data_size = le16_to_cpu(pSMBt->t2_rsp.TotalDataCount); | 248 | total_data_size = get_unaligned_le16(&pSMBt->t2_rsp.TotalDataCount); |
249 | data_in_this_rsp = le16_to_cpu(pSMBt->t2_rsp.DataCount); | 249 | data_in_this_rsp = get_unaligned_le16(&pSMBt->t2_rsp.DataCount); |
250 | 250 | ||
251 | remaining = total_data_size - data_in_this_rsp; | 251 | remaining = total_data_size - data_in_this_rsp; |
252 | 252 | ||
@@ -272,21 +272,18 @@ static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB) | |||
272 | { | 272 | { |
273 | struct smb_t2_rsp *pSMB2 = (struct smb_t2_rsp *)psecond; | 273 | struct smb_t2_rsp *pSMB2 = (struct smb_t2_rsp *)psecond; |
274 | struct smb_t2_rsp *pSMBt = (struct smb_t2_rsp *)pTargetSMB; | 274 | struct smb_t2_rsp *pSMBt = (struct smb_t2_rsp *)pTargetSMB; |
275 | int total_data_size; | ||
276 | int total_in_buf; | ||
277 | int remaining; | ||
278 | int total_in_buf2; | ||
279 | char *data_area_of_target; | 275 | char *data_area_of_target; |
280 | char *data_area_of_buf2; | 276 | char *data_area_of_buf2; |
281 | __u16 byte_count; | 277 | int remaining; |
278 | __u16 byte_count, total_data_size, total_in_buf, total_in_buf2; | ||
282 | 279 | ||
283 | total_data_size = le16_to_cpu(pSMBt->t2_rsp.TotalDataCount); | 280 | total_data_size = get_unaligned_le16(&pSMBt->t2_rsp.TotalDataCount); |
284 | 281 | ||
285 | if (total_data_size != le16_to_cpu(pSMB2->t2_rsp.TotalDataCount)) { | 282 | if (total_data_size != |
283 | get_unaligned_le16(&pSMB2->t2_rsp.TotalDataCount)) | ||
286 | cFYI(1, "total data size of primary and secondary t2 differ"); | 284 | cFYI(1, "total data size of primary and secondary t2 differ"); |
287 | } | ||
288 | 285 | ||
289 | total_in_buf = le16_to_cpu(pSMBt->t2_rsp.DataCount); | 286 | total_in_buf = get_unaligned_le16(&pSMBt->t2_rsp.DataCount); |
290 | 287 | ||
291 | remaining = total_data_size - total_in_buf; | 288 | remaining = total_data_size - total_in_buf; |
292 | 289 | ||
@@ -296,28 +293,28 @@ static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB) | |||
296 | if (remaining == 0) /* nothing to do, ignore */ | 293 | if (remaining == 0) /* nothing to do, ignore */ |
297 | return 0; | 294 | return 0; |
298 | 295 | ||
299 | total_in_buf2 = le16_to_cpu(pSMB2->t2_rsp.DataCount); | 296 | total_in_buf2 = get_unaligned_le16(&pSMB2->t2_rsp.DataCount); |
300 | if (remaining < total_in_buf2) { | 297 | if (remaining < total_in_buf2) { |
301 | cFYI(1, "transact2 2nd response contains too much data"); | 298 | cFYI(1, "transact2 2nd response contains too much data"); |
302 | } | 299 | } |
303 | 300 | ||
304 | /* find end of first SMB data area */ | 301 | /* find end of first SMB data area */ |
305 | data_area_of_target = (char *)&pSMBt->hdr.Protocol + | 302 | data_area_of_target = (char *)&pSMBt->hdr.Protocol + |
306 | le16_to_cpu(pSMBt->t2_rsp.DataOffset); | 303 | get_unaligned_le16(&pSMBt->t2_rsp.DataOffset); |
307 | /* validate target area */ | 304 | /* validate target area */ |
308 | 305 | ||
309 | data_area_of_buf2 = (char *) &pSMB2->hdr.Protocol + | 306 | data_area_of_buf2 = (char *)&pSMB2->hdr.Protocol + |
310 | le16_to_cpu(pSMB2->t2_rsp.DataOffset); | 307 | get_unaligned_le16(&pSMB2->t2_rsp.DataOffset); |
311 | 308 | ||
312 | data_area_of_target += total_in_buf; | 309 | data_area_of_target += total_in_buf; |
313 | 310 | ||
314 | /* copy second buffer into end of first buffer */ | 311 | /* copy second buffer into end of first buffer */ |
315 | memcpy(data_area_of_target, data_area_of_buf2, total_in_buf2); | 312 | memcpy(data_area_of_target, data_area_of_buf2, total_in_buf2); |
316 | total_in_buf += total_in_buf2; | 313 | total_in_buf += total_in_buf2; |
317 | pSMBt->t2_rsp.DataCount = cpu_to_le16(total_in_buf); | 314 | put_unaligned_le16(total_in_buf, &pSMBt->t2_rsp.DataCount); |
318 | byte_count = le16_to_cpu(BCC_LE(pTargetSMB)); | 315 | byte_count = get_bcc_le(pTargetSMB); |
319 | byte_count += total_in_buf2; | 316 | byte_count += total_in_buf2; |
320 | BCC_LE(pTargetSMB) = cpu_to_le16(byte_count); | 317 | put_bcc_le(byte_count, pTargetSMB); |
321 | 318 | ||
322 | byte_count = pTargetSMB->smb_buf_length; | 319 | byte_count = pTargetSMB->smb_buf_length; |
323 | byte_count += total_in_buf2; | 320 | byte_count += total_in_buf2; |
@@ -331,7 +328,31 @@ static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB) | |||
331 | return 0; /* we are done */ | 328 | return 0; /* we are done */ |
332 | } else /* more responses to go */ | 329 | } else /* more responses to go */ |
333 | return 1; | 330 | return 1; |
331 | } | ||
332 | |||
333 | static void | ||
334 | cifs_echo_request(struct work_struct *work) | ||
335 | { | ||
336 | int rc; | ||
337 | struct TCP_Server_Info *server = container_of(work, | ||
338 | struct TCP_Server_Info, echo.work); | ||
339 | |||
340 | /* | ||
341 | * We cannot send an echo until the NEGOTIATE_PROTOCOL request is | ||
342 | * done, which is indicated by maxBuf != 0. Also, no need to ping if | ||
343 | * we got a response recently | ||
344 | */ | ||
345 | if (server->maxBuf == 0 || | ||
346 | time_before(jiffies, server->lstrp + SMB_ECHO_INTERVAL - HZ)) | ||
347 | goto requeue_echo; | ||
348 | |||
349 | rc = CIFSSMBEcho(server); | ||
350 | if (rc) | ||
351 | cFYI(1, "Unable to send echo request to server: %s", | ||
352 | server->hostname); | ||
334 | 353 | ||
354 | requeue_echo: | ||
355 | queue_delayed_work(system_nrt_wq, &server->echo, SMB_ECHO_INTERVAL); | ||
335 | } | 356 | } |
336 | 357 | ||
337 | static int | 358 | static int |
@@ -345,8 +366,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server) | |||
345 | struct msghdr smb_msg; | 366 | struct msghdr smb_msg; |
346 | struct kvec iov; | 367 | struct kvec iov; |
347 | struct socket *csocket = server->ssocket; | 368 | struct socket *csocket = server->ssocket; |
348 | struct list_head *tmp; | 369 | struct list_head *tmp, *tmp2; |
349 | struct cifsSesInfo *ses; | ||
350 | struct task_struct *task_to_wake = NULL; | 370 | struct task_struct *task_to_wake = NULL; |
351 | struct mid_q_entry *mid_entry; | 371 | struct mid_q_entry *mid_entry; |
352 | char temp; | 372 | char temp; |
@@ -399,7 +419,20 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server) | |||
399 | smb_msg.msg_control = NULL; | 419 | smb_msg.msg_control = NULL; |
400 | smb_msg.msg_controllen = 0; | 420 | smb_msg.msg_controllen = 0; |
401 | pdu_length = 4; /* enough to get RFC1001 header */ | 421 | pdu_length = 4; /* enough to get RFC1001 header */ |
422 | |||
402 | incomplete_rcv: | 423 | incomplete_rcv: |
424 | if (echo_retries > 0 && | ||
425 | time_after(jiffies, server->lstrp + | ||
426 | (echo_retries * SMB_ECHO_INTERVAL))) { | ||
427 | cERROR(1, "Server %s has not responded in %d seconds. " | ||
428 | "Reconnecting...", server->hostname, | ||
429 | (echo_retries * SMB_ECHO_INTERVAL / HZ)); | ||
430 | cifs_reconnect(server); | ||
431 | csocket = server->ssocket; | ||
432 | wake_up(&server->response_q); | ||
433 | continue; | ||
434 | } | ||
435 | |||
403 | length = | 436 | length = |
404 | kernel_recvmsg(csocket, &smb_msg, | 437 | kernel_recvmsg(csocket, &smb_msg, |
405 | &iov, 1, pdu_length, 0 /* BB other flags? */); | 438 | &iov, 1, pdu_length, 0 /* BB other flags? */); |
@@ -550,25 +583,36 @@ incomplete_rcv: | |||
550 | else if (reconnect == 1) | 583 | else if (reconnect == 1) |
551 | continue; | 584 | continue; |
552 | 585 | ||
553 | length += 4; /* account for rfc1002 hdr */ | 586 | total_read += 4; /* account for rfc1002 hdr */ |
554 | 587 | ||
588 | dump_smb(smb_buffer, total_read); | ||
555 | 589 | ||
556 | dump_smb(smb_buffer, length); | 590 | /* |
557 | if (checkSMB(smb_buffer, smb_buffer->Mid, total_read+4)) { | 591 | * We know that we received enough to get to the MID as we |
558 | cifs_dump_mem("Bad SMB: ", smb_buffer, 48); | 592 | * checked the pdu_length earlier. Now check to see |
559 | continue; | 593 | * if the rest of the header is OK. We borrow the length |
560 | } | 594 | * var for the rest of the loop to avoid a new stack var. |
595 | * | ||
596 | * 48 bytes is enough to display the header and a little bit | ||
597 | * into the payload for debugging purposes. | ||
598 | */ | ||
599 | length = checkSMB(smb_buffer, smb_buffer->Mid, total_read); | ||
600 | if (length != 0) | ||
601 | cifs_dump_mem("Bad SMB: ", smb_buffer, | ||
602 | min_t(unsigned int, total_read, 48)); | ||
561 | 603 | ||
604 | mid_entry = NULL; | ||
605 | server->lstrp = jiffies; | ||
562 | 606 | ||
563 | task_to_wake = NULL; | ||
564 | spin_lock(&GlobalMid_Lock); | 607 | spin_lock(&GlobalMid_Lock); |
565 | list_for_each(tmp, &server->pending_mid_q) { | 608 | list_for_each_safe(tmp, tmp2, &server->pending_mid_q) { |
566 | mid_entry = list_entry(tmp, struct mid_q_entry, qhead); | 609 | mid_entry = list_entry(tmp, struct mid_q_entry, qhead); |
567 | 610 | ||
568 | if ((mid_entry->mid == smb_buffer->Mid) && | 611 | if ((mid_entry->mid == smb_buffer->Mid) && |
569 | (mid_entry->midState == MID_REQUEST_SUBMITTED) && | 612 | (mid_entry->midState == MID_REQUEST_SUBMITTED) && |
570 | (mid_entry->command == smb_buffer->Command)) { | 613 | (mid_entry->command == smb_buffer->Command)) { |
571 | if (check2ndT2(smb_buffer,server->maxBuf) > 0) { | 614 | if (length == 0 && |
615 | check2ndT2(smb_buffer, server->maxBuf) > 0) { | ||
572 | /* We have a multipart transact2 resp */ | 616 | /* We have a multipart transact2 resp */ |
573 | isMultiRsp = true; | 617 | isMultiRsp = true; |
574 | if (mid_entry->resp_buf) { | 618 | if (mid_entry->resp_buf) { |
@@ -603,20 +647,24 @@ incomplete_rcv: | |||
603 | mid_entry->resp_buf = smb_buffer; | 647 | mid_entry->resp_buf = smb_buffer; |
604 | mid_entry->largeBuf = isLargeBuf; | 648 | mid_entry->largeBuf = isLargeBuf; |
605 | multi_t2_fnd: | 649 | multi_t2_fnd: |
606 | task_to_wake = mid_entry->tsk; | 650 | if (length == 0) |
607 | mid_entry->midState = MID_RESPONSE_RECEIVED; | 651 | mid_entry->midState = |
652 | MID_RESPONSE_RECEIVED; | ||
653 | else | ||
654 | mid_entry->midState = | ||
655 | MID_RESPONSE_MALFORMED; | ||
608 | #ifdef CONFIG_CIFS_STATS2 | 656 | #ifdef CONFIG_CIFS_STATS2 |
609 | mid_entry->when_received = jiffies; | 657 | mid_entry->when_received = jiffies; |
610 | #endif | 658 | #endif |
611 | /* so we do not time out requests to server | 659 | list_del_init(&mid_entry->qhead); |
612 | which is still responding (since server could | 660 | mid_entry->callback(mid_entry); |
613 | be busy but not dead) */ | ||
614 | server->lstrp = jiffies; | ||
615 | break; | 661 | break; |
616 | } | 662 | } |
663 | mid_entry = NULL; | ||
617 | } | 664 | } |
618 | spin_unlock(&GlobalMid_Lock); | 665 | spin_unlock(&GlobalMid_Lock); |
619 | if (task_to_wake) { | 666 | |
667 | if (mid_entry != NULL) { | ||
620 | /* Was previous buf put in mpx struct for multi-rsp? */ | 668 | /* Was previous buf put in mpx struct for multi-rsp? */ |
621 | if (!isMultiRsp) { | 669 | if (!isMultiRsp) { |
622 | /* smb buffer will be freed by user thread */ | 670 | /* smb buffer will be freed by user thread */ |
@@ -625,11 +673,13 @@ multi_t2_fnd: | |||
625 | else | 673 | else |
626 | smallbuf = NULL; | 674 | smallbuf = NULL; |
627 | } | 675 | } |
628 | wake_up_process(task_to_wake); | 676 | } else if (length != 0) { |
677 | /* response sanity checks failed */ | ||
678 | continue; | ||
629 | } else if (!is_valid_oplock_break(smb_buffer, server) && | 679 | } else if (!is_valid_oplock_break(smb_buffer, server) && |
630 | !isMultiRsp) { | 680 | !isMultiRsp) { |
631 | cERROR(1, "No task to wake, unknown frame received! " | 681 | cERROR(1, "No task to wake, unknown frame received! " |
632 | "NumMids %d", midCount.counter); | 682 | "NumMids %d", atomic_read(&midCount)); |
633 | cifs_dump_mem("Received Data is: ", (char *)smb_buffer, | 683 | cifs_dump_mem("Received Data is: ", (char *)smb_buffer, |
634 | sizeof(struct smb_hdr)); | 684 | sizeof(struct smb_hdr)); |
635 | #ifdef CONFIG_CIFS_DEBUG2 | 685 | #ifdef CONFIG_CIFS_DEBUG2 |
@@ -677,44 +727,16 @@ multi_t2_fnd: | |||
677 | if (smallbuf) /* no sense logging a debug message if NULL */ | 727 | if (smallbuf) /* no sense logging a debug message if NULL */ |
678 | cifs_small_buf_release(smallbuf); | 728 | cifs_small_buf_release(smallbuf); |
679 | 729 | ||
680 | /* | 730 | if (!list_empty(&server->pending_mid_q)) { |
681 | * BB: we shouldn't have to do any of this. It shouldn't be | ||
682 | * possible to exit from the thread with active SMB sessions | ||
683 | */ | ||
684 | spin_lock(&cifs_tcp_ses_lock); | ||
685 | if (list_empty(&server->pending_mid_q)) { | ||
686 | /* loop through server session structures attached to this and | ||
687 | mark them dead */ | ||
688 | list_for_each(tmp, &server->smb_ses_list) { | ||
689 | ses = list_entry(tmp, struct cifsSesInfo, | ||
690 | smb_ses_list); | ||
691 | ses->status = CifsExiting; | ||
692 | ses->server = NULL; | ||
693 | } | ||
694 | spin_unlock(&cifs_tcp_ses_lock); | ||
695 | } else { | ||
696 | /* although we can not zero the server struct pointer yet, | ||
697 | since there are active requests which may depnd on them, | ||
698 | mark the corresponding SMB sessions as exiting too */ | ||
699 | list_for_each(tmp, &server->smb_ses_list) { | ||
700 | ses = list_entry(tmp, struct cifsSesInfo, | ||
701 | smb_ses_list); | ||
702 | ses->status = CifsExiting; | ||
703 | } | ||
704 | |||
705 | spin_lock(&GlobalMid_Lock); | 731 | spin_lock(&GlobalMid_Lock); |
706 | list_for_each(tmp, &server->pending_mid_q) { | 732 | list_for_each_safe(tmp, tmp2, &server->pending_mid_q) { |
707 | mid_entry = list_entry(tmp, struct mid_q_entry, qhead); | 733 | mid_entry = list_entry(tmp, struct mid_q_entry, qhead); |
708 | if (mid_entry->midState == MID_REQUEST_SUBMITTED) { | 734 | cFYI(1, "Clearing Mid 0x%x - issuing callback", |
709 | cFYI(1, "Clearing Mid 0x%x - waking up ", | ||
710 | mid_entry->mid); | 735 | mid_entry->mid); |
711 | task_to_wake = mid_entry->tsk; | 736 | list_del_init(&mid_entry->qhead); |
712 | if (task_to_wake) | 737 | mid_entry->callback(mid_entry); |
713 | wake_up_process(task_to_wake); | ||
714 | } | ||
715 | } | 738 | } |
716 | spin_unlock(&GlobalMid_Lock); | 739 | spin_unlock(&GlobalMid_Lock); |
717 | spin_unlock(&cifs_tcp_ses_lock); | ||
718 | /* 1/8th of sec is more than enough time for them to exit */ | 740 | /* 1/8th of sec is more than enough time for them to exit */ |
719 | msleep(125); | 741 | msleep(125); |
720 | } | 742 | } |
@@ -732,18 +754,6 @@ multi_t2_fnd: | |||
732 | coming home not much else we can do but free the memory */ | 754 | coming home not much else we can do but free the memory */ |
733 | } | 755 | } |
734 | 756 | ||
735 | /* last chance to mark ses pointers invalid | ||
736 | if there are any pointing to this (e.g | ||
737 | if a crazy root user tried to kill cifsd | ||
738 | kernel thread explicitly this might happen) */ | ||
739 | /* BB: This shouldn't be necessary, see above */ | ||
740 | spin_lock(&cifs_tcp_ses_lock); | ||
741 | list_for_each(tmp, &server->smb_ses_list) { | ||
742 | ses = list_entry(tmp, struct cifsSesInfo, smb_ses_list); | ||
743 | ses->server = NULL; | ||
744 | } | ||
745 | spin_unlock(&cifs_tcp_ses_lock); | ||
746 | |||
747 | kfree(server->hostname); | 757 | kfree(server->hostname); |
748 | task_to_wake = xchg(&server->tsk, NULL); | 758 | task_to_wake = xchg(&server->tsk, NULL); |
749 | kfree(server); | 759 | kfree(server); |
@@ -1113,6 +1123,8 @@ cifs_parse_mount_options(char *options, const char *devname, | |||
1113 | } else if (!strnicmp(data, "uid", 3) && value && *value) { | 1123 | } else if (!strnicmp(data, "uid", 3) && value && *value) { |
1114 | vol->linux_uid = simple_strtoul(value, &value, 0); | 1124 | vol->linux_uid = simple_strtoul(value, &value, 0); |
1115 | uid_specified = true; | 1125 | uid_specified = true; |
1126 | } else if (!strnicmp(data, "cruid", 5) && value && *value) { | ||
1127 | vol->cred_uid = simple_strtoul(value, &value, 0); | ||
1116 | } else if (!strnicmp(data, "forceuid", 8)) { | 1128 | } else if (!strnicmp(data, "forceuid", 8)) { |
1117 | override_uid = 1; | 1129 | override_uid = 1; |
1118 | } else if (!strnicmp(data, "noforceuid", 10)) { | 1130 | } else if (!strnicmp(data, "noforceuid", 10)) { |
@@ -1353,6 +1365,8 @@ cifs_parse_mount_options(char *options, const char *devname, | |||
1353 | vol->direct_io = 1; | 1365 | vol->direct_io = 1; |
1354 | } else if (strnicmp(data, "forcedirectio", 13) == 0) { | 1366 | } else if (strnicmp(data, "forcedirectio", 13) == 0) { |
1355 | vol->direct_io = 1; | 1367 | vol->direct_io = 1; |
1368 | } else if (strnicmp(data, "strictcache", 11) == 0) { | ||
1369 | vol->strict_io = 1; | ||
1356 | } else if (strnicmp(data, "noac", 4) == 0) { | 1370 | } else if (strnicmp(data, "noac", 4) == 0) { |
1357 | printk(KERN_WARNING "CIFS: Mount option noac not " | 1371 | printk(KERN_WARNING "CIFS: Mount option noac not " |
1358 | "supported. Instead set " | 1372 | "supported. Instead set " |
@@ -1577,6 +1591,9 @@ cifs_find_tcp_session(struct sockaddr *addr, struct smb_vol *vol) | |||
1577 | 1591 | ||
1578 | spin_lock(&cifs_tcp_ses_lock); | 1592 | spin_lock(&cifs_tcp_ses_lock); |
1579 | list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) { | 1593 | list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) { |
1594 | if (!net_eq(cifs_net_ns(server), current->nsproxy->net_ns)) | ||
1595 | continue; | ||
1596 | |||
1580 | if (!match_address(server, addr, | 1597 | if (!match_address(server, addr, |
1581 | (struct sockaddr *)&vol->srcaddr)) | 1598 | (struct sockaddr *)&vol->srcaddr)) |
1582 | continue; | 1599 | continue; |
@@ -1607,9 +1624,13 @@ cifs_put_tcp_session(struct TCP_Server_Info *server) | |||
1607 | return; | 1624 | return; |
1608 | } | 1625 | } |
1609 | 1626 | ||
1627 | put_net(cifs_net_ns(server)); | ||
1628 | |||
1610 | list_del_init(&server->tcp_ses_list); | 1629 | list_del_init(&server->tcp_ses_list); |
1611 | spin_unlock(&cifs_tcp_ses_lock); | 1630 | spin_unlock(&cifs_tcp_ses_lock); |
1612 | 1631 | ||
1632 | cancel_delayed_work_sync(&server->echo); | ||
1633 | |||
1613 | spin_lock(&GlobalMid_Lock); | 1634 | spin_lock(&GlobalMid_Lock); |
1614 | server->tcpStatus = CifsExiting; | 1635 | server->tcpStatus = CifsExiting; |
1615 | spin_unlock(&GlobalMid_Lock); | 1636 | spin_unlock(&GlobalMid_Lock); |
@@ -1679,6 +1700,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info) | |||
1679 | goto out_err; | 1700 | goto out_err; |
1680 | } | 1701 | } |
1681 | 1702 | ||
1703 | cifs_set_net_ns(tcp_ses, get_net(current->nsproxy->net_ns)); | ||
1682 | tcp_ses->hostname = extract_hostname(volume_info->UNC); | 1704 | tcp_ses->hostname = extract_hostname(volume_info->UNC); |
1683 | if (IS_ERR(tcp_ses->hostname)) { | 1705 | if (IS_ERR(tcp_ses->hostname)) { |
1684 | rc = PTR_ERR(tcp_ses->hostname); | 1706 | rc = PTR_ERR(tcp_ses->hostname); |
@@ -1699,8 +1721,10 @@ cifs_get_tcp_session(struct smb_vol *volume_info) | |||
1699 | volume_info->target_rfc1001_name, RFC1001_NAME_LEN_WITH_NULL); | 1721 | volume_info->target_rfc1001_name, RFC1001_NAME_LEN_WITH_NULL); |
1700 | tcp_ses->session_estab = false; | 1722 | tcp_ses->session_estab = false; |
1701 | tcp_ses->sequence_number = 0; | 1723 | tcp_ses->sequence_number = 0; |
1724 | tcp_ses->lstrp = jiffies; | ||
1702 | INIT_LIST_HEAD(&tcp_ses->tcp_ses_list); | 1725 | INIT_LIST_HEAD(&tcp_ses->tcp_ses_list); |
1703 | INIT_LIST_HEAD(&tcp_ses->smb_ses_list); | 1726 | INIT_LIST_HEAD(&tcp_ses->smb_ses_list); |
1727 | INIT_DELAYED_WORK(&tcp_ses->echo, cifs_echo_request); | ||
1704 | 1728 | ||
1705 | /* | 1729 | /* |
1706 | * at this point we are the only ones with the pointer | 1730 | * at this point we are the only ones with the pointer |
@@ -1749,11 +1773,16 @@ cifs_get_tcp_session(struct smb_vol *volume_info) | |||
1749 | 1773 | ||
1750 | cifs_fscache_get_client_cookie(tcp_ses); | 1774 | cifs_fscache_get_client_cookie(tcp_ses); |
1751 | 1775 | ||
1776 | /* queue echo request delayed work */ | ||
1777 | queue_delayed_work(system_nrt_wq, &tcp_ses->echo, SMB_ECHO_INTERVAL); | ||
1778 | |||
1752 | return tcp_ses; | 1779 | return tcp_ses; |
1753 | 1780 | ||
1754 | out_err_crypto_release: | 1781 | out_err_crypto_release: |
1755 | cifs_crypto_shash_release(tcp_ses); | 1782 | cifs_crypto_shash_release(tcp_ses); |
1756 | 1783 | ||
1784 | put_net(cifs_net_ns(tcp_ses)); | ||
1785 | |||
1757 | out_err: | 1786 | out_err: |
1758 | if (tcp_ses) { | 1787 | if (tcp_ses) { |
1759 | if (!IS_ERR(tcp_ses->hostname)) | 1788 | if (!IS_ERR(tcp_ses->hostname)) |
@@ -2265,8 +2294,8 @@ generic_ip_connect(struct TCP_Server_Info *server) | |||
2265 | } | 2294 | } |
2266 | 2295 | ||
2267 | if (socket == NULL) { | 2296 | if (socket == NULL) { |
2268 | rc = sock_create_kern(sfamily, SOCK_STREAM, | 2297 | rc = __sock_create(cifs_net_ns(server), sfamily, SOCK_STREAM, |
2269 | IPPROTO_TCP, &socket); | 2298 | IPPROTO_TCP, &socket, 1); |
2270 | if (rc < 0) { | 2299 | if (rc < 0) { |
2271 | cERROR(1, "Error %d creating socket", rc); | 2300 | cERROR(1, "Error %d creating socket", rc); |
2272 | server->ssocket = NULL; | 2301 | server->ssocket = NULL; |
@@ -2578,6 +2607,8 @@ static void setup_cifs_sb(struct smb_vol *pvolume_info, | |||
2578 | if (pvolume_info->multiuser) | 2607 | if (pvolume_info->multiuser) |
2579 | cifs_sb->mnt_cifs_flags |= (CIFS_MOUNT_MULTIUSER | | 2608 | cifs_sb->mnt_cifs_flags |= (CIFS_MOUNT_MULTIUSER | |
2580 | CIFS_MOUNT_NO_PERM); | 2609 | CIFS_MOUNT_NO_PERM); |
2610 | if (pvolume_info->strict_io) | ||
2611 | cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_STRICT_IO; | ||
2581 | if (pvolume_info->direct_io) { | 2612 | if (pvolume_info->direct_io) { |
2582 | cFYI(1, "mounting share using direct i/o"); | 2613 | cFYI(1, "mounting share using direct i/o"); |
2583 | cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO; | 2614 | cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO; |
@@ -2934,8 +2965,8 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses, | |||
2934 | TCONX_RSP *pSMBr; | 2965 | TCONX_RSP *pSMBr; |
2935 | unsigned char *bcc_ptr; | 2966 | unsigned char *bcc_ptr; |
2936 | int rc = 0; | 2967 | int rc = 0; |
2937 | int length, bytes_left; | 2968 | int length; |
2938 | __u16 count; | 2969 | __u16 bytes_left, count; |
2939 | 2970 | ||
2940 | if (ses == NULL) | 2971 | if (ses == NULL) |
2941 | return -EIO; | 2972 | return -EIO; |
@@ -2963,7 +2994,7 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses, | |||
2963 | bcc_ptr++; /* skip password */ | 2994 | bcc_ptr++; /* skip password */ |
2964 | /* already aligned so no need to do it below */ | 2995 | /* already aligned so no need to do it below */ |
2965 | } else { | 2996 | } else { |
2966 | pSMB->PasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE); | 2997 | pSMB->PasswordLength = cpu_to_le16(CIFS_AUTH_RESP_SIZE); |
2967 | /* BB FIXME add code to fail this if NTLMv2 or Kerberos | 2998 | /* BB FIXME add code to fail this if NTLMv2 or Kerberos |
2968 | specified as required (when that support is added to | 2999 | specified as required (when that support is added to |
2969 | the vfs in the future) as only NTLM or the much | 3000 | the vfs in the future) as only NTLM or the much |
@@ -2979,9 +3010,10 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses, | |||
2979 | bcc_ptr); | 3010 | bcc_ptr); |
2980 | else | 3011 | else |
2981 | #endif /* CIFS_WEAK_PW_HASH */ | 3012 | #endif /* CIFS_WEAK_PW_HASH */ |
2982 | SMBNTencrypt(tcon->password, ses->server->cryptkey, bcc_ptr); | 3013 | rc = SMBNTencrypt(tcon->password, ses->server->cryptkey, |
3014 | bcc_ptr); | ||
2983 | 3015 | ||
2984 | bcc_ptr += CIFS_SESS_KEY_SIZE; | 3016 | bcc_ptr += CIFS_AUTH_RESP_SIZE; |
2985 | if (ses->capabilities & CAP_UNICODE) { | 3017 | if (ses->capabilities & CAP_UNICODE) { |
2986 | /* must align unicode strings */ | 3018 | /* must align unicode strings */ |
2987 | *bcc_ptr = 0; /* null byte password */ | 3019 | *bcc_ptr = 0; /* null byte password */ |
@@ -3019,7 +3051,7 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses, | |||
3019 | pSMB->ByteCount = cpu_to_le16(count); | 3051 | pSMB->ByteCount = cpu_to_le16(count); |
3020 | 3052 | ||
3021 | rc = SendReceive(xid, ses, smb_buffer, smb_buffer_response, &length, | 3053 | rc = SendReceive(xid, ses, smb_buffer, smb_buffer_response, &length, |
3022 | CIFS_STD_OP); | 3054 | 0); |
3023 | 3055 | ||
3024 | /* above now done in SendReceive */ | 3056 | /* above now done in SendReceive */ |
3025 | if ((rc == 0) && (tcon != NULL)) { | 3057 | if ((rc == 0) && (tcon != NULL)) { |
@@ -3029,7 +3061,7 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses, | |||
3029 | tcon->need_reconnect = false; | 3061 | tcon->need_reconnect = false; |
3030 | tcon->tid = smb_buffer_response->Tid; | 3062 | tcon->tid = smb_buffer_response->Tid; |
3031 | bcc_ptr = pByteArea(smb_buffer_response); | 3063 | bcc_ptr = pByteArea(smb_buffer_response); |
3032 | bytes_left = BCC(smb_buffer_response); | 3064 | bytes_left = get_bcc(smb_buffer_response); |
3033 | length = strnlen(bcc_ptr, bytes_left - 2); | 3065 | length = strnlen(bcc_ptr, bytes_left - 2); |
3034 | if (smb_buffer->Flags2 & SMBFLG2_UNICODE) | 3066 | if (smb_buffer->Flags2 & SMBFLG2_UNICODE) |
3035 | is_unicode = true; | 3067 | is_unicode = true; |
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 1e95dd635632..dd5f22918c33 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c | |||
@@ -675,6 +675,7 @@ cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd) | |||
675 | 675 | ||
676 | const struct dentry_operations cifs_dentry_ops = { | 676 | const struct dentry_operations cifs_dentry_ops = { |
677 | .d_revalidate = cifs_d_revalidate, | 677 | .d_revalidate = cifs_d_revalidate, |
678 | .d_automount = cifs_dfs_d_automount, | ||
678 | /* d_delete: cifs_d_delete, */ /* not needed except for debugging */ | 679 | /* d_delete: cifs_d_delete, */ /* not needed except for debugging */ |
679 | }; | 680 | }; |
680 | 681 | ||
@@ -711,4 +712,5 @@ const struct dentry_operations cifs_ci_dentry_ops = { | |||
711 | .d_revalidate = cifs_d_revalidate, | 712 | .d_revalidate = cifs_d_revalidate, |
712 | .d_hash = cifs_ci_hash, | 713 | .d_hash = cifs_ci_hash, |
713 | .d_compare = cifs_ci_compare, | 714 | .d_compare = cifs_ci_compare, |
715 | .d_automount = cifs_dfs_d_automount, | ||
714 | }; | 716 | }; |
diff --git a/fs/cifs/file.c b/fs/cifs/file.c index d843631c028d..e964b1cd5dd0 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c | |||
@@ -287,6 +287,7 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) | |||
287 | struct inode *inode = cifs_file->dentry->d_inode; | 287 | struct inode *inode = cifs_file->dentry->d_inode; |
288 | struct cifsTconInfo *tcon = tlink_tcon(cifs_file->tlink); | 288 | struct cifsTconInfo *tcon = tlink_tcon(cifs_file->tlink); |
289 | struct cifsInodeInfo *cifsi = CIFS_I(inode); | 289 | struct cifsInodeInfo *cifsi = CIFS_I(inode); |
290 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); | ||
290 | struct cifsLockInfo *li, *tmp; | 291 | struct cifsLockInfo *li, *tmp; |
291 | 292 | ||
292 | spin_lock(&cifs_file_list_lock); | 293 | spin_lock(&cifs_file_list_lock); |
@@ -302,6 +303,13 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) | |||
302 | if (list_empty(&cifsi->openFileList)) { | 303 | if (list_empty(&cifsi->openFileList)) { |
303 | cFYI(1, "closing last open instance for inode %p", | 304 | cFYI(1, "closing last open instance for inode %p", |
304 | cifs_file->dentry->d_inode); | 305 | cifs_file->dentry->d_inode); |
306 | |||
307 | /* in strict cache mode we need invalidate mapping on the last | ||
308 | close because it may cause a error when we open this file | ||
309 | again and get at least level II oplock */ | ||
310 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) | ||
311 | CIFS_I(inode)->invalid_mapping = true; | ||
312 | |||
305 | cifs_set_oplock_level(cifsi, 0); | 313 | cifs_set_oplock_level(cifsi, 0); |
306 | } | 314 | } |
307 | spin_unlock(&cifs_file_list_lock); | 315 | spin_unlock(&cifs_file_list_lock); |
@@ -338,7 +346,6 @@ int cifs_open(struct inode *inode, struct file *file) | |||
338 | struct cifsTconInfo *tcon; | 346 | struct cifsTconInfo *tcon; |
339 | struct tcon_link *tlink; | 347 | struct tcon_link *tlink; |
340 | struct cifsFileInfo *pCifsFile = NULL; | 348 | struct cifsFileInfo *pCifsFile = NULL; |
341 | struct cifsInodeInfo *pCifsInode; | ||
342 | char *full_path = NULL; | 349 | char *full_path = NULL; |
343 | bool posix_open_ok = false; | 350 | bool posix_open_ok = false; |
344 | __u16 netfid; | 351 | __u16 netfid; |
@@ -353,8 +360,6 @@ int cifs_open(struct inode *inode, struct file *file) | |||
353 | } | 360 | } |
354 | tcon = tlink_tcon(tlink); | 361 | tcon = tlink_tcon(tlink); |
355 | 362 | ||
356 | pCifsInode = CIFS_I(file->f_path.dentry->d_inode); | ||
357 | |||
358 | full_path = build_path_from_dentry(file->f_path.dentry); | 363 | full_path = build_path_from_dentry(file->f_path.dentry); |
359 | if (full_path == NULL) { | 364 | if (full_path == NULL) { |
360 | rc = -ENOMEM; | 365 | rc = -ENOMEM; |
@@ -726,12 +731,12 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) | |||
726 | 731 | ||
727 | /* BB we could chain these into one lock request BB */ | 732 | /* BB we could chain these into one lock request BB */ |
728 | rc = CIFSSMBLock(xid, tcon, netfid, length, pfLock->fl_start, | 733 | rc = CIFSSMBLock(xid, tcon, netfid, length, pfLock->fl_start, |
729 | 0, 1, lockType, 0 /* wait flag */ ); | 734 | 0, 1, lockType, 0 /* wait flag */, 0); |
730 | if (rc == 0) { | 735 | if (rc == 0) { |
731 | rc = CIFSSMBLock(xid, tcon, netfid, length, | 736 | rc = CIFSSMBLock(xid, tcon, netfid, length, |
732 | pfLock->fl_start, 1 /* numUnlock */ , | 737 | pfLock->fl_start, 1 /* numUnlock */ , |
733 | 0 /* numLock */ , lockType, | 738 | 0 /* numLock */ , lockType, |
734 | 0 /* wait flag */ ); | 739 | 0 /* wait flag */, 0); |
735 | pfLock->fl_type = F_UNLCK; | 740 | pfLock->fl_type = F_UNLCK; |
736 | if (rc != 0) | 741 | if (rc != 0) |
737 | cERROR(1, "Error unlocking previously locked " | 742 | cERROR(1, "Error unlocking previously locked " |
@@ -748,13 +753,13 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) | |||
748 | rc = CIFSSMBLock(xid, tcon, netfid, length, | 753 | rc = CIFSSMBLock(xid, tcon, netfid, length, |
749 | pfLock->fl_start, 0, 1, | 754 | pfLock->fl_start, 0, 1, |
750 | lockType | LOCKING_ANDX_SHARED_LOCK, | 755 | lockType | LOCKING_ANDX_SHARED_LOCK, |
751 | 0 /* wait flag */); | 756 | 0 /* wait flag */, 0); |
752 | if (rc == 0) { | 757 | if (rc == 0) { |
753 | rc = CIFSSMBLock(xid, tcon, netfid, | 758 | rc = CIFSSMBLock(xid, tcon, netfid, |
754 | length, pfLock->fl_start, 1, 0, | 759 | length, pfLock->fl_start, 1, 0, |
755 | lockType | | 760 | lockType | |
756 | LOCKING_ANDX_SHARED_LOCK, | 761 | LOCKING_ANDX_SHARED_LOCK, |
757 | 0 /* wait flag */); | 762 | 0 /* wait flag */, 0); |
758 | pfLock->fl_type = F_RDLCK; | 763 | pfLock->fl_type = F_RDLCK; |
759 | if (rc != 0) | 764 | if (rc != 0) |
760 | cERROR(1, "Error unlocking " | 765 | cERROR(1, "Error unlocking " |
@@ -797,8 +802,8 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) | |||
797 | 802 | ||
798 | if (numLock) { | 803 | if (numLock) { |
799 | rc = CIFSSMBLock(xid, tcon, netfid, length, | 804 | rc = CIFSSMBLock(xid, tcon, netfid, length, |
800 | pfLock->fl_start, | 805 | pfLock->fl_start, 0, numLock, lockType, |
801 | 0, numLock, lockType, wait_flag); | 806 | wait_flag, 0); |
802 | 807 | ||
803 | if (rc == 0) { | 808 | if (rc == 0) { |
804 | /* For Windows locks we must store them. */ | 809 | /* For Windows locks we must store them. */ |
@@ -818,9 +823,9 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) | |||
818 | (pfLock->fl_start + length) >= | 823 | (pfLock->fl_start + length) >= |
819 | (li->offset + li->length)) { | 824 | (li->offset + li->length)) { |
820 | stored_rc = CIFSSMBLock(xid, tcon, | 825 | stored_rc = CIFSSMBLock(xid, tcon, |
821 | netfid, | 826 | netfid, li->length, |
822 | li->length, li->offset, | 827 | li->offset, 1, 0, |
823 | 1, 0, li->type, false); | 828 | li->type, false, 0); |
824 | if (stored_rc) | 829 | if (stored_rc) |
825 | rc = stored_rc; | 830 | rc = stored_rc; |
826 | else { | 831 | else { |
@@ -839,31 +844,8 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) | |||
839 | return rc; | 844 | return rc; |
840 | } | 845 | } |
841 | 846 | ||
842 | /* | ||
843 | * Set the timeout on write requests past EOF. For some servers (Windows) | ||
844 | * these calls can be very long. | ||
845 | * | ||
846 | * If we're writing >10M past the EOF we give a 180s timeout. Anything less | ||
847 | * than that gets a 45s timeout. Writes not past EOF get 15s timeouts. | ||
848 | * The 10M cutoff is totally arbitrary. A better scheme for this would be | ||
849 | * welcome if someone wants to suggest one. | ||
850 | * | ||
851 | * We may be able to do a better job with this if there were some way to | ||
852 | * declare that a file should be sparse. | ||
853 | */ | ||
854 | static int | ||
855 | cifs_write_timeout(struct cifsInodeInfo *cifsi, loff_t offset) | ||
856 | { | ||
857 | if (offset <= cifsi->server_eof) | ||
858 | return CIFS_STD_OP; | ||
859 | else if (offset > (cifsi->server_eof + (10 * 1024 * 1024))) | ||
860 | return CIFS_VLONG_OP; | ||
861 | else | ||
862 | return CIFS_LONG_OP; | ||
863 | } | ||
864 | |||
865 | /* update the file size (if needed) after a write */ | 847 | /* update the file size (if needed) after a write */ |
866 | static void | 848 | void |
867 | cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset, | 849 | cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset, |
868 | unsigned int bytes_written) | 850 | unsigned int bytes_written) |
869 | { | 851 | { |
@@ -882,7 +864,7 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data, | |||
882 | unsigned int total_written; | 864 | unsigned int total_written; |
883 | struct cifs_sb_info *cifs_sb; | 865 | struct cifs_sb_info *cifs_sb; |
884 | struct cifsTconInfo *pTcon; | 866 | struct cifsTconInfo *pTcon; |
885 | int xid, long_op; | 867 | int xid; |
886 | struct cifsFileInfo *open_file; | 868 | struct cifsFileInfo *open_file; |
887 | struct cifsInodeInfo *cifsi = CIFS_I(inode); | 869 | struct cifsInodeInfo *cifsi = CIFS_I(inode); |
888 | 870 | ||
@@ -903,7 +885,6 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data, | |||
903 | 885 | ||
904 | xid = GetXid(); | 886 | xid = GetXid(); |
905 | 887 | ||
906 | long_op = cifs_write_timeout(cifsi, *poffset); | ||
907 | for (total_written = 0; write_size > total_written; | 888 | for (total_written = 0; write_size > total_written; |
908 | total_written += bytes_written) { | 889 | total_written += bytes_written) { |
909 | rc = -EAGAIN; | 890 | rc = -EAGAIN; |
@@ -931,7 +912,7 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data, | |||
931 | min_t(const int, cifs_sb->wsize, | 912 | min_t(const int, cifs_sb->wsize, |
932 | write_size - total_written), | 913 | write_size - total_written), |
933 | *poffset, &bytes_written, | 914 | *poffset, &bytes_written, |
934 | NULL, write_data + total_written, long_op); | 915 | NULL, write_data + total_written, 0); |
935 | } | 916 | } |
936 | if (rc || (bytes_written == 0)) { | 917 | if (rc || (bytes_written == 0)) { |
937 | if (total_written) | 918 | if (total_written) |
@@ -944,8 +925,6 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data, | |||
944 | cifs_update_eof(cifsi, *poffset, bytes_written); | 925 | cifs_update_eof(cifsi, *poffset, bytes_written); |
945 | *poffset += bytes_written; | 926 | *poffset += bytes_written; |
946 | } | 927 | } |
947 | long_op = CIFS_STD_OP; /* subsequent writes fast - | ||
948 | 15 seconds is plenty */ | ||
949 | } | 928 | } |
950 | 929 | ||
951 | cifs_stats_bytes_written(pTcon, total_written); | 930 | cifs_stats_bytes_written(pTcon, total_written); |
@@ -974,7 +953,7 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file, | |||
974 | unsigned int total_written; | 953 | unsigned int total_written; |
975 | struct cifs_sb_info *cifs_sb; | 954 | struct cifs_sb_info *cifs_sb; |
976 | struct cifsTconInfo *pTcon; | 955 | struct cifsTconInfo *pTcon; |
977 | int xid, long_op; | 956 | int xid; |
978 | struct dentry *dentry = open_file->dentry; | 957 | struct dentry *dentry = open_file->dentry; |
979 | struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode); | 958 | struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode); |
980 | 959 | ||
@@ -987,7 +966,6 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file, | |||
987 | 966 | ||
988 | xid = GetXid(); | 967 | xid = GetXid(); |
989 | 968 | ||
990 | long_op = cifs_write_timeout(cifsi, *poffset); | ||
991 | for (total_written = 0; write_size > total_written; | 969 | for (total_written = 0; write_size > total_written; |
992 | total_written += bytes_written) { | 970 | total_written += bytes_written) { |
993 | rc = -EAGAIN; | 971 | rc = -EAGAIN; |
@@ -1017,7 +995,7 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file, | |||
1017 | rc = CIFSSMBWrite2(xid, pTcon, | 995 | rc = CIFSSMBWrite2(xid, pTcon, |
1018 | open_file->netfid, len, | 996 | open_file->netfid, len, |
1019 | *poffset, &bytes_written, | 997 | *poffset, &bytes_written, |
1020 | iov, 1, long_op); | 998 | iov, 1, 0); |
1021 | } else | 999 | } else |
1022 | rc = CIFSSMBWrite(xid, pTcon, | 1000 | rc = CIFSSMBWrite(xid, pTcon, |
1023 | open_file->netfid, | 1001 | open_file->netfid, |
@@ -1025,7 +1003,7 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file, | |||
1025 | write_size - total_written), | 1003 | write_size - total_written), |
1026 | *poffset, &bytes_written, | 1004 | *poffset, &bytes_written, |
1027 | write_data + total_written, | 1005 | write_data + total_written, |
1028 | NULL, long_op); | 1006 | NULL, 0); |
1029 | } | 1007 | } |
1030 | if (rc || (bytes_written == 0)) { | 1008 | if (rc || (bytes_written == 0)) { |
1031 | if (total_written) | 1009 | if (total_written) |
@@ -1038,8 +1016,6 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file, | |||
1038 | cifs_update_eof(cifsi, *poffset, bytes_written); | 1016 | cifs_update_eof(cifsi, *poffset, bytes_written); |
1039 | *poffset += bytes_written; | 1017 | *poffset += bytes_written; |
1040 | } | 1018 | } |
1041 | long_op = CIFS_STD_OP; /* subsequent writes fast - | ||
1042 | 15 seconds is plenty */ | ||
1043 | } | 1019 | } |
1044 | 1020 | ||
1045 | cifs_stats_bytes_written(pTcon, total_written); | 1021 | cifs_stats_bytes_written(pTcon, total_written); |
@@ -1167,7 +1143,6 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to) | |||
1167 | char *write_data; | 1143 | char *write_data; |
1168 | int rc = -EFAULT; | 1144 | int rc = -EFAULT; |
1169 | int bytes_written = 0; | 1145 | int bytes_written = 0; |
1170 | struct cifs_sb_info *cifs_sb; | ||
1171 | struct inode *inode; | 1146 | struct inode *inode; |
1172 | struct cifsFileInfo *open_file; | 1147 | struct cifsFileInfo *open_file; |
1173 | 1148 | ||
@@ -1175,7 +1150,6 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to) | |||
1175 | return -EFAULT; | 1150 | return -EFAULT; |
1176 | 1151 | ||
1177 | inode = page->mapping->host; | 1152 | inode = page->mapping->host; |
1178 | cifs_sb = CIFS_SB(inode->i_sb); | ||
1179 | 1153 | ||
1180 | offset += (loff_t)from; | 1154 | offset += (loff_t)from; |
1181 | write_data = kmap(page); | 1155 | write_data = kmap(page); |
@@ -1239,7 +1213,7 @@ static int cifs_writepages(struct address_space *mapping, | |||
1239 | struct pagevec pvec; | 1213 | struct pagevec pvec; |
1240 | int rc = 0; | 1214 | int rc = 0; |
1241 | int scanned = 0; | 1215 | int scanned = 0; |
1242 | int xid, long_op; | 1216 | int xid; |
1243 | 1217 | ||
1244 | cifs_sb = CIFS_SB(mapping->host->i_sb); | 1218 | cifs_sb = CIFS_SB(mapping->host->i_sb); |
1245 | 1219 | ||
@@ -1377,43 +1351,67 @@ retry: | |||
1377 | break; | 1351 | break; |
1378 | } | 1352 | } |
1379 | if (n_iov) { | 1353 | if (n_iov) { |
1354 | retry_write: | ||
1380 | open_file = find_writable_file(CIFS_I(mapping->host), | 1355 | open_file = find_writable_file(CIFS_I(mapping->host), |
1381 | false); | 1356 | false); |
1382 | if (!open_file) { | 1357 | if (!open_file) { |
1383 | cERROR(1, "No writable handles for inode"); | 1358 | cERROR(1, "No writable handles for inode"); |
1384 | rc = -EBADF; | 1359 | rc = -EBADF; |
1385 | } else { | 1360 | } else { |
1386 | long_op = cifs_write_timeout(cifsi, offset); | ||
1387 | rc = CIFSSMBWrite2(xid, tcon, open_file->netfid, | 1361 | rc = CIFSSMBWrite2(xid, tcon, open_file->netfid, |
1388 | bytes_to_write, offset, | 1362 | bytes_to_write, offset, |
1389 | &bytes_written, iov, n_iov, | 1363 | &bytes_written, iov, n_iov, |
1390 | long_op); | 1364 | 0); |
1391 | cifsFileInfo_put(open_file); | 1365 | cifsFileInfo_put(open_file); |
1392 | cifs_update_eof(cifsi, offset, bytes_written); | ||
1393 | } | 1366 | } |
1394 | 1367 | ||
1395 | if (rc || bytes_written < bytes_to_write) { | 1368 | cFYI(1, "Write2 rc=%d, wrote=%u", rc, bytes_written); |
1396 | cERROR(1, "Write2 ret %d, wrote %d", | 1369 | |
1397 | rc, bytes_written); | 1370 | /* |
1398 | mapping_set_error(mapping, rc); | 1371 | * For now, treat a short write as if nothing got |
1399 | } else { | 1372 | * written. A zero length write however indicates |
1373 | * ENOSPC or EFBIG. We have no way to know which | ||
1374 | * though, so call it ENOSPC for now. EFBIG would | ||
1375 | * get translated to AS_EIO anyway. | ||
1376 | * | ||
1377 | * FIXME: make it take into account the data that did | ||
1378 | * get written | ||
1379 | */ | ||
1380 | if (rc == 0) { | ||
1381 | if (bytes_written == 0) | ||
1382 | rc = -ENOSPC; | ||
1383 | else if (bytes_written < bytes_to_write) | ||
1384 | rc = -EAGAIN; | ||
1385 | } | ||
1386 | |||
1387 | /* retry on data-integrity flush */ | ||
1388 | if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) | ||
1389 | goto retry_write; | ||
1390 | |||
1391 | /* fix the stats and EOF */ | ||
1392 | if (bytes_written > 0) { | ||
1400 | cifs_stats_bytes_written(tcon, bytes_written); | 1393 | cifs_stats_bytes_written(tcon, bytes_written); |
1394 | cifs_update_eof(cifsi, offset, bytes_written); | ||
1401 | } | 1395 | } |
1402 | 1396 | ||
1403 | for (i = 0; i < n_iov; i++) { | 1397 | for (i = 0; i < n_iov; i++) { |
1404 | page = pvec.pages[first + i]; | 1398 | page = pvec.pages[first + i]; |
1405 | /* Should we also set page error on | 1399 | /* on retryable write error, redirty page */ |
1406 | success rc but too little data written? */ | 1400 | if (rc == -EAGAIN) |
1407 | /* BB investigate retry logic on temporary | 1401 | redirty_page_for_writepage(wbc, page); |
1408 | server crash cases and how recovery works | 1402 | else if (rc != 0) |
1409 | when page marked as error */ | ||
1410 | if (rc) | ||
1411 | SetPageError(page); | 1403 | SetPageError(page); |
1412 | kunmap(page); | 1404 | kunmap(page); |
1413 | unlock_page(page); | 1405 | unlock_page(page); |
1414 | end_page_writeback(page); | 1406 | end_page_writeback(page); |
1415 | page_cache_release(page); | 1407 | page_cache_release(page); |
1416 | } | 1408 | } |
1409 | |||
1410 | if (rc != -EAGAIN) | ||
1411 | mapping_set_error(mapping, rc); | ||
1412 | else | ||
1413 | rc = 0; | ||
1414 | |||
1417 | if ((wbc->nr_to_write -= n_iov) <= 0) | 1415 | if ((wbc->nr_to_write -= n_iov) <= 0) |
1418 | done = 1; | 1416 | done = 1; |
1419 | index = next; | 1417 | index = next; |
@@ -1525,27 +1523,47 @@ static int cifs_write_end(struct file *file, struct address_space *mapping, | |||
1525 | return rc; | 1523 | return rc; |
1526 | } | 1524 | } |
1527 | 1525 | ||
1528 | int cifs_fsync(struct file *file, int datasync) | 1526 | int cifs_strict_fsync(struct file *file, int datasync) |
1529 | { | 1527 | { |
1530 | int xid; | 1528 | int xid; |
1531 | int rc = 0; | 1529 | int rc = 0; |
1532 | struct cifsTconInfo *tcon; | 1530 | struct cifsTconInfo *tcon; |
1533 | struct cifsFileInfo *smbfile = file->private_data; | 1531 | struct cifsFileInfo *smbfile = file->private_data; |
1534 | struct inode *inode = file->f_path.dentry->d_inode; | 1532 | struct inode *inode = file->f_path.dentry->d_inode; |
1533 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); | ||
1535 | 1534 | ||
1536 | xid = GetXid(); | 1535 | xid = GetXid(); |
1537 | 1536 | ||
1538 | cFYI(1, "Sync file - name: %s datasync: 0x%x", | 1537 | cFYI(1, "Sync file - name: %s datasync: 0x%x", |
1539 | file->f_path.dentry->d_name.name, datasync); | 1538 | file->f_path.dentry->d_name.name, datasync); |
1540 | 1539 | ||
1541 | rc = filemap_write_and_wait(inode->i_mapping); | 1540 | if (!CIFS_I(inode)->clientCanCacheRead) |
1542 | if (rc == 0) { | 1541 | cifs_invalidate_mapping(inode); |
1543 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); | ||
1544 | 1542 | ||
1545 | tcon = tlink_tcon(smbfile->tlink); | 1543 | tcon = tlink_tcon(smbfile->tlink); |
1546 | if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) | 1544 | if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) |
1547 | rc = CIFSSMBFlush(xid, tcon, smbfile->netfid); | 1545 | rc = CIFSSMBFlush(xid, tcon, smbfile->netfid); |
1548 | } | 1546 | |
1547 | FreeXid(xid); | ||
1548 | return rc; | ||
1549 | } | ||
1550 | |||
1551 | int cifs_fsync(struct file *file, int datasync) | ||
1552 | { | ||
1553 | int xid; | ||
1554 | int rc = 0; | ||
1555 | struct cifsTconInfo *tcon; | ||
1556 | struct cifsFileInfo *smbfile = file->private_data; | ||
1557 | struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); | ||
1558 | |||
1559 | xid = GetXid(); | ||
1560 | |||
1561 | cFYI(1, "Sync file - name: %s datasync: 0x%x", | ||
1562 | file->f_path.dentry->d_name.name, datasync); | ||
1563 | |||
1564 | tcon = tlink_tcon(smbfile->tlink); | ||
1565 | if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) | ||
1566 | rc = CIFSSMBFlush(xid, tcon, smbfile->netfid); | ||
1549 | 1567 | ||
1550 | FreeXid(xid); | 1568 | FreeXid(xid); |
1551 | return rc; | 1569 | return rc; |
@@ -1596,42 +1614,244 @@ int cifs_flush(struct file *file, fl_owner_t id) | |||
1596 | return rc; | 1614 | return rc; |
1597 | } | 1615 | } |
1598 | 1616 | ||
1599 | ssize_t cifs_user_read(struct file *file, char __user *read_data, | 1617 | static int |
1600 | size_t read_size, loff_t *poffset) | 1618 | cifs_write_allocate_pages(struct page **pages, unsigned long num_pages) |
1601 | { | 1619 | { |
1602 | int rc = -EACCES; | 1620 | int rc = 0; |
1621 | unsigned long i; | ||
1622 | |||
1623 | for (i = 0; i < num_pages; i++) { | ||
1624 | pages[i] = alloc_page(__GFP_HIGHMEM); | ||
1625 | if (!pages[i]) { | ||
1626 | /* | ||
1627 | * save number of pages we have already allocated and | ||
1628 | * return with ENOMEM error | ||
1629 | */ | ||
1630 | num_pages = i; | ||
1631 | rc = -ENOMEM; | ||
1632 | goto error; | ||
1633 | } | ||
1634 | } | ||
1635 | |||
1636 | return rc; | ||
1637 | |||
1638 | error: | ||
1639 | for (i = 0; i < num_pages; i++) | ||
1640 | put_page(pages[i]); | ||
1641 | return rc; | ||
1642 | } | ||
1643 | |||
1644 | static inline | ||
1645 | size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len) | ||
1646 | { | ||
1647 | size_t num_pages; | ||
1648 | size_t clen; | ||
1649 | |||
1650 | clen = min_t(const size_t, len, wsize); | ||
1651 | num_pages = clen / PAGE_CACHE_SIZE; | ||
1652 | if (clen % PAGE_CACHE_SIZE) | ||
1653 | num_pages++; | ||
1654 | |||
1655 | if (cur_len) | ||
1656 | *cur_len = clen; | ||
1657 | |||
1658 | return num_pages; | ||
1659 | } | ||
1660 | |||
1661 | static ssize_t | ||
1662 | cifs_iovec_write(struct file *file, const struct iovec *iov, | ||
1663 | unsigned long nr_segs, loff_t *poffset) | ||
1664 | { | ||
1665 | unsigned int written; | ||
1666 | unsigned long num_pages, npages, i; | ||
1667 | size_t copied, len, cur_len; | ||
1668 | ssize_t total_written = 0; | ||
1669 | struct kvec *to_send; | ||
1670 | struct page **pages; | ||
1671 | struct iov_iter it; | ||
1672 | struct inode *inode; | ||
1673 | struct cifsFileInfo *open_file; | ||
1674 | struct cifsTconInfo *pTcon; | ||
1675 | struct cifs_sb_info *cifs_sb; | ||
1676 | int xid, rc; | ||
1677 | |||
1678 | len = iov_length(iov, nr_segs); | ||
1679 | if (!len) | ||
1680 | return 0; | ||
1681 | |||
1682 | rc = generic_write_checks(file, poffset, &len, 0); | ||
1683 | if (rc) | ||
1684 | return rc; | ||
1685 | |||
1686 | cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); | ||
1687 | num_pages = get_numpages(cifs_sb->wsize, len, &cur_len); | ||
1688 | |||
1689 | pages = kmalloc(sizeof(struct pages *)*num_pages, GFP_KERNEL); | ||
1690 | if (!pages) | ||
1691 | return -ENOMEM; | ||
1692 | |||
1693 | to_send = kmalloc(sizeof(struct kvec)*(num_pages + 1), GFP_KERNEL); | ||
1694 | if (!to_send) { | ||
1695 | kfree(pages); | ||
1696 | return -ENOMEM; | ||
1697 | } | ||
1698 | |||
1699 | rc = cifs_write_allocate_pages(pages, num_pages); | ||
1700 | if (rc) { | ||
1701 | kfree(pages); | ||
1702 | kfree(to_send); | ||
1703 | return rc; | ||
1704 | } | ||
1705 | |||
1706 | xid = GetXid(); | ||
1707 | open_file = file->private_data; | ||
1708 | pTcon = tlink_tcon(open_file->tlink); | ||
1709 | inode = file->f_path.dentry->d_inode; | ||
1710 | |||
1711 | iov_iter_init(&it, iov, nr_segs, len, 0); | ||
1712 | npages = num_pages; | ||
1713 | |||
1714 | do { | ||
1715 | size_t save_len = cur_len; | ||
1716 | for (i = 0; i < npages; i++) { | ||
1717 | copied = min_t(const size_t, cur_len, PAGE_CACHE_SIZE); | ||
1718 | copied = iov_iter_copy_from_user(pages[i], &it, 0, | ||
1719 | copied); | ||
1720 | cur_len -= copied; | ||
1721 | iov_iter_advance(&it, copied); | ||
1722 | to_send[i+1].iov_base = kmap(pages[i]); | ||
1723 | to_send[i+1].iov_len = copied; | ||
1724 | } | ||
1725 | |||
1726 | cur_len = save_len - cur_len; | ||
1727 | |||
1728 | do { | ||
1729 | if (open_file->invalidHandle) { | ||
1730 | rc = cifs_reopen_file(open_file, false); | ||
1731 | if (rc != 0) | ||
1732 | break; | ||
1733 | } | ||
1734 | rc = CIFSSMBWrite2(xid, pTcon, open_file->netfid, | ||
1735 | cur_len, *poffset, &written, | ||
1736 | to_send, npages, 0); | ||
1737 | } while (rc == -EAGAIN); | ||
1738 | |||
1739 | for (i = 0; i < npages; i++) | ||
1740 | kunmap(pages[i]); | ||
1741 | |||
1742 | if (written) { | ||
1743 | len -= written; | ||
1744 | total_written += written; | ||
1745 | cifs_update_eof(CIFS_I(inode), *poffset, written); | ||
1746 | *poffset += written; | ||
1747 | } else if (rc < 0) { | ||
1748 | if (!total_written) | ||
1749 | total_written = rc; | ||
1750 | break; | ||
1751 | } | ||
1752 | |||
1753 | /* get length and number of kvecs of the next write */ | ||
1754 | npages = get_numpages(cifs_sb->wsize, len, &cur_len); | ||
1755 | } while (len > 0); | ||
1756 | |||
1757 | if (total_written > 0) { | ||
1758 | spin_lock(&inode->i_lock); | ||
1759 | if (*poffset > inode->i_size) | ||
1760 | i_size_write(inode, *poffset); | ||
1761 | spin_unlock(&inode->i_lock); | ||
1762 | } | ||
1763 | |||
1764 | cifs_stats_bytes_written(pTcon, total_written); | ||
1765 | mark_inode_dirty_sync(inode); | ||
1766 | |||
1767 | for (i = 0; i < num_pages; i++) | ||
1768 | put_page(pages[i]); | ||
1769 | kfree(to_send); | ||
1770 | kfree(pages); | ||
1771 | FreeXid(xid); | ||
1772 | return total_written; | ||
1773 | } | ||
1774 | |||
1775 | static ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov, | ||
1776 | unsigned long nr_segs, loff_t pos) | ||
1777 | { | ||
1778 | ssize_t written; | ||
1779 | struct inode *inode; | ||
1780 | |||
1781 | inode = iocb->ki_filp->f_path.dentry->d_inode; | ||
1782 | |||
1783 | /* | ||
1784 | * BB - optimize the way when signing is disabled. We can drop this | ||
1785 | * extra memory-to-memory copying and use iovec buffers for constructing | ||
1786 | * write request. | ||
1787 | */ | ||
1788 | |||
1789 | written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos); | ||
1790 | if (written > 0) { | ||
1791 | CIFS_I(inode)->invalid_mapping = true; | ||
1792 | iocb->ki_pos = pos; | ||
1793 | } | ||
1794 | |||
1795 | return written; | ||
1796 | } | ||
1797 | |||
1798 | ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, | ||
1799 | unsigned long nr_segs, loff_t pos) | ||
1800 | { | ||
1801 | struct inode *inode; | ||
1802 | |||
1803 | inode = iocb->ki_filp->f_path.dentry->d_inode; | ||
1804 | |||
1805 | if (CIFS_I(inode)->clientCanCacheAll) | ||
1806 | return generic_file_aio_write(iocb, iov, nr_segs, pos); | ||
1807 | |||
1808 | /* | ||
1809 | * In strict cache mode we need to write the data to the server exactly | ||
1810 | * from the pos to pos+len-1 rather than flush all affected pages | ||
1811 | * because it may cause a error with mandatory locks on these pages but | ||
1812 | * not on the region from pos to ppos+len-1. | ||
1813 | */ | ||
1814 | |||
1815 | return cifs_user_writev(iocb, iov, nr_segs, pos); | ||
1816 | } | ||
1817 | |||
1818 | static ssize_t | ||
1819 | cifs_iovec_read(struct file *file, const struct iovec *iov, | ||
1820 | unsigned long nr_segs, loff_t *poffset) | ||
1821 | { | ||
1822 | int rc; | ||
1823 | int xid; | ||
1824 | ssize_t total_read; | ||
1603 | unsigned int bytes_read = 0; | 1825 | unsigned int bytes_read = 0; |
1604 | unsigned int total_read = 0; | 1826 | size_t len, cur_len; |
1605 | unsigned int current_read_size; | 1827 | int iov_offset = 0; |
1606 | struct cifs_sb_info *cifs_sb; | 1828 | struct cifs_sb_info *cifs_sb; |
1607 | struct cifsTconInfo *pTcon; | 1829 | struct cifsTconInfo *pTcon; |
1608 | int xid; | ||
1609 | struct cifsFileInfo *open_file; | 1830 | struct cifsFileInfo *open_file; |
1610 | char *smb_read_data; | ||
1611 | char __user *current_offset; | ||
1612 | struct smb_com_read_rsp *pSMBr; | 1831 | struct smb_com_read_rsp *pSMBr; |
1832 | char *read_data; | ||
1833 | |||
1834 | if (!nr_segs) | ||
1835 | return 0; | ||
1836 | |||
1837 | len = iov_length(iov, nr_segs); | ||
1838 | if (!len) | ||
1839 | return 0; | ||
1613 | 1840 | ||
1614 | xid = GetXid(); | 1841 | xid = GetXid(); |
1615 | cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); | 1842 | cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); |
1616 | 1843 | ||
1617 | if (file->private_data == NULL) { | ||
1618 | rc = -EBADF; | ||
1619 | FreeXid(xid); | ||
1620 | return rc; | ||
1621 | } | ||
1622 | open_file = file->private_data; | 1844 | open_file = file->private_data; |
1623 | pTcon = tlink_tcon(open_file->tlink); | 1845 | pTcon = tlink_tcon(open_file->tlink); |
1624 | 1846 | ||
1625 | if ((file->f_flags & O_ACCMODE) == O_WRONLY) | 1847 | if ((file->f_flags & O_ACCMODE) == O_WRONLY) |
1626 | cFYI(1, "attempting read on write only file instance"); | 1848 | cFYI(1, "attempting read on write only file instance"); |
1627 | 1849 | ||
1628 | for (total_read = 0, current_offset = read_data; | 1850 | for (total_read = 0; total_read < len; total_read += bytes_read) { |
1629 | read_size > total_read; | 1851 | cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize); |
1630 | total_read += bytes_read, current_offset += bytes_read) { | ||
1631 | current_read_size = min_t(const int, read_size - total_read, | ||
1632 | cifs_sb->rsize); | ||
1633 | rc = -EAGAIN; | 1852 | rc = -EAGAIN; |
1634 | smb_read_data = NULL; | 1853 | read_data = NULL; |
1854 | |||
1635 | while (rc == -EAGAIN) { | 1855 | while (rc == -EAGAIN) { |
1636 | int buf_type = CIFS_NO_BUFFER; | 1856 | int buf_type = CIFS_NO_BUFFER; |
1637 | if (open_file->invalidHandle) { | 1857 | if (open_file->invalidHandle) { |
@@ -1639,27 +1859,25 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data, | |||
1639 | if (rc != 0) | 1859 | if (rc != 0) |
1640 | break; | 1860 | break; |
1641 | } | 1861 | } |
1642 | rc = CIFSSMBRead(xid, pTcon, | 1862 | rc = CIFSSMBRead(xid, pTcon, open_file->netfid, |
1643 | open_file->netfid, | 1863 | cur_len, *poffset, &bytes_read, |
1644 | current_read_size, *poffset, | 1864 | &read_data, &buf_type); |
1645 | &bytes_read, &smb_read_data, | 1865 | pSMBr = (struct smb_com_read_rsp *)read_data; |
1646 | &buf_type); | 1866 | if (read_data) { |
1647 | pSMBr = (struct smb_com_read_rsp *)smb_read_data; | 1867 | char *data_offset = read_data + 4 + |
1648 | if (smb_read_data) { | 1868 | le16_to_cpu(pSMBr->DataOffset); |
1649 | if (copy_to_user(current_offset, | 1869 | if (memcpy_toiovecend(iov, data_offset, |
1650 | smb_read_data + | 1870 | iov_offset, bytes_read)) |
1651 | 4 /* RFC1001 length field */ + | ||
1652 | le16_to_cpu(pSMBr->DataOffset), | ||
1653 | bytes_read)) | ||
1654 | rc = -EFAULT; | 1871 | rc = -EFAULT; |
1655 | |||
1656 | if (buf_type == CIFS_SMALL_BUFFER) | 1872 | if (buf_type == CIFS_SMALL_BUFFER) |
1657 | cifs_small_buf_release(smb_read_data); | 1873 | cifs_small_buf_release(read_data); |
1658 | else if (buf_type == CIFS_LARGE_BUFFER) | 1874 | else if (buf_type == CIFS_LARGE_BUFFER) |
1659 | cifs_buf_release(smb_read_data); | 1875 | cifs_buf_release(read_data); |
1660 | smb_read_data = NULL; | 1876 | read_data = NULL; |
1877 | iov_offset += bytes_read; | ||
1661 | } | 1878 | } |
1662 | } | 1879 | } |
1880 | |||
1663 | if (rc || (bytes_read == 0)) { | 1881 | if (rc || (bytes_read == 0)) { |
1664 | if (total_read) { | 1882 | if (total_read) { |
1665 | break; | 1883 | break; |
@@ -1672,13 +1890,57 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data, | |||
1672 | *poffset += bytes_read; | 1890 | *poffset += bytes_read; |
1673 | } | 1891 | } |
1674 | } | 1892 | } |
1893 | |||
1675 | FreeXid(xid); | 1894 | FreeXid(xid); |
1676 | return total_read; | 1895 | return total_read; |
1677 | } | 1896 | } |
1678 | 1897 | ||
1898 | ssize_t cifs_user_read(struct file *file, char __user *read_data, | ||
1899 | size_t read_size, loff_t *poffset) | ||
1900 | { | ||
1901 | struct iovec iov; | ||
1902 | iov.iov_base = read_data; | ||
1903 | iov.iov_len = read_size; | ||
1904 | |||
1905 | return cifs_iovec_read(file, &iov, 1, poffset); | ||
1906 | } | ||
1907 | |||
1908 | static ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov, | ||
1909 | unsigned long nr_segs, loff_t pos) | ||
1910 | { | ||
1911 | ssize_t read; | ||
1912 | |||
1913 | read = cifs_iovec_read(iocb->ki_filp, iov, nr_segs, &pos); | ||
1914 | if (read > 0) | ||
1915 | iocb->ki_pos = pos; | ||
1916 | |||
1917 | return read; | ||
1918 | } | ||
1919 | |||
1920 | ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov, | ||
1921 | unsigned long nr_segs, loff_t pos) | ||
1922 | { | ||
1923 | struct inode *inode; | ||
1924 | |||
1925 | inode = iocb->ki_filp->f_path.dentry->d_inode; | ||
1926 | |||
1927 | if (CIFS_I(inode)->clientCanCacheRead) | ||
1928 | return generic_file_aio_read(iocb, iov, nr_segs, pos); | ||
1929 | |||
1930 | /* | ||
1931 | * In strict cache mode we need to read from the server all the time | ||
1932 | * if we don't have level II oplock because the server can delay mtime | ||
1933 | * change - so we can't make a decision about inode invalidating. | ||
1934 | * And we can also fail with pagereading if there are mandatory locks | ||
1935 | * on pages affected by this read but not on the region from pos to | ||
1936 | * pos+len-1. | ||
1937 | */ | ||
1938 | |||
1939 | return cifs_user_readv(iocb, iov, nr_segs, pos); | ||
1940 | } | ||
1679 | 1941 | ||
1680 | static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, | 1942 | static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, |
1681 | loff_t *poffset) | 1943 | loff_t *poffset) |
1682 | { | 1944 | { |
1683 | int rc = -EACCES; | 1945 | int rc = -EACCES; |
1684 | unsigned int bytes_read = 0; | 1946 | unsigned int bytes_read = 0; |
@@ -1746,6 +2008,21 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, | |||
1746 | return total_read; | 2008 | return total_read; |
1747 | } | 2009 | } |
1748 | 2010 | ||
2011 | int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma) | ||
2012 | { | ||
2013 | int rc, xid; | ||
2014 | struct inode *inode = file->f_path.dentry->d_inode; | ||
2015 | |||
2016 | xid = GetXid(); | ||
2017 | |||
2018 | if (!CIFS_I(inode)->clientCanCacheRead) | ||
2019 | cifs_invalidate_mapping(inode); | ||
2020 | |||
2021 | rc = generic_file_mmap(file, vma); | ||
2022 | FreeXid(xid); | ||
2023 | return rc; | ||
2024 | } | ||
2025 | |||
1749 | int cifs_file_mmap(struct file *file, struct vm_area_struct *vma) | 2026 | int cifs_file_mmap(struct file *file, struct vm_area_struct *vma) |
1750 | { | 2027 | { |
1751 | int rc, xid; | 2028 | int rc, xid; |
@@ -2192,7 +2469,8 @@ void cifs_oplock_break(struct work_struct *work) | |||
2192 | */ | 2469 | */ |
2193 | if (!cfile->oplock_break_cancelled) { | 2470 | if (!cfile->oplock_break_cancelled) { |
2194 | rc = CIFSSMBLock(0, tlink_tcon(cfile->tlink), cfile->netfid, 0, | 2471 | rc = CIFSSMBLock(0, tlink_tcon(cfile->tlink), cfile->netfid, 0, |
2195 | 0, 0, 0, LOCKING_ANDX_OPLOCK_RELEASE, false); | 2472 | 0, 0, 0, LOCKING_ANDX_OPLOCK_RELEASE, false, |
2473 | cinode->clientCanCacheRead ? 1 : 0); | ||
2196 | cFYI(1, "Oplock release rc = %d", rc); | 2474 | cFYI(1, "Oplock release rc = %d", rc); |
2197 | } | 2475 | } |
2198 | 2476 | ||
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index b06b60620240..8852470b4fbb 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c | |||
@@ -32,7 +32,7 @@ | |||
32 | #include "fscache.h" | 32 | #include "fscache.h" |
33 | 33 | ||
34 | 34 | ||
35 | static void cifs_set_ops(struct inode *inode, const bool is_dfs_referral) | 35 | static void cifs_set_ops(struct inode *inode) |
36 | { | 36 | { |
37 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); | 37 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); |
38 | 38 | ||
@@ -44,13 +44,17 @@ static void cifs_set_ops(struct inode *inode, const bool is_dfs_referral) | |||
44 | inode->i_fop = &cifs_file_direct_nobrl_ops; | 44 | inode->i_fop = &cifs_file_direct_nobrl_ops; |
45 | else | 45 | else |
46 | inode->i_fop = &cifs_file_direct_ops; | 46 | inode->i_fop = &cifs_file_direct_ops; |
47 | } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) { | ||
48 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) | ||
49 | inode->i_fop = &cifs_file_strict_nobrl_ops; | ||
50 | else | ||
51 | inode->i_fop = &cifs_file_strict_ops; | ||
47 | } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) | 52 | } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) |
48 | inode->i_fop = &cifs_file_nobrl_ops; | 53 | inode->i_fop = &cifs_file_nobrl_ops; |
49 | else { /* not direct, send byte range locks */ | 54 | else { /* not direct, send byte range locks */ |
50 | inode->i_fop = &cifs_file_ops; | 55 | inode->i_fop = &cifs_file_ops; |
51 | } | 56 | } |
52 | 57 | ||
53 | |||
54 | /* check if server can support readpages */ | 58 | /* check if server can support readpages */ |
55 | if (cifs_sb_master_tcon(cifs_sb)->ses->server->maxBuf < | 59 | if (cifs_sb_master_tcon(cifs_sb)->ses->server->maxBuf < |
56 | PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE) | 60 | PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE) |
@@ -60,7 +64,7 @@ static void cifs_set_ops(struct inode *inode, const bool is_dfs_referral) | |||
60 | break; | 64 | break; |
61 | case S_IFDIR: | 65 | case S_IFDIR: |
62 | #ifdef CONFIG_CIFS_DFS_UPCALL | 66 | #ifdef CONFIG_CIFS_DFS_UPCALL |
63 | if (is_dfs_referral) { | 67 | if (IS_AUTOMOUNT(inode)) { |
64 | inode->i_op = &cifs_dfs_referral_inode_operations; | 68 | inode->i_op = &cifs_dfs_referral_inode_operations; |
65 | } else { | 69 | } else { |
66 | #else /* NO DFS support, treat as a directory */ | 70 | #else /* NO DFS support, treat as a directory */ |
@@ -167,7 +171,9 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr) | |||
167 | } | 171 | } |
168 | spin_unlock(&inode->i_lock); | 172 | spin_unlock(&inode->i_lock); |
169 | 173 | ||
170 | cifs_set_ops(inode, fattr->cf_flags & CIFS_FATTR_DFS_REFERRAL); | 174 | if (fattr->cf_flags & CIFS_FATTR_DFS_REFERRAL) |
175 | inode->i_flags |= S_AUTOMOUNT; | ||
176 | cifs_set_ops(inode); | ||
171 | } | 177 | } |
172 | 178 | ||
173 | void | 179 | void |
@@ -1677,7 +1683,7 @@ cifs_inode_needs_reval(struct inode *inode) | |||
1677 | /* | 1683 | /* |
1678 | * Zap the cache. Called when invalid_mapping flag is set. | 1684 | * Zap the cache. Called when invalid_mapping flag is set. |
1679 | */ | 1685 | */ |
1680 | static void | 1686 | void |
1681 | cifs_invalidate_mapping(struct inode *inode) | 1687 | cifs_invalidate_mapping(struct inode *inode) |
1682 | { | 1688 | { |
1683 | int rc; | 1689 | int rc; |
diff --git a/fs/cifs/link.c b/fs/cifs/link.c index 306769de2fb5..e8804d373404 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c | |||
@@ -28,7 +28,6 @@ | |||
28 | #include "cifsproto.h" | 28 | #include "cifsproto.h" |
29 | #include "cifs_debug.h" | 29 | #include "cifs_debug.h" |
30 | #include "cifs_fs_sb.h" | 30 | #include "cifs_fs_sb.h" |
31 | #include "md5.h" | ||
32 | 31 | ||
33 | #define CIFS_MF_SYMLINK_LEN_OFFSET (4+1) | 32 | #define CIFS_MF_SYMLINK_LEN_OFFSET (4+1) |
34 | #define CIFS_MF_SYMLINK_MD5_OFFSET (CIFS_MF_SYMLINK_LEN_OFFSET+(4+1)) | 33 | #define CIFS_MF_SYMLINK_MD5_OFFSET (CIFS_MF_SYMLINK_LEN_OFFSET+(4+1)) |
@@ -47,6 +46,45 @@ | |||
47 | md5_hash[12], md5_hash[13], md5_hash[14], md5_hash[15] | 46 | md5_hash[12], md5_hash[13], md5_hash[14], md5_hash[15] |
48 | 47 | ||
49 | static int | 48 | static int |
49 | symlink_hash(unsigned int link_len, const char *link_str, u8 *md5_hash) | ||
50 | { | ||
51 | int rc; | ||
52 | unsigned int size; | ||
53 | struct crypto_shash *md5; | ||
54 | struct sdesc *sdescmd5; | ||
55 | |||
56 | md5 = crypto_alloc_shash("md5", 0, 0); | ||
57 | if (IS_ERR(md5)) { | ||
58 | rc = PTR_ERR(md5); | ||
59 | cERROR(1, "%s: Crypto md5 allocation error %d\n", __func__, rc); | ||
60 | return rc; | ||
61 | } | ||
62 | size = sizeof(struct shash_desc) + crypto_shash_descsize(md5); | ||
63 | sdescmd5 = kmalloc(size, GFP_KERNEL); | ||
64 | if (!sdescmd5) { | ||
65 | rc = -ENOMEM; | ||
66 | cERROR(1, "%s: Memory allocation failure\n", __func__); | ||
67 | goto symlink_hash_err; | ||
68 | } | ||
69 | sdescmd5->shash.tfm = md5; | ||
70 | sdescmd5->shash.flags = 0x0; | ||
71 | |||
72 | rc = crypto_shash_init(&sdescmd5->shash); | ||
73 | if (rc) { | ||
74 | cERROR(1, "%s: Could not init md5 shash\n", __func__); | ||
75 | goto symlink_hash_err; | ||
76 | } | ||
77 | crypto_shash_update(&sdescmd5->shash, link_str, link_len); | ||
78 | rc = crypto_shash_final(&sdescmd5->shash, md5_hash); | ||
79 | |||
80 | symlink_hash_err: | ||
81 | crypto_free_shash(md5); | ||
82 | kfree(sdescmd5); | ||
83 | |||
84 | return rc; | ||
85 | } | ||
86 | |||
87 | static int | ||
50 | CIFSParseMFSymlink(const u8 *buf, | 88 | CIFSParseMFSymlink(const u8 *buf, |
51 | unsigned int buf_len, | 89 | unsigned int buf_len, |
52 | unsigned int *_link_len, | 90 | unsigned int *_link_len, |
@@ -56,7 +94,6 @@ CIFSParseMFSymlink(const u8 *buf, | |||
56 | unsigned int link_len; | 94 | unsigned int link_len; |
57 | const char *md5_str1; | 95 | const char *md5_str1; |
58 | const char *link_str; | 96 | const char *link_str; |
59 | struct MD5Context md5_ctx; | ||
60 | u8 md5_hash[16]; | 97 | u8 md5_hash[16]; |
61 | char md5_str2[34]; | 98 | char md5_str2[34]; |
62 | 99 | ||
@@ -70,9 +107,11 @@ CIFSParseMFSymlink(const u8 *buf, | |||
70 | if (rc != 1) | 107 | if (rc != 1) |
71 | return -EINVAL; | 108 | return -EINVAL; |
72 | 109 | ||
73 | cifs_MD5_init(&md5_ctx); | 110 | rc = symlink_hash(link_len, link_str, md5_hash); |
74 | cifs_MD5_update(&md5_ctx, (const u8 *)link_str, link_len); | 111 | if (rc) { |
75 | cifs_MD5_final(md5_hash, &md5_ctx); | 112 | cFYI(1, "%s: MD5 hash failure: %d\n", __func__, rc); |
113 | return rc; | ||
114 | } | ||
76 | 115 | ||
77 | snprintf(md5_str2, sizeof(md5_str2), | 116 | snprintf(md5_str2, sizeof(md5_str2), |
78 | CIFS_MF_SYMLINK_MD5_FORMAT, | 117 | CIFS_MF_SYMLINK_MD5_FORMAT, |
@@ -94,9 +133,9 @@ CIFSParseMFSymlink(const u8 *buf, | |||
94 | static int | 133 | static int |
95 | CIFSFormatMFSymlink(u8 *buf, unsigned int buf_len, const char *link_str) | 134 | CIFSFormatMFSymlink(u8 *buf, unsigned int buf_len, const char *link_str) |
96 | { | 135 | { |
136 | int rc; | ||
97 | unsigned int link_len; | 137 | unsigned int link_len; |
98 | unsigned int ofs; | 138 | unsigned int ofs; |
99 | struct MD5Context md5_ctx; | ||
100 | u8 md5_hash[16]; | 139 | u8 md5_hash[16]; |
101 | 140 | ||
102 | if (buf_len != CIFS_MF_SYMLINK_FILE_SIZE) | 141 | if (buf_len != CIFS_MF_SYMLINK_FILE_SIZE) |
@@ -107,9 +146,11 @@ CIFSFormatMFSymlink(u8 *buf, unsigned int buf_len, const char *link_str) | |||
107 | if (link_len > CIFS_MF_SYMLINK_LINK_MAXLEN) | 146 | if (link_len > CIFS_MF_SYMLINK_LINK_MAXLEN) |
108 | return -ENAMETOOLONG; | 147 | return -ENAMETOOLONG; |
109 | 148 | ||
110 | cifs_MD5_init(&md5_ctx); | 149 | rc = symlink_hash(link_len, link_str, md5_hash); |
111 | cifs_MD5_update(&md5_ctx, (const u8 *)link_str, link_len); | 150 | if (rc) { |
112 | cifs_MD5_final(md5_hash, &md5_ctx); | 151 | cFYI(1, "%s: MD5 hash failure: %d\n", __func__, rc); |
152 | return rc; | ||
153 | } | ||
113 | 154 | ||
114 | snprintf(buf, buf_len, | 155 | snprintf(buf, buf_len, |
115 | CIFS_MF_SYMLINK_LEN_FORMAT CIFS_MF_SYMLINK_MD5_FORMAT, | 156 | CIFS_MF_SYMLINK_LEN_FORMAT CIFS_MF_SYMLINK_MD5_FORMAT, |
diff --git a/fs/cifs/md4.c b/fs/cifs/md4.c deleted file mode 100644 index a725c2609d67..000000000000 --- a/fs/cifs/md4.c +++ /dev/null | |||
@@ -1,205 +0,0 @@ | |||
1 | /* | ||
2 | Unix SMB/Netbios implementation. | ||
3 | Version 1.9. | ||
4 | a implementation of MD4 designed for use in the SMB authentication protocol | ||
5 | Copyright (C) Andrew Tridgell 1997-1998. | ||
6 | Modified by Steve French (sfrench@us.ibm.com) 2002-2003 | ||
7 | |||
8 | This program is free software; you can redistribute it and/or modify | ||
9 | it under the terms of the GNU General Public License as published by | ||
10 | the Free Software Foundation; either version 2 of the License, or | ||
11 | (at your option) any later version. | ||
12 | |||
13 | This program is distributed in the hope that it will be useful, | ||
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 | GNU General Public License for more details. | ||
17 | |||
18 | You should have received a copy of the GNU General Public License | ||
19 | along with this program; if not, write to the Free Software | ||
20 | Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
21 | */ | ||
22 | #include <linux/module.h> | ||
23 | #include <linux/fs.h> | ||
24 | #include "cifsencrypt.h" | ||
25 | |||
26 | /* NOTE: This code makes no attempt to be fast! */ | ||
27 | |||
28 | static __u32 | ||
29 | F(__u32 X, __u32 Y, __u32 Z) | ||
30 | { | ||
31 | return (X & Y) | ((~X) & Z); | ||
32 | } | ||
33 | |||
34 | static __u32 | ||
35 | G(__u32 X, __u32 Y, __u32 Z) | ||
36 | { | ||
37 | return (X & Y) | (X & Z) | (Y & Z); | ||
38 | } | ||
39 | |||
40 | static __u32 | ||
41 | H(__u32 X, __u32 Y, __u32 Z) | ||
42 | { | ||
43 | return X ^ Y ^ Z; | ||
44 | } | ||
45 | |||
46 | static __u32 | ||
47 | lshift(__u32 x, int s) | ||
48 | { | ||
49 | x &= 0xFFFFFFFF; | ||
50 | return ((x << s) & 0xFFFFFFFF) | (x >> (32 - s)); | ||
51 | } | ||
52 | |||
53 | #define ROUND1(a,b,c,d,k,s) (*a) = lshift((*a) + F(*b,*c,*d) + X[k], s) | ||
54 | #define ROUND2(a,b,c,d,k,s) (*a) = lshift((*a) + G(*b,*c,*d) + X[k] + (__u32)0x5A827999,s) | ||
55 | #define ROUND3(a,b,c,d,k,s) (*a) = lshift((*a) + H(*b,*c,*d) + X[k] + (__u32)0x6ED9EBA1,s) | ||
56 | |||
57 | /* this applies md4 to 64 byte chunks */ | ||
58 | static void | ||
59 | mdfour64(__u32 *M, __u32 *A, __u32 *B, __u32 *C, __u32 *D) | ||
60 | { | ||
61 | int j; | ||
62 | __u32 AA, BB, CC, DD; | ||
63 | __u32 X[16]; | ||
64 | |||
65 | |||
66 | for (j = 0; j < 16; j++) | ||
67 | X[j] = M[j]; | ||
68 | |||
69 | AA = *A; | ||
70 | BB = *B; | ||
71 | CC = *C; | ||
72 | DD = *D; | ||
73 | |||
74 | ROUND1(A, B, C, D, 0, 3); | ||
75 | ROUND1(D, A, B, C, 1, 7); | ||
76 | ROUND1(C, D, A, B, 2, 11); | ||
77 | ROUND1(B, C, D, A, 3, 19); | ||
78 | ROUND1(A, B, C, D, 4, 3); | ||
79 | ROUND1(D, A, B, C, 5, 7); | ||
80 | ROUND1(C, D, A, B, 6, 11); | ||
81 | ROUND1(B, C, D, A, 7, 19); | ||
82 | ROUND1(A, B, C, D, 8, 3); | ||
83 | ROUND1(D, A, B, C, 9, 7); | ||
84 | ROUND1(C, D, A, B, 10, 11); | ||
85 | ROUND1(B, C, D, A, 11, 19); | ||
86 | ROUND1(A, B, C, D, 12, 3); | ||
87 | ROUND1(D, A, B, C, 13, 7); | ||
88 | ROUND1(C, D, A, B, 14, 11); | ||
89 | ROUND1(B, C, D, A, 15, 19); | ||
90 | |||
91 | ROUND2(A, B, C, D, 0, 3); | ||
92 | ROUND2(D, A, B, C, 4, 5); | ||
93 | ROUND2(C, D, A, B, 8, 9); | ||
94 | ROUND2(B, C, D, A, 12, 13); | ||
95 | ROUND2(A, B, C, D, 1, 3); | ||
96 | ROUND2(D, A, B, C, 5, 5); | ||
97 | ROUND2(C, D, A, B, 9, 9); | ||
98 | ROUND2(B, C, D, A, 13, 13); | ||
99 | ROUND2(A, B, C, D, 2, 3); | ||
100 | ROUND2(D, A, B, C, 6, 5); | ||
101 | ROUND2(C, D, A, B, 10, 9); | ||
102 | ROUND2(B, C, D, A, 14, 13); | ||
103 | ROUND2(A, B, C, D, 3, 3); | ||
104 | ROUND2(D, A, B, C, 7, 5); | ||
105 | ROUND2(C, D, A, B, 11, 9); | ||
106 | ROUND2(B, C, D, A, 15, 13); | ||
107 | |||
108 | ROUND3(A, B, C, D, 0, 3); | ||
109 | ROUND3(D, A, B, C, 8, 9); | ||
110 | ROUND3(C, D, A, B, 4, 11); | ||
111 | ROUND3(B, C, D, A, 12, 15); | ||
112 | ROUND3(A, B, C, D, 2, 3); | ||
113 | ROUND3(D, A, B, C, 10, 9); | ||
114 | ROUND3(C, D, A, B, 6, 11); | ||
115 | ROUND3(B, C, D, A, 14, 15); | ||
116 | ROUND3(A, B, C, D, 1, 3); | ||
117 | ROUND3(D, A, B, C, 9, 9); | ||
118 | ROUND3(C, D, A, B, 5, 11); | ||
119 | ROUND3(B, C, D, A, 13, 15); | ||
120 | ROUND3(A, B, C, D, 3, 3); | ||
121 | ROUND3(D, A, B, C, 11, 9); | ||
122 | ROUND3(C, D, A, B, 7, 11); | ||
123 | ROUND3(B, C, D, A, 15, 15); | ||
124 | |||
125 | *A += AA; | ||
126 | *B += BB; | ||
127 | *C += CC; | ||
128 | *D += DD; | ||
129 | |||
130 | *A &= 0xFFFFFFFF; | ||
131 | *B &= 0xFFFFFFFF; | ||
132 | *C &= 0xFFFFFFFF; | ||
133 | *D &= 0xFFFFFFFF; | ||
134 | |||
135 | for (j = 0; j < 16; j++) | ||
136 | X[j] = 0; | ||
137 | } | ||
138 | |||
139 | static void | ||
140 | copy64(__u32 *M, unsigned char *in) | ||
141 | { | ||
142 | int i; | ||
143 | |||
144 | for (i = 0; i < 16; i++) | ||
145 | M[i] = (in[i * 4 + 3] << 24) | (in[i * 4 + 2] << 16) | | ||
146 | (in[i * 4 + 1] << 8) | (in[i * 4 + 0] << 0); | ||
147 | } | ||
148 | |||
149 | static void | ||
150 | copy4(unsigned char *out, __u32 x) | ||
151 | { | ||
152 | out[0] = x & 0xFF; | ||
153 | out[1] = (x >> 8) & 0xFF; | ||
154 | out[2] = (x >> 16) & 0xFF; | ||
155 | out[3] = (x >> 24) & 0xFF; | ||
156 | } | ||
157 | |||
158 | /* produce a md4 message digest from data of length n bytes */ | ||
159 | void | ||
160 | mdfour(unsigned char *out, unsigned char *in, int n) | ||
161 | { | ||
162 | unsigned char buf[128]; | ||
163 | __u32 M[16]; | ||
164 | __u32 b = n * 8; | ||
165 | int i; | ||
166 | __u32 A = 0x67452301; | ||
167 | __u32 B = 0xefcdab89; | ||
168 | __u32 C = 0x98badcfe; | ||
169 | __u32 D = 0x10325476; | ||
170 | |||
171 | while (n > 64) { | ||
172 | copy64(M, in); | ||
173 | mdfour64(M, &A, &B, &C, &D); | ||
174 | in += 64; | ||
175 | n -= 64; | ||
176 | } | ||
177 | |||
178 | for (i = 0; i < 128; i++) | ||
179 | buf[i] = 0; | ||
180 | memcpy(buf, in, n); | ||
181 | buf[n] = 0x80; | ||
182 | |||
183 | if (n <= 55) { | ||
184 | copy4(buf + 56, b); | ||
185 | copy64(M, buf); | ||
186 | mdfour64(M, &A, &B, &C, &D); | ||
187 | } else { | ||
188 | copy4(buf + 120, b); | ||
189 | copy64(M, buf); | ||
190 | mdfour64(M, &A, &B, &C, &D); | ||
191 | copy64(M, buf + 64); | ||
192 | mdfour64(M, &A, &B, &C, &D); | ||
193 | } | ||
194 | |||
195 | for (i = 0; i < 128; i++) | ||
196 | buf[i] = 0; | ||
197 | copy64(M, buf); | ||
198 | |||
199 | copy4(out, A); | ||
200 | copy4(out + 4, B); | ||
201 | copy4(out + 8, C); | ||
202 | copy4(out + 12, D); | ||
203 | |||
204 | A = B = C = D = 0; | ||
205 | } | ||
diff --git a/fs/cifs/md5.c b/fs/cifs/md5.c deleted file mode 100644 index 98b66a54c319..000000000000 --- a/fs/cifs/md5.c +++ /dev/null | |||
@@ -1,366 +0,0 @@ | |||
1 | /* | ||
2 | * This code implements the MD5 message-digest algorithm. | ||
3 | * The algorithm is due to Ron Rivest. This code was | ||
4 | * written by Colin Plumb in 1993, no copyright is claimed. | ||
5 | * This code is in the public domain; do with it what you wish. | ||
6 | * | ||
7 | * Equivalent code is available from RSA Data Security, Inc. | ||
8 | * This code has been tested against that, and is equivalent, | ||
9 | * except that you don't need to include two pages of legalese | ||
10 | * with every copy. | ||
11 | * | ||
12 | * To compute the message digest of a chunk of bytes, declare an | ||
13 | * MD5Context structure, pass it to cifs_MD5_init, call cifs_MD5_update as | ||
14 | * needed on buffers full of bytes, and then call cifs_MD5_final, which | ||
15 | * will fill a supplied 16-byte array with the digest. | ||
16 | */ | ||
17 | |||
18 | /* This code slightly modified to fit into Samba by | ||
19 | abartlet@samba.org Jun 2001 | ||
20 | and to fit the cifs vfs by | ||
21 | Steve French sfrench@us.ibm.com */ | ||
22 | |||
23 | #include <linux/string.h> | ||
24 | #include "md5.h" | ||
25 | |||
26 | static void MD5Transform(__u32 buf[4], __u32 const in[16]); | ||
27 | |||
28 | /* | ||
29 | * Note: this code is harmless on little-endian machines. | ||
30 | */ | ||
31 | static void | ||
32 | byteReverse(unsigned char *buf, unsigned longs) | ||
33 | { | ||
34 | __u32 t; | ||
35 | do { | ||
36 | t = (__u32) ((unsigned) buf[3] << 8 | buf[2]) << 16 | | ||
37 | ((unsigned) buf[1] << 8 | buf[0]); | ||
38 | *(__u32 *) buf = t; | ||
39 | buf += 4; | ||
40 | } while (--longs); | ||
41 | } | ||
42 | |||
43 | /* | ||
44 | * Start MD5 accumulation. Set bit count to 0 and buffer to mysterious | ||
45 | * initialization constants. | ||
46 | */ | ||
47 | void | ||
48 | cifs_MD5_init(struct MD5Context *ctx) | ||
49 | { | ||
50 | ctx->buf[0] = 0x67452301; | ||
51 | ctx->buf[1] = 0xefcdab89; | ||
52 | ctx->buf[2] = 0x98badcfe; | ||
53 | ctx->buf[3] = 0x10325476; | ||
54 | |||
55 | ctx->bits[0] = 0; | ||
56 | ctx->bits[1] = 0; | ||
57 | } | ||
58 | |||
59 | /* | ||
60 | * Update context to reflect the concatenation of another buffer full | ||
61 | * of bytes. | ||
62 | */ | ||
63 | void | ||
64 | cifs_MD5_update(struct MD5Context *ctx, unsigned char const *buf, unsigned len) | ||
65 | { | ||
66 | register __u32 t; | ||
67 | |||
68 | /* Update bitcount */ | ||
69 | |||
70 | t = ctx->bits[0]; | ||
71 | if ((ctx->bits[0] = t + ((__u32) len << 3)) < t) | ||
72 | ctx->bits[1]++; /* Carry from low to high */ | ||
73 | ctx->bits[1] += len >> 29; | ||
74 | |||
75 | t = (t >> 3) & 0x3f; /* Bytes already in shsInfo->data */ | ||
76 | |||
77 | /* Handle any leading odd-sized chunks */ | ||
78 | |||
79 | if (t) { | ||
80 | unsigned char *p = (unsigned char *) ctx->in + t; | ||
81 | |||
82 | t = 64 - t; | ||
83 | if (len < t) { | ||
84 | memmove(p, buf, len); | ||
85 | return; | ||
86 | } | ||
87 | memmove(p, buf, t); | ||
88 | byteReverse(ctx->in, 16); | ||
89 | MD5Transform(ctx->buf, (__u32 *) ctx->in); | ||
90 | buf += t; | ||
91 | len -= t; | ||
92 | } | ||
93 | /* Process data in 64-byte chunks */ | ||
94 | |||
95 | while (len >= 64) { | ||
96 | memmove(ctx->in, buf, 64); | ||
97 | byteReverse(ctx->in, 16); | ||
98 | MD5Transform(ctx->buf, (__u32 *) ctx->in); | ||
99 | buf += 64; | ||
100 | len -= 64; | ||
101 | } | ||
102 | |||
103 | /* Handle any remaining bytes of data. */ | ||
104 | |||
105 | memmove(ctx->in, buf, len); | ||
106 | } | ||
107 | |||
108 | /* | ||
109 | * Final wrapup - pad to 64-byte boundary with the bit pattern | ||
110 | * 1 0* (64-bit count of bits processed, MSB-first) | ||
111 | */ | ||
112 | void | ||
113 | cifs_MD5_final(unsigned char digest[16], struct MD5Context *ctx) | ||
114 | { | ||
115 | unsigned int count; | ||
116 | unsigned char *p; | ||
117 | |||
118 | /* Compute number of bytes mod 64 */ | ||
119 | count = (ctx->bits[0] >> 3) & 0x3F; | ||
120 | |||
121 | /* Set the first char of padding to 0x80. This is safe since there is | ||
122 | always at least one byte free */ | ||
123 | p = ctx->in + count; | ||
124 | *p++ = 0x80; | ||
125 | |||
126 | /* Bytes of padding needed to make 64 bytes */ | ||
127 | count = 64 - 1 - count; | ||
128 | |||
129 | /* Pad out to 56 mod 64 */ | ||
130 | if (count < 8) { | ||
131 | /* Two lots of padding: Pad the first block to 64 bytes */ | ||
132 | memset(p, 0, count); | ||
133 | byteReverse(ctx->in, 16); | ||
134 | MD5Transform(ctx->buf, (__u32 *) ctx->in); | ||
135 | |||
136 | /* Now fill the next block with 56 bytes */ | ||
137 | memset(ctx->in, 0, 56); | ||
138 | } else { | ||
139 | /* Pad block to 56 bytes */ | ||
140 | memset(p, 0, count - 8); | ||
141 | } | ||
142 | byteReverse(ctx->in, 14); | ||
143 | |||
144 | /* Append length in bits and transform */ | ||
145 | ((__u32 *) ctx->in)[14] = ctx->bits[0]; | ||
146 | ((__u32 *) ctx->in)[15] = ctx->bits[1]; | ||
147 | |||
148 | MD5Transform(ctx->buf, (__u32 *) ctx->in); | ||
149 | byteReverse((unsigned char *) ctx->buf, 4); | ||
150 | memmove(digest, ctx->buf, 16); | ||
151 | memset(ctx, 0, sizeof(*ctx)); /* In case it's sensitive */ | ||
152 | } | ||
153 | |||
154 | /* The four core functions - F1 is optimized somewhat */ | ||
155 | |||
156 | /* #define F1(x, y, z) (x & y | ~x & z) */ | ||
157 | #define F1(x, y, z) (z ^ (x & (y ^ z))) | ||
158 | #define F2(x, y, z) F1(z, x, y) | ||
159 | #define F3(x, y, z) (x ^ y ^ z) | ||
160 | #define F4(x, y, z) (y ^ (x | ~z)) | ||
161 | |||
162 | /* This is the central step in the MD5 algorithm. */ | ||
163 | #define MD5STEP(f, w, x, y, z, data, s) \ | ||
164 | (w += f(x, y, z) + data, w = w<<s | w>>(32-s), w += x) | ||
165 | |||
166 | /* | ||
167 | * The core of the MD5 algorithm, this alters an existing MD5 hash to | ||
168 | * reflect the addition of 16 longwords of new data. cifs_MD5_update blocks | ||
169 | * the data and converts bytes into longwords for this routine. | ||
170 | */ | ||
171 | static void | ||
172 | MD5Transform(__u32 buf[4], __u32 const in[16]) | ||
173 | { | ||
174 | register __u32 a, b, c, d; | ||
175 | |||
176 | a = buf[0]; | ||
177 | b = buf[1]; | ||
178 | c = buf[2]; | ||
179 | d = buf[3]; | ||
180 | |||
181 | MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7); | ||
182 | MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12); | ||
183 | MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17); | ||
184 | MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22); | ||
185 | MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7); | ||
186 | MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12); | ||
187 | MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17); | ||
188 | MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22); | ||
189 | MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7); | ||
190 | MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12); | ||
191 | MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17); | ||
192 | MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22); | ||
193 | MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7); | ||
194 | MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12); | ||
195 | MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17); | ||
196 | MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22); | ||
197 | |||
198 | MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5); | ||
199 | MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9); | ||
200 | MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14); | ||
201 | MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20); | ||
202 | MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5); | ||
203 | MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9); | ||
204 | MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14); | ||
205 | MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20); | ||
206 | MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5); | ||
207 | MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9); | ||
208 | MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14); | ||
209 | MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20); | ||
210 | MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5); | ||
211 | MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9); | ||
212 | MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14); | ||
213 | MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20); | ||
214 | |||
215 | MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4); | ||
216 | MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11); | ||
217 | MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16); | ||
218 | MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23); | ||
219 | MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4); | ||
220 | MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11); | ||
221 | MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16); | ||
222 | MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23); | ||
223 | MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4); | ||
224 | MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11); | ||
225 | MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16); | ||
226 | MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23); | ||
227 | MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4); | ||
228 | MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11); | ||
229 | MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16); | ||
230 | MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23); | ||
231 | |||
232 | MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6); | ||
233 | MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10); | ||
234 | MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15); | ||
235 | MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21); | ||
236 | MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6); | ||
237 | MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10); | ||
238 | MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15); | ||
239 | MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21); | ||
240 | MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6); | ||
241 | MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10); | ||
242 | MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15); | ||
243 | MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21); | ||
244 | MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6); | ||
245 | MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10); | ||
246 | MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15); | ||
247 | MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21); | ||
248 | |||
249 | buf[0] += a; | ||
250 | buf[1] += b; | ||
251 | buf[2] += c; | ||
252 | buf[3] += d; | ||
253 | } | ||
254 | |||
255 | #if 0 /* currently unused */ | ||
256 | /*********************************************************************** | ||
257 | the rfc 2104 version of hmac_md5 initialisation. | ||
258 | ***********************************************************************/ | ||
259 | static void | ||
260 | hmac_md5_init_rfc2104(unsigned char *key, int key_len, | ||
261 | struct HMACMD5Context *ctx) | ||
262 | { | ||
263 | int i; | ||
264 | |||
265 | /* if key is longer than 64 bytes reset it to key=MD5(key) */ | ||
266 | if (key_len > 64) { | ||
267 | unsigned char tk[16]; | ||
268 | struct MD5Context tctx; | ||
269 | |||
270 | cifs_MD5_init(&tctx); | ||
271 | cifs_MD5_update(&tctx, key, key_len); | ||
272 | cifs_MD5_final(tk, &tctx); | ||
273 | |||
274 | key = tk; | ||
275 | key_len = 16; | ||
276 | } | ||
277 | |||
278 | /* start out by storing key in pads */ | ||
279 | memset(ctx->k_ipad, 0, sizeof(ctx->k_ipad)); | ||
280 | memset(ctx->k_opad, 0, sizeof(ctx->k_opad)); | ||
281 | memcpy(ctx->k_ipad, key, key_len); | ||
282 | memcpy(ctx->k_opad, key, key_len); | ||
283 | |||
284 | /* XOR key with ipad and opad values */ | ||
285 | for (i = 0; i < 64; i++) { | ||
286 | ctx->k_ipad[i] ^= 0x36; | ||
287 | ctx->k_opad[i] ^= 0x5c; | ||
288 | } | ||
289 | |||
290 | cifs_MD5_init(&ctx->ctx); | ||
291 | cifs_MD5_update(&ctx->ctx, ctx->k_ipad, 64); | ||
292 | } | ||
293 | #endif | ||
294 | |||
295 | /*********************************************************************** | ||
296 | the microsoft version of hmac_md5 initialisation. | ||
297 | ***********************************************************************/ | ||
298 | void | ||
299 | hmac_md5_init_limK_to_64(const unsigned char *key, int key_len, | ||
300 | struct HMACMD5Context *ctx) | ||
301 | { | ||
302 | int i; | ||
303 | |||
304 | /* if key is longer than 64 bytes truncate it */ | ||
305 | if (key_len > 64) | ||
306 | key_len = 64; | ||
307 | |||
308 | /* start out by storing key in pads */ | ||
309 | memset(ctx->k_ipad, 0, sizeof(ctx->k_ipad)); | ||
310 | memset(ctx->k_opad, 0, sizeof(ctx->k_opad)); | ||
311 | memcpy(ctx->k_ipad, key, key_len); | ||
312 | memcpy(ctx->k_opad, key, key_len); | ||
313 | |||
314 | /* XOR key with ipad and opad values */ | ||
315 | for (i = 0; i < 64; i++) { | ||
316 | ctx->k_ipad[i] ^= 0x36; | ||
317 | ctx->k_opad[i] ^= 0x5c; | ||
318 | } | ||
319 | |||
320 | cifs_MD5_init(&ctx->ctx); | ||
321 | cifs_MD5_update(&ctx->ctx, ctx->k_ipad, 64); | ||
322 | } | ||
323 | |||
324 | /*********************************************************************** | ||
325 | update hmac_md5 "inner" buffer | ||
326 | ***********************************************************************/ | ||
327 | void | ||
328 | hmac_md5_update(const unsigned char *text, int text_len, | ||
329 | struct HMACMD5Context *ctx) | ||
330 | { | ||
331 | cifs_MD5_update(&ctx->ctx, text, text_len); /* then text of datagram */ | ||
332 | } | ||
333 | |||
334 | /*********************************************************************** | ||
335 | finish off hmac_md5 "inner" buffer and generate outer one. | ||
336 | ***********************************************************************/ | ||
337 | void | ||
338 | hmac_md5_final(unsigned char *digest, struct HMACMD5Context *ctx) | ||
339 | { | ||
340 | struct MD5Context ctx_o; | ||
341 | |||
342 | cifs_MD5_final(digest, &ctx->ctx); | ||
343 | |||
344 | cifs_MD5_init(&ctx_o); | ||
345 | cifs_MD5_update(&ctx_o, ctx->k_opad, 64); | ||
346 | cifs_MD5_update(&ctx_o, digest, 16); | ||
347 | cifs_MD5_final(digest, &ctx_o); | ||
348 | } | ||
349 | |||
350 | /*********************************************************** | ||
351 | single function to calculate an HMAC MD5 digest from data. | ||
352 | use the microsoft hmacmd5 init method because the key is 16 bytes. | ||
353 | ************************************************************/ | ||
354 | #if 0 /* currently unused */ | ||
355 | static void | ||
356 | hmac_md5(unsigned char key[16], unsigned char *data, int data_len, | ||
357 | unsigned char *digest) | ||
358 | { | ||
359 | struct HMACMD5Context ctx; | ||
360 | hmac_md5_init_limK_to_64(key, 16, &ctx); | ||
361 | if (data_len != 0) | ||
362 | hmac_md5_update(data, data_len, &ctx); | ||
363 | |||
364 | hmac_md5_final(digest, &ctx); | ||
365 | } | ||
366 | #endif | ||
diff --git a/fs/cifs/md5.h b/fs/cifs/md5.h deleted file mode 100644 index 6fba8cb402fd..000000000000 --- a/fs/cifs/md5.h +++ /dev/null | |||
@@ -1,38 +0,0 @@ | |||
1 | #ifndef MD5_H | ||
2 | #define MD5_H | ||
3 | #ifndef HEADER_MD5_H | ||
4 | /* Try to avoid clashes with OpenSSL */ | ||
5 | #define HEADER_MD5_H | ||
6 | #endif | ||
7 | |||
8 | struct MD5Context { | ||
9 | __u32 buf[4]; | ||
10 | __u32 bits[2]; | ||
11 | unsigned char in[64]; | ||
12 | }; | ||
13 | #endif /* !MD5_H */ | ||
14 | |||
15 | #ifndef _HMAC_MD5_H | ||
16 | struct HMACMD5Context { | ||
17 | struct MD5Context ctx; | ||
18 | unsigned char k_ipad[65]; | ||
19 | unsigned char k_opad[65]; | ||
20 | }; | ||
21 | #endif /* _HMAC_MD5_H */ | ||
22 | |||
23 | void cifs_MD5_init(struct MD5Context *context); | ||
24 | void cifs_MD5_update(struct MD5Context *context, unsigned char const *buf, | ||
25 | unsigned len); | ||
26 | void cifs_MD5_final(unsigned char digest[16], struct MD5Context *context); | ||
27 | |||
28 | /* The following definitions come from lib/hmacmd5.c */ | ||
29 | |||
30 | /* void hmac_md5_init_rfc2104(unsigned char *key, int key_len, | ||
31 | struct HMACMD5Context *ctx);*/ | ||
32 | void hmac_md5_init_limK_to_64(const unsigned char *key, int key_len, | ||
33 | struct HMACMD5Context *ctx); | ||
34 | void hmac_md5_update(const unsigned char *text, int text_len, | ||
35 | struct HMACMD5Context *ctx); | ||
36 | void hmac_md5_final(unsigned char *digest, struct HMACMD5Context *ctx); | ||
37 | /* void hmac_md5(unsigned char key[16], unsigned char *data, int data_len, | ||
38 | unsigned char *digest);*/ | ||
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 43f10281bc19..2a930a752a78 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c | |||
@@ -236,10 +236,7 @@ __u16 GetNextMid(struct TCP_Server_Info *server) | |||
236 | { | 236 | { |
237 | __u16 mid = 0; | 237 | __u16 mid = 0; |
238 | __u16 last_mid; | 238 | __u16 last_mid; |
239 | int collision; | 239 | bool collision; |
240 | |||
241 | if (server == NULL) | ||
242 | return mid; | ||
243 | 240 | ||
244 | spin_lock(&GlobalMid_Lock); | 241 | spin_lock(&GlobalMid_Lock); |
245 | last_mid = server->CurrentMid; /* we do not want to loop forever */ | 242 | last_mid = server->CurrentMid; /* we do not want to loop forever */ |
@@ -252,24 +249,38 @@ __u16 GetNextMid(struct TCP_Server_Info *server) | |||
252 | (and it would also have to have been a request that | 249 | (and it would also have to have been a request that |
253 | did not time out) */ | 250 | did not time out) */ |
254 | while (server->CurrentMid != last_mid) { | 251 | while (server->CurrentMid != last_mid) { |
255 | struct list_head *tmp; | ||
256 | struct mid_q_entry *mid_entry; | 252 | struct mid_q_entry *mid_entry; |
253 | unsigned int num_mids; | ||
257 | 254 | ||
258 | collision = 0; | 255 | collision = false; |
259 | if (server->CurrentMid == 0) | 256 | if (server->CurrentMid == 0) |
260 | server->CurrentMid++; | 257 | server->CurrentMid++; |
261 | 258 | ||
262 | list_for_each(tmp, &server->pending_mid_q) { | 259 | num_mids = 0; |
263 | mid_entry = list_entry(tmp, struct mid_q_entry, qhead); | 260 | list_for_each_entry(mid_entry, &server->pending_mid_q, qhead) { |
264 | 261 | ++num_mids; | |
265 | if ((mid_entry->mid == server->CurrentMid) && | 262 | if (mid_entry->mid == server->CurrentMid && |
266 | (mid_entry->midState == MID_REQUEST_SUBMITTED)) { | 263 | mid_entry->midState == MID_REQUEST_SUBMITTED) { |
267 | /* This mid is in use, try a different one */ | 264 | /* This mid is in use, try a different one */ |
268 | collision = 1; | 265 | collision = true; |
269 | break; | 266 | break; |
270 | } | 267 | } |
271 | } | 268 | } |
272 | if (collision == 0) { | 269 | |
270 | /* | ||
271 | * if we have more than 32k mids in the list, then something | ||
272 | * is very wrong. Possibly a local user is trying to DoS the | ||
273 | * box by issuing long-running calls and SIGKILL'ing them. If | ||
274 | * we get to 2^16 mids then we're in big trouble as this | ||
275 | * function could loop forever. | ||
276 | * | ||
277 | * Go ahead and assign out the mid in this situation, but force | ||
278 | * an eventual reconnect to clean out the pending_mid_q. | ||
279 | */ | ||
280 | if (num_mids > 32768) | ||
281 | server->tcpStatus = CifsNeedReconnect; | ||
282 | |||
283 | if (!collision) { | ||
273 | mid = server->CurrentMid; | 284 | mid = server->CurrentMid; |
274 | break; | 285 | break; |
275 | } | 286 | } |
@@ -381,29 +392,31 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ , | |||
381 | } | 392 | } |
382 | 393 | ||
383 | static int | 394 | static int |
384 | checkSMBhdr(struct smb_hdr *smb, __u16 mid) | 395 | check_smb_hdr(struct smb_hdr *smb, __u16 mid) |
385 | { | 396 | { |
386 | /* Make sure that this really is an SMB, that it is a response, | 397 | /* does it have the right SMB "signature" ? */ |
387 | and that the message ids match */ | 398 | if (*(__le32 *) smb->Protocol != cpu_to_le32(0x424d53ff)) { |
388 | if ((*(__le32 *) smb->Protocol == cpu_to_le32(0x424d53ff)) && | 399 | cERROR(1, "Bad protocol string signature header 0x%x", |
389 | (mid == smb->Mid)) { | 400 | *(unsigned int *)smb->Protocol); |
390 | if (smb->Flags & SMBFLG_RESPONSE) | 401 | return 1; |
391 | return 0; | ||
392 | else { | ||
393 | /* only one valid case where server sends us request */ | ||
394 | if (smb->Command == SMB_COM_LOCKING_ANDX) | ||
395 | return 0; | ||
396 | else | ||
397 | cERROR(1, "Received Request not response"); | ||
398 | } | ||
399 | } else { /* bad signature or mid */ | ||
400 | if (*(__le32 *) smb->Protocol != cpu_to_le32(0x424d53ff)) | ||
401 | cERROR(1, "Bad protocol string signature header %x", | ||
402 | *(unsigned int *) smb->Protocol); | ||
403 | if (mid != smb->Mid) | ||
404 | cERROR(1, "Mids do not match"); | ||
405 | } | 402 | } |
406 | cERROR(1, "bad smb detected. The Mid=%d", smb->Mid); | 403 | |
404 | /* Make sure that message ids match */ | ||
405 | if (mid != smb->Mid) { | ||
406 | cERROR(1, "Mids do not match. received=%u expected=%u", | ||
407 | smb->Mid, mid); | ||
408 | return 1; | ||
409 | } | ||
410 | |||
411 | /* if it's a response then accept */ | ||
412 | if (smb->Flags & SMBFLG_RESPONSE) | ||
413 | return 0; | ||
414 | |||
415 | /* only one valid case where server sends us request */ | ||
416 | if (smb->Command == SMB_COM_LOCKING_ANDX) | ||
417 | return 0; | ||
418 | |||
419 | cERROR(1, "Server sent request, not response. mid=%u", smb->Mid); | ||
407 | return 1; | 420 | return 1; |
408 | } | 421 | } |
409 | 422 | ||
@@ -448,7 +461,7 @@ checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length) | |||
448 | return 1; | 461 | return 1; |
449 | } | 462 | } |
450 | 463 | ||
451 | if (checkSMBhdr(smb, mid)) | 464 | if (check_smb_hdr(smb, mid)) |
452 | return 1; | 465 | return 1; |
453 | clc_len = smbCalcSize_LE(smb); | 466 | clc_len = smbCalcSize_LE(smb); |
454 | 467 | ||
@@ -465,25 +478,26 @@ checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length) | |||
465 | if (((4 + len) & 0xFFFF) == (clc_len & 0xFFFF)) | 478 | if (((4 + len) & 0xFFFF) == (clc_len & 0xFFFF)) |
466 | return 0; /* bcc wrapped */ | 479 | return 0; /* bcc wrapped */ |
467 | } | 480 | } |
468 | cFYI(1, "Calculated size %d vs length %d mismatch for mid %d", | 481 | cFYI(1, "Calculated size %u vs length %u mismatch for mid=%u", |
469 | clc_len, 4 + len, smb->Mid); | 482 | clc_len, 4 + len, smb->Mid); |
470 | /* Windows XP can return a few bytes too much, presumably | 483 | |
471 | an illegal pad, at the end of byte range lock responses | 484 | if (4 + len < clc_len) { |
472 | so we allow for that three byte pad, as long as actual | 485 | cERROR(1, "RFC1001 size %u smaller than SMB for mid=%u", |
473 | received length is as long or longer than calculated length */ | ||
474 | /* We have now had to extend this more, since there is a | ||
475 | case in which it needs to be bigger still to handle a | ||
476 | malformed response to transact2 findfirst from WinXP when | ||
477 | access denied is returned and thus bcc and wct are zero | ||
478 | but server says length is 0x21 bytes too long as if the server | ||
479 | forget to reset the smb rfc1001 length when it reset the | ||
480 | wct and bcc to minimum size and drop the t2 parms and data */ | ||
481 | if ((4+len > clc_len) && (len <= clc_len + 512)) | ||
482 | return 0; | ||
483 | else { | ||
484 | cERROR(1, "RFC1001 size %d bigger than SMB for Mid=%d", | ||
485 | len, smb->Mid); | 486 | len, smb->Mid); |
486 | return 1; | 487 | return 1; |
488 | } else if (len > clc_len + 512) { | ||
489 | /* | ||
490 | * Some servers (Windows XP in particular) send more | ||
491 | * data than the lengths in the SMB packet would | ||
492 | * indicate on certain calls (byte range locks and | ||
493 | * trans2 find first calls in particular). While the | ||
494 | * client can handle such a frame by ignoring the | ||
495 | * trailing data, we choose limit the amount of extra | ||
496 | * data to 512 bytes. | ||
497 | */ | ||
498 | cERROR(1, "RFC1001 size %u more than 512 bytes larger " | ||
499 | "than SMB for mid=%u", len, smb->Mid); | ||
500 | return 1; | ||
487 | } | 501 | } |
488 | } | 502 | } |
489 | return 0; | 503 | return 0; |
@@ -571,7 +585,7 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv) | |||
571 | pCifsInode = CIFS_I(netfile->dentry->d_inode); | 585 | pCifsInode = CIFS_I(netfile->dentry->d_inode); |
572 | 586 | ||
573 | cifs_set_oplock_level(pCifsInode, | 587 | cifs_set_oplock_level(pCifsInode, |
574 | pSMB->OplockLevel); | 588 | pSMB->OplockLevel ? OPLOCK_READ : 0); |
575 | /* | 589 | /* |
576 | * cifs_oplock_break_put() can't be called | 590 | * cifs_oplock_break_put() can't be called |
577 | * from here. Get reference after queueing | 591 | * from here. Get reference after queueing |
@@ -637,77 +651,6 @@ dump_smb(struct smb_hdr *smb_buf, int smb_buf_length) | |||
637 | return; | 651 | return; |
638 | } | 652 | } |
639 | 653 | ||
640 | /* Convert 16 bit Unicode pathname to wire format from string in current code | ||
641 | page. Conversion may involve remapping up the seven characters that are | ||
642 | only legal in POSIX-like OS (if they are present in the string). Path | ||
643 | names are little endian 16 bit Unicode on the wire */ | ||
644 | int | ||
645 | cifsConvertToUCS(__le16 *target, const char *source, int maxlen, | ||
646 | const struct nls_table *cp, int mapChars) | ||
647 | { | ||
648 | int i, j, charlen; | ||
649 | int len_remaining = maxlen; | ||
650 | char src_char; | ||
651 | __u16 temp; | ||
652 | |||
653 | if (!mapChars) | ||
654 | return cifs_strtoUCS(target, source, PATH_MAX, cp); | ||
655 | |||
656 | for (i = 0, j = 0; i < maxlen; j++) { | ||
657 | src_char = source[i]; | ||
658 | switch (src_char) { | ||
659 | case 0: | ||
660 | target[j] = 0; | ||
661 | goto ctoUCS_out; | ||
662 | case ':': | ||
663 | target[j] = cpu_to_le16(UNI_COLON); | ||
664 | break; | ||
665 | case '*': | ||
666 | target[j] = cpu_to_le16(UNI_ASTERIK); | ||
667 | break; | ||
668 | case '?': | ||
669 | target[j] = cpu_to_le16(UNI_QUESTION); | ||
670 | break; | ||
671 | case '<': | ||
672 | target[j] = cpu_to_le16(UNI_LESSTHAN); | ||
673 | break; | ||
674 | case '>': | ||
675 | target[j] = cpu_to_le16(UNI_GRTRTHAN); | ||
676 | break; | ||
677 | case '|': | ||
678 | target[j] = cpu_to_le16(UNI_PIPE); | ||
679 | break; | ||
680 | /* BB We can not handle remapping slash until | ||
681 | all the calls to build_path_from_dentry | ||
682 | are modified, as they use slash as separator BB */ | ||
683 | /* case '\\': | ||
684 | target[j] = cpu_to_le16(UNI_SLASH); | ||
685 | break;*/ | ||
686 | default: | ||
687 | charlen = cp->char2uni(source+i, | ||
688 | len_remaining, &temp); | ||
689 | /* if no match, use question mark, which | ||
690 | at least in some cases servers as wild card */ | ||
691 | if (charlen < 1) { | ||
692 | target[j] = cpu_to_le16(0x003f); | ||
693 | charlen = 1; | ||
694 | } else | ||
695 | target[j] = cpu_to_le16(temp); | ||
696 | len_remaining -= charlen; | ||
697 | /* character may take more than one byte in the | ||
698 | the source string, but will take exactly two | ||
699 | bytes in the target string */ | ||
700 | i += charlen; | ||
701 | continue; | ||
702 | } | ||
703 | i++; /* move to next char in source string */ | ||
704 | len_remaining--; | ||
705 | } | ||
706 | |||
707 | ctoUCS_out: | ||
708 | return i; | ||
709 | } | ||
710 | |||
711 | void | 654 | void |
712 | cifs_autodisable_serverino(struct cifs_sb_info *cifs_sb) | 655 | cifs_autodisable_serverino(struct cifs_sb_info *cifs_sb) |
713 | { | 656 | { |
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index 9aad47a2d62f..8d9189f64477 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c | |||
@@ -899,8 +899,8 @@ map_smb_to_linux_error(struct smb_hdr *smb, int logErr) | |||
899 | } | 899 | } |
900 | /* else ERRHRD class errors or junk - return EIO */ | 900 | /* else ERRHRD class errors or junk - return EIO */ |
901 | 901 | ||
902 | cFYI(1, "Mapping smb error code %d to POSIX err %d", | 902 | cFYI(1, "Mapping smb error code 0x%x to POSIX err %d", |
903 | smberrcode, rc); | 903 | le32_to_cpu(smb->Status.CifsError), rc); |
904 | 904 | ||
905 | /* generic corrective action e.g. reconnect SMB session on | 905 | /* generic corrective action e.g. reconnect SMB session on |
906 | * ERRbaduid could be added */ | 906 | * ERRbaduid could be added */ |
@@ -916,14 +916,14 @@ unsigned int | |||
916 | smbCalcSize(struct smb_hdr *ptr) | 916 | smbCalcSize(struct smb_hdr *ptr) |
917 | { | 917 | { |
918 | return (sizeof(struct smb_hdr) + (2 * ptr->WordCount) + | 918 | return (sizeof(struct smb_hdr) + (2 * ptr->WordCount) + |
919 | 2 /* size of the bcc field */ + BCC(ptr)); | 919 | 2 /* size of the bcc field */ + get_bcc(ptr)); |
920 | } | 920 | } |
921 | 921 | ||
922 | unsigned int | 922 | unsigned int |
923 | smbCalcSize_LE(struct smb_hdr *ptr) | 923 | smbCalcSize_LE(struct smb_hdr *ptr) |
924 | { | 924 | { |
925 | return (sizeof(struct smb_hdr) + (2 * ptr->WordCount) + | 925 | return (sizeof(struct smb_hdr) + (2 * ptr->WordCount) + |
926 | 2 /* size of the bcc field */ + le16_to_cpu(BCC_LE(ptr))); | 926 | 2 /* size of the bcc field */ + get_bcc_le(ptr)); |
927 | } | 927 | } |
928 | 928 | ||
929 | /* The following are taken from fs/ntfs/util.c */ | 929 | /* The following are taken from fs/ntfs/util.c */ |
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 7f25cc3d2256..f8e4cd2a7912 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c | |||
@@ -764,7 +764,6 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) | |||
764 | { | 764 | { |
765 | int rc = 0; | 765 | int rc = 0; |
766 | int xid, i; | 766 | int xid, i; |
767 | struct cifs_sb_info *cifs_sb; | ||
768 | struct cifsTconInfo *pTcon; | 767 | struct cifsTconInfo *pTcon; |
769 | struct cifsFileInfo *cifsFile = NULL; | 768 | struct cifsFileInfo *cifsFile = NULL; |
770 | char *current_entry; | 769 | char *current_entry; |
@@ -775,8 +774,6 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) | |||
775 | 774 | ||
776 | xid = GetXid(); | 775 | xid = GetXid(); |
777 | 776 | ||
778 | cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); | ||
779 | |||
780 | /* | 777 | /* |
781 | * Ensure FindFirst doesn't fail before doing filldir() for '.' and | 778 | * Ensure FindFirst doesn't fail before doing filldir() for '.' and |
782 | * '..'. Otherwise we won't be able to notify VFS in case of failure. | 779 | * '..'. Otherwise we won't be able to notify VFS in case of failure. |
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index eb746486e49e..1adc9625a344 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c | |||
@@ -277,7 +277,7 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses, | |||
277 | } | 277 | } |
278 | 278 | ||
279 | static void | 279 | static void |
280 | decode_unicode_ssetup(char **pbcc_area, int bleft, struct cifsSesInfo *ses, | 280 | decode_unicode_ssetup(char **pbcc_area, __u16 bleft, struct cifsSesInfo *ses, |
281 | const struct nls_table *nls_cp) | 281 | const struct nls_table *nls_cp) |
282 | { | 282 | { |
283 | int len; | 283 | int len; |
@@ -323,7 +323,7 @@ decode_unicode_ssetup(char **pbcc_area, int bleft, struct cifsSesInfo *ses, | |||
323 | return; | 323 | return; |
324 | } | 324 | } |
325 | 325 | ||
326 | static int decode_ascii_ssetup(char **pbcc_area, int bleft, | 326 | static int decode_ascii_ssetup(char **pbcc_area, __u16 bleft, |
327 | struct cifsSesInfo *ses, | 327 | struct cifsSesInfo *ses, |
328 | const struct nls_table *nls_cp) | 328 | const struct nls_table *nls_cp) |
329 | { | 329 | { |
@@ -575,12 +575,11 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, | |||
575 | char *str_area; | 575 | char *str_area; |
576 | SESSION_SETUP_ANDX *pSMB; | 576 | SESSION_SETUP_ANDX *pSMB; |
577 | __u32 capabilities; | 577 | __u32 capabilities; |
578 | int count; | 578 | __u16 count; |
579 | int resp_buf_type; | 579 | int resp_buf_type; |
580 | struct kvec iov[3]; | 580 | struct kvec iov[3]; |
581 | enum securityEnum type; | 581 | enum securityEnum type; |
582 | __u16 action; | 582 | __u16 action, bytes_remaining; |
583 | int bytes_remaining; | ||
584 | struct key *spnego_key = NULL; | 583 | struct key *spnego_key = NULL; |
585 | __le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */ | 584 | __le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */ |
586 | u16 blob_len; | 585 | u16 blob_len; |
@@ -876,10 +875,10 @@ ssetup_ntlmssp_authenticate: | |||
876 | count = iov[1].iov_len + iov[2].iov_len; | 875 | count = iov[1].iov_len + iov[2].iov_len; |
877 | smb_buf->smb_buf_length += count; | 876 | smb_buf->smb_buf_length += count; |
878 | 877 | ||
879 | BCC_LE(smb_buf) = cpu_to_le16(count); | 878 | put_bcc_le(count, smb_buf); |
880 | 879 | ||
881 | rc = SendReceive2(xid, ses, iov, 3 /* num_iovecs */, &resp_buf_type, | 880 | rc = SendReceive2(xid, ses, iov, 3 /* num_iovecs */, &resp_buf_type, |
882 | CIFS_STD_OP /* not long */ | CIFS_LOG_ERROR); | 881 | CIFS_LOG_ERROR); |
883 | /* SMB request buf freed in SendReceive2 */ | 882 | /* SMB request buf freed in SendReceive2 */ |
884 | 883 | ||
885 | pSMB = (SESSION_SETUP_ANDX *)iov[0].iov_base; | 884 | pSMB = (SESSION_SETUP_ANDX *)iov[0].iov_base; |
@@ -910,7 +909,7 @@ ssetup_ntlmssp_authenticate: | |||
910 | cFYI(1, "UID = %d ", ses->Suid); | 909 | cFYI(1, "UID = %d ", ses->Suid); |
911 | /* response can have either 3 or 4 word count - Samba sends 3 */ | 910 | /* response can have either 3 or 4 word count - Samba sends 3 */ |
912 | /* and lanman response is 3 */ | 911 | /* and lanman response is 3 */ |
913 | bytes_remaining = BCC(smb_buf); | 912 | bytes_remaining = get_bcc(smb_buf); |
914 | bcc_ptr = pByteArea(smb_buf); | 913 | bcc_ptr = pByteArea(smb_buf); |
915 | 914 | ||
916 | if (smb_buf->WordCount == 4) { | 915 | if (smb_buf->WordCount == 4) { |
diff --git a/fs/cifs/smbdes.c b/fs/cifs/smbdes.c index b6b6dcb500bf..04721485925d 100644 --- a/fs/cifs/smbdes.c +++ b/fs/cifs/smbdes.c | |||
@@ -45,7 +45,6 @@ | |||
45 | up with a different answer to the one above) | 45 | up with a different answer to the one above) |
46 | */ | 46 | */ |
47 | #include <linux/slab.h> | 47 | #include <linux/slab.h> |
48 | #include "cifsencrypt.h" | ||
49 | #define uchar unsigned char | 48 | #define uchar unsigned char |
50 | 49 | ||
51 | static uchar perm1[56] = { 57, 49, 41, 33, 25, 17, 9, | 50 | static uchar perm1[56] = { 57, 49, 41, 33, 25, 17, 9, |
diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c index 192ea51af20f..b5041c849981 100644 --- a/fs/cifs/smbencrypt.c +++ b/fs/cifs/smbencrypt.c | |||
@@ -32,9 +32,8 @@ | |||
32 | #include "cifs_unicode.h" | 32 | #include "cifs_unicode.h" |
33 | #include "cifspdu.h" | 33 | #include "cifspdu.h" |
34 | #include "cifsglob.h" | 34 | #include "cifsglob.h" |
35 | #include "md5.h" | ||
36 | #include "cifs_debug.h" | 35 | #include "cifs_debug.h" |
37 | #include "cifsencrypt.h" | 36 | #include "cifsproto.h" |
38 | 37 | ||
39 | #ifndef false | 38 | #ifndef false |
40 | #define false 0 | 39 | #define false 0 |
@@ -48,14 +47,58 @@ | |||
48 | #define SSVALX(buf,pos,val) (CVAL(buf,pos)=(val)&0xFF,CVAL(buf,pos+1)=(val)>>8) | 47 | #define SSVALX(buf,pos,val) (CVAL(buf,pos)=(val)&0xFF,CVAL(buf,pos+1)=(val)>>8) |
49 | #define SSVAL(buf,pos,val) SSVALX((buf),(pos),((__u16)(val))) | 48 | #define SSVAL(buf,pos,val) SSVALX((buf),(pos),((__u16)(val))) |
50 | 49 | ||
51 | /*The following definitions come from libsmb/smbencrypt.c */ | 50 | /* produce a md4 message digest from data of length n bytes */ |
51 | int | ||
52 | mdfour(unsigned char *md4_hash, unsigned char *link_str, int link_len) | ||
53 | { | ||
54 | int rc; | ||
55 | unsigned int size; | ||
56 | struct crypto_shash *md4; | ||
57 | struct sdesc *sdescmd4; | ||
58 | |||
59 | md4 = crypto_alloc_shash("md4", 0, 0); | ||
60 | if (IS_ERR(md4)) { | ||
61 | rc = PTR_ERR(md4); | ||
62 | cERROR(1, "%s: Crypto md4 allocation error %d\n", __func__, rc); | ||
63 | return rc; | ||
64 | } | ||
65 | size = sizeof(struct shash_desc) + crypto_shash_descsize(md4); | ||
66 | sdescmd4 = kmalloc(size, GFP_KERNEL); | ||
67 | if (!sdescmd4) { | ||
68 | rc = -ENOMEM; | ||
69 | cERROR(1, "%s: Memory allocation failure\n", __func__); | ||
70 | goto mdfour_err; | ||
71 | } | ||
72 | sdescmd4->shash.tfm = md4; | ||
73 | sdescmd4->shash.flags = 0x0; | ||
74 | |||
75 | rc = crypto_shash_init(&sdescmd4->shash); | ||
76 | if (rc) { | ||
77 | cERROR(1, "%s: Could not init md4 shash\n", __func__); | ||
78 | goto mdfour_err; | ||
79 | } | ||
80 | crypto_shash_update(&sdescmd4->shash, link_str, link_len); | ||
81 | rc = crypto_shash_final(&sdescmd4->shash, md4_hash); | ||
52 | 82 | ||
53 | void SMBencrypt(unsigned char *passwd, const unsigned char *c8, | 83 | mdfour_err: |
54 | unsigned char *p24); | 84 | crypto_free_shash(md4); |
55 | void E_md4hash(const unsigned char *passwd, unsigned char *p16); | 85 | kfree(sdescmd4); |
56 | static void SMBOWFencrypt(unsigned char passwd[16], const unsigned char *c8, | 86 | |
57 | unsigned char p24[24]); | 87 | return rc; |
58 | void SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24); | 88 | } |
89 | |||
90 | /* Does the des encryption from the NT or LM MD4 hash. */ | ||
91 | static void | ||
92 | SMBOWFencrypt(unsigned char passwd[16], const unsigned char *c8, | ||
93 | unsigned char p24[24]) | ||
94 | { | ||
95 | unsigned char p21[21]; | ||
96 | |||
97 | memset(p21, '\0', 21); | ||
98 | |||
99 | memcpy(p21, passwd, 16); | ||
100 | E_P24(p21, c8, p24); | ||
101 | } | ||
59 | 102 | ||
60 | /* | 103 | /* |
61 | This implements the X/Open SMB password encryption | 104 | This implements the X/Open SMB password encryption |
@@ -118,9 +161,10 @@ _my_mbstowcs(__u16 *dst, const unsigned char *src, int len) | |||
118 | * Creates the MD4 Hash of the users password in NT UNICODE. | 161 | * Creates the MD4 Hash of the users password in NT UNICODE. |
119 | */ | 162 | */ |
120 | 163 | ||
121 | void | 164 | int |
122 | E_md4hash(const unsigned char *passwd, unsigned char *p16) | 165 | E_md4hash(const unsigned char *passwd, unsigned char *p16) |
123 | { | 166 | { |
167 | int rc; | ||
124 | int len; | 168 | int len; |
125 | __u16 wpwd[129]; | 169 | __u16 wpwd[129]; |
126 | 170 | ||
@@ -139,8 +183,10 @@ E_md4hash(const unsigned char *passwd, unsigned char *p16) | |||
139 | /* Calculate length in bytes */ | 183 | /* Calculate length in bytes */ |
140 | len = _my_wcslen(wpwd) * sizeof(__u16); | 184 | len = _my_wcslen(wpwd) * sizeof(__u16); |
141 | 185 | ||
142 | mdfour(p16, (unsigned char *) wpwd, len); | 186 | rc = mdfour(p16, (unsigned char *) wpwd, len); |
143 | memset(wpwd, 0, 129 * 2); | 187 | memset(wpwd, 0, 129 * 2); |
188 | |||
189 | return rc; | ||
144 | } | 190 | } |
145 | 191 | ||
146 | #if 0 /* currently unused */ | 192 | #if 0 /* currently unused */ |
@@ -212,19 +258,6 @@ ntv2_owf_gen(const unsigned char owf[16], const char *user_n, | |||
212 | } | 258 | } |
213 | #endif | 259 | #endif |
214 | 260 | ||
215 | /* Does the des encryption from the NT or LM MD4 hash. */ | ||
216 | static void | ||
217 | SMBOWFencrypt(unsigned char passwd[16], const unsigned char *c8, | ||
218 | unsigned char p24[24]) | ||
219 | { | ||
220 | unsigned char p21[21]; | ||
221 | |||
222 | memset(p21, '\0', 21); | ||
223 | |||
224 | memcpy(p21, passwd, 16); | ||
225 | E_P24(p21, c8, p24); | ||
226 | } | ||
227 | |||
228 | /* Does the des encryption from the FIRST 8 BYTES of the NT or LM MD4 hash. */ | 261 | /* Does the des encryption from the FIRST 8 BYTES of the NT or LM MD4 hash. */ |
229 | #if 0 /* currently unused */ | 262 | #if 0 /* currently unused */ |
230 | static void | 263 | static void |
@@ -242,16 +275,21 @@ NTLMSSPOWFencrypt(unsigned char passwd[8], | |||
242 | #endif | 275 | #endif |
243 | 276 | ||
244 | /* Does the NT MD4 hash then des encryption. */ | 277 | /* Does the NT MD4 hash then des encryption. */ |
245 | 278 | int | |
246 | void | ||
247 | SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24) | 279 | SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24) |
248 | { | 280 | { |
281 | int rc; | ||
249 | unsigned char p21[21]; | 282 | unsigned char p21[21]; |
250 | 283 | ||
251 | memset(p21, '\0', 21); | 284 | memset(p21, '\0', 21); |
252 | 285 | ||
253 | E_md4hash(passwd, p21); | 286 | rc = E_md4hash(passwd, p21); |
287 | if (rc) { | ||
288 | cFYI(1, "%s Can't generate NT hash, error: %d", __func__, rc); | ||
289 | return rc; | ||
290 | } | ||
254 | SMBOWFencrypt(p21, c8, p24); | 291 | SMBOWFencrypt(p21, c8, p24); |
292 | return rc; | ||
255 | } | 293 | } |
256 | 294 | ||
257 | 295 | ||
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 59ca81b16919..46d8756f2b24 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c | |||
@@ -36,7 +36,13 @@ | |||
36 | 36 | ||
37 | extern mempool_t *cifs_mid_poolp; | 37 | extern mempool_t *cifs_mid_poolp; |
38 | 38 | ||
39 | static struct mid_q_entry * | 39 | static void |
40 | wake_up_task(struct mid_q_entry *mid) | ||
41 | { | ||
42 | wake_up_process(mid->callback_data); | ||
43 | } | ||
44 | |||
45 | struct mid_q_entry * | ||
40 | AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server) | 46 | AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server) |
41 | { | 47 | { |
42 | struct mid_q_entry *temp; | 48 | struct mid_q_entry *temp; |
@@ -58,28 +64,28 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server) | |||
58 | /* do_gettimeofday(&temp->when_sent);*/ /* easier to use jiffies */ | 64 | /* do_gettimeofday(&temp->when_sent);*/ /* easier to use jiffies */ |
59 | /* when mid allocated can be before when sent */ | 65 | /* when mid allocated can be before when sent */ |
60 | temp->when_alloc = jiffies; | 66 | temp->when_alloc = jiffies; |
61 | temp->tsk = current; | 67 | |
68 | /* | ||
69 | * The default is for the mid to be synchronous, so the | ||
70 | * default callback just wakes up the current task. | ||
71 | */ | ||
72 | temp->callback = wake_up_task; | ||
73 | temp->callback_data = current; | ||
62 | } | 74 | } |
63 | 75 | ||
64 | spin_lock(&GlobalMid_Lock); | ||
65 | list_add_tail(&temp->qhead, &server->pending_mid_q); | ||
66 | atomic_inc(&midCount); | 76 | atomic_inc(&midCount); |
67 | temp->midState = MID_REQUEST_ALLOCATED; | 77 | temp->midState = MID_REQUEST_ALLOCATED; |
68 | spin_unlock(&GlobalMid_Lock); | ||
69 | return temp; | 78 | return temp; |
70 | } | 79 | } |
71 | 80 | ||
72 | static void | 81 | void |
73 | DeleteMidQEntry(struct mid_q_entry *midEntry) | 82 | DeleteMidQEntry(struct mid_q_entry *midEntry) |
74 | { | 83 | { |
75 | #ifdef CONFIG_CIFS_STATS2 | 84 | #ifdef CONFIG_CIFS_STATS2 |
76 | unsigned long now; | 85 | unsigned long now; |
77 | #endif | 86 | #endif |
78 | spin_lock(&GlobalMid_Lock); | ||
79 | midEntry->midState = MID_FREE; | 87 | midEntry->midState = MID_FREE; |
80 | list_del(&midEntry->qhead); | ||
81 | atomic_dec(&midCount); | 88 | atomic_dec(&midCount); |
82 | spin_unlock(&GlobalMid_Lock); | ||
83 | if (midEntry->largeBuf) | 89 | if (midEntry->largeBuf) |
84 | cifs_buf_release(midEntry->resp_buf); | 90 | cifs_buf_release(midEntry->resp_buf); |
85 | else | 91 | else |
@@ -103,6 +109,16 @@ DeleteMidQEntry(struct mid_q_entry *midEntry) | |||
103 | mempool_free(midEntry, cifs_mid_poolp); | 109 | mempool_free(midEntry, cifs_mid_poolp); |
104 | } | 110 | } |
105 | 111 | ||
112 | static void | ||
113 | delete_mid(struct mid_q_entry *mid) | ||
114 | { | ||
115 | spin_lock(&GlobalMid_Lock); | ||
116 | list_del(&mid->qhead); | ||
117 | spin_unlock(&GlobalMid_Lock); | ||
118 | |||
119 | DeleteMidQEntry(mid); | ||
120 | } | ||
121 | |||
106 | static int | 122 | static int |
107 | smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec) | 123 | smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec) |
108 | { | 124 | { |
@@ -220,9 +236,9 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec) | |||
220 | server->tcpStatus = CifsNeedReconnect; | 236 | server->tcpStatus = CifsNeedReconnect; |
221 | } | 237 | } |
222 | 238 | ||
223 | if (rc < 0) { | 239 | if (rc < 0 && rc != -EINTR) |
224 | cERROR(1, "Error %d sending data on socket to server", rc); | 240 | cERROR(1, "Error %d sending data on socket to server", rc); |
225 | } else | 241 | else |
226 | rc = 0; | 242 | rc = 0; |
227 | 243 | ||
228 | /* Don't want to modify the buffer as a | 244 | /* Don't want to modify the buffer as a |
@@ -244,31 +260,31 @@ smb_send(struct TCP_Server_Info *server, struct smb_hdr *smb_buffer, | |||
244 | return smb_sendv(server, &iov, 1); | 260 | return smb_sendv(server, &iov, 1); |
245 | } | 261 | } |
246 | 262 | ||
247 | static int wait_for_free_request(struct cifsSesInfo *ses, const int long_op) | 263 | static int wait_for_free_request(struct TCP_Server_Info *server, |
264 | const int long_op) | ||
248 | { | 265 | { |
249 | if (long_op == CIFS_ASYNC_OP) { | 266 | if (long_op == CIFS_ASYNC_OP) { |
250 | /* oplock breaks must not be held up */ | 267 | /* oplock breaks must not be held up */ |
251 | atomic_inc(&ses->server->inFlight); | 268 | atomic_inc(&server->inFlight); |
252 | return 0; | 269 | return 0; |
253 | } | 270 | } |
254 | 271 | ||
255 | spin_lock(&GlobalMid_Lock); | 272 | spin_lock(&GlobalMid_Lock); |
256 | while (1) { | 273 | while (1) { |
257 | if (atomic_read(&ses->server->inFlight) >= | 274 | if (atomic_read(&server->inFlight) >= cifs_max_pending) { |
258 | cifs_max_pending){ | ||
259 | spin_unlock(&GlobalMid_Lock); | 275 | spin_unlock(&GlobalMid_Lock); |
260 | #ifdef CONFIG_CIFS_STATS2 | 276 | #ifdef CONFIG_CIFS_STATS2 |
261 | atomic_inc(&ses->server->num_waiters); | 277 | atomic_inc(&server->num_waiters); |
262 | #endif | 278 | #endif |
263 | wait_event(ses->server->request_q, | 279 | wait_event(server->request_q, |
264 | atomic_read(&ses->server->inFlight) | 280 | atomic_read(&server->inFlight) |
265 | < cifs_max_pending); | 281 | < cifs_max_pending); |
266 | #ifdef CONFIG_CIFS_STATS2 | 282 | #ifdef CONFIG_CIFS_STATS2 |
267 | atomic_dec(&ses->server->num_waiters); | 283 | atomic_dec(&server->num_waiters); |
268 | #endif | 284 | #endif |
269 | spin_lock(&GlobalMid_Lock); | 285 | spin_lock(&GlobalMid_Lock); |
270 | } else { | 286 | } else { |
271 | if (ses->server->tcpStatus == CifsExiting) { | 287 | if (server->tcpStatus == CifsExiting) { |
272 | spin_unlock(&GlobalMid_Lock); | 288 | spin_unlock(&GlobalMid_Lock); |
273 | return -ENOENT; | 289 | return -ENOENT; |
274 | } | 290 | } |
@@ -278,7 +294,7 @@ static int wait_for_free_request(struct cifsSesInfo *ses, const int long_op) | |||
278 | 294 | ||
279 | /* update # of requests on the wire to server */ | 295 | /* update # of requests on the wire to server */ |
280 | if (long_op != CIFS_BLOCKING_OP) | 296 | if (long_op != CIFS_BLOCKING_OP) |
281 | atomic_inc(&ses->server->inFlight); | 297 | atomic_inc(&server->inFlight); |
282 | spin_unlock(&GlobalMid_Lock); | 298 | spin_unlock(&GlobalMid_Lock); |
283 | break; | 299 | break; |
284 | } | 300 | } |
@@ -308,53 +324,85 @@ static int allocate_mid(struct cifsSesInfo *ses, struct smb_hdr *in_buf, | |||
308 | *ppmidQ = AllocMidQEntry(in_buf, ses->server); | 324 | *ppmidQ = AllocMidQEntry(in_buf, ses->server); |
309 | if (*ppmidQ == NULL) | 325 | if (*ppmidQ == NULL) |
310 | return -ENOMEM; | 326 | return -ENOMEM; |
327 | spin_lock(&GlobalMid_Lock); | ||
328 | list_add_tail(&(*ppmidQ)->qhead, &ses->server->pending_mid_q); | ||
329 | spin_unlock(&GlobalMid_Lock); | ||
311 | return 0; | 330 | return 0; |
312 | } | 331 | } |
313 | 332 | ||
314 | static int wait_for_response(struct cifsSesInfo *ses, | 333 | static int |
315 | struct mid_q_entry *midQ, | 334 | wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *midQ) |
316 | unsigned long timeout, | ||
317 | unsigned long time_to_wait) | ||
318 | { | 335 | { |
319 | unsigned long curr_timeout; | 336 | int error; |
320 | 337 | ||
321 | for (;;) { | 338 | error = wait_event_killable(server->response_q, |
322 | curr_timeout = timeout + jiffies; | 339 | midQ->midState != MID_REQUEST_SUBMITTED); |
323 | wait_event_timeout(ses->server->response_q, | 340 | if (error < 0) |
324 | midQ->midState != MID_REQUEST_SUBMITTED, timeout); | 341 | return -ERESTARTSYS; |
325 | 342 | ||
326 | if (time_after(jiffies, curr_timeout) && | 343 | return 0; |
327 | (midQ->midState == MID_REQUEST_SUBMITTED) && | 344 | } |
328 | ((ses->server->tcpStatus == CifsGood) || | ||
329 | (ses->server->tcpStatus == CifsNew))) { | ||
330 | 345 | ||
331 | unsigned long lrt; | ||
332 | 346 | ||
333 | /* We timed out. Is the server still | 347 | /* |
334 | sending replies ? */ | 348 | * Send a SMB request and set the callback function in the mid to handle |
335 | spin_lock(&GlobalMid_Lock); | 349 | * the result. Caller is responsible for dealing with timeouts. |
336 | lrt = ses->server->lstrp; | 350 | */ |
337 | spin_unlock(&GlobalMid_Lock); | 351 | int |
352 | cifs_call_async(struct TCP_Server_Info *server, struct smb_hdr *in_buf, | ||
353 | mid_callback_t *callback, void *cbdata) | ||
354 | { | ||
355 | int rc; | ||
356 | struct mid_q_entry *mid; | ||
338 | 357 | ||
339 | /* Calculate time_to_wait past last receive time. | 358 | rc = wait_for_free_request(server, CIFS_ASYNC_OP); |
340 | Although we prefer not to time out if the | 359 | if (rc) |
341 | server is still responding - we will time | 360 | return rc; |
342 | out if the server takes more than 15 (or 45 | 361 | |
343 | or 180) seconds to respond to this request | 362 | /* enable signing if server requires it */ |
344 | and has not responded to any request from | 363 | if (server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) |
345 | other threads on the client within 10 seconds */ | 364 | in_buf->Flags2 |= SMBFLG2_SECURITY_SIGNATURE; |
346 | lrt += time_to_wait; | 365 | |
347 | if (time_after(jiffies, lrt)) { | 366 | mutex_lock(&server->srv_mutex); |
348 | /* No replies for time_to_wait. */ | 367 | mid = AllocMidQEntry(in_buf, server); |
349 | cERROR(1, "server not responding"); | 368 | if (mid == NULL) { |
350 | return -1; | 369 | mutex_unlock(&server->srv_mutex); |
351 | } | 370 | return -ENOMEM; |
352 | } else { | ||
353 | return 0; | ||
354 | } | ||
355 | } | 371 | } |
356 | } | ||
357 | 372 | ||
373 | /* put it on the pending_mid_q */ | ||
374 | spin_lock(&GlobalMid_Lock); | ||
375 | list_add_tail(&mid->qhead, &server->pending_mid_q); | ||
376 | spin_unlock(&GlobalMid_Lock); | ||
377 | |||
378 | rc = cifs_sign_smb(in_buf, server, &mid->sequence_number); | ||
379 | if (rc) { | ||
380 | mutex_unlock(&server->srv_mutex); | ||
381 | goto out_err; | ||
382 | } | ||
383 | |||
384 | mid->callback = callback; | ||
385 | mid->callback_data = cbdata; | ||
386 | mid->midState = MID_REQUEST_SUBMITTED; | ||
387 | #ifdef CONFIG_CIFS_STATS2 | ||
388 | atomic_inc(&server->inSend); | ||
389 | #endif | ||
390 | rc = smb_send(server, in_buf, in_buf->smb_buf_length); | ||
391 | #ifdef CONFIG_CIFS_STATS2 | ||
392 | atomic_dec(&server->inSend); | ||
393 | mid->when_sent = jiffies; | ||
394 | #endif | ||
395 | mutex_unlock(&server->srv_mutex); | ||
396 | if (rc) | ||
397 | goto out_err; | ||
398 | |||
399 | return rc; | ||
400 | out_err: | ||
401 | delete_mid(mid); | ||
402 | atomic_dec(&server->inFlight); | ||
403 | wake_up(&server->request_q); | ||
404 | return rc; | ||
405 | } | ||
358 | 406 | ||
359 | /* | 407 | /* |
360 | * | 408 | * |
@@ -382,6 +430,84 @@ SendReceiveNoRsp(const unsigned int xid, struct cifsSesInfo *ses, | |||
382 | return rc; | 430 | return rc; |
383 | } | 431 | } |
384 | 432 | ||
433 | static int | ||
434 | sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server) | ||
435 | { | ||
436 | int rc = 0; | ||
437 | |||
438 | cFYI(1, "%s: cmd=%d mid=%d state=%d", __func__, mid->command, | ||
439 | mid->mid, mid->midState); | ||
440 | |||
441 | spin_lock(&GlobalMid_Lock); | ||
442 | /* ensure that it's no longer on the pending_mid_q */ | ||
443 | list_del_init(&mid->qhead); | ||
444 | |||
445 | switch (mid->midState) { | ||
446 | case MID_RESPONSE_RECEIVED: | ||
447 | spin_unlock(&GlobalMid_Lock); | ||
448 | return rc; | ||
449 | case MID_REQUEST_SUBMITTED: | ||
450 | /* socket is going down, reject all calls */ | ||
451 | if (server->tcpStatus == CifsExiting) { | ||
452 | cERROR(1, "%s: canceling mid=%d cmd=0x%x state=%d", | ||
453 | __func__, mid->mid, mid->command, mid->midState); | ||
454 | rc = -EHOSTDOWN; | ||
455 | break; | ||
456 | } | ||
457 | case MID_RETRY_NEEDED: | ||
458 | rc = -EAGAIN; | ||
459 | break; | ||
460 | case MID_RESPONSE_MALFORMED: | ||
461 | rc = -EIO; | ||
462 | break; | ||
463 | default: | ||
464 | cERROR(1, "%s: invalid mid state mid=%d state=%d", __func__, | ||
465 | mid->mid, mid->midState); | ||
466 | rc = -EIO; | ||
467 | } | ||
468 | spin_unlock(&GlobalMid_Lock); | ||
469 | |||
470 | DeleteMidQEntry(mid); | ||
471 | return rc; | ||
472 | } | ||
473 | |||
474 | /* | ||
475 | * An NT cancel request header looks just like the original request except: | ||
476 | * | ||
477 | * The Command is SMB_COM_NT_CANCEL | ||
478 | * The WordCount is zeroed out | ||
479 | * The ByteCount is zeroed out | ||
480 | * | ||
481 | * This function mangles an existing request buffer into a | ||
482 | * SMB_COM_NT_CANCEL request and then sends it. | ||
483 | */ | ||
484 | static int | ||
485 | send_nt_cancel(struct TCP_Server_Info *server, struct smb_hdr *in_buf, | ||
486 | struct mid_q_entry *mid) | ||
487 | { | ||
488 | int rc = 0; | ||
489 | |||
490 | /* -4 for RFC1001 length and +2 for BCC field */ | ||
491 | in_buf->smb_buf_length = sizeof(struct smb_hdr) - 4 + 2; | ||
492 | in_buf->Command = SMB_COM_NT_CANCEL; | ||
493 | in_buf->WordCount = 0; | ||
494 | put_bcc_le(0, in_buf); | ||
495 | |||
496 | mutex_lock(&server->srv_mutex); | ||
497 | rc = cifs_sign_smb(in_buf, server, &mid->sequence_number); | ||
498 | if (rc) { | ||
499 | mutex_unlock(&server->srv_mutex); | ||
500 | return rc; | ||
501 | } | ||
502 | rc = smb_send(server, in_buf, in_buf->smb_buf_length); | ||
503 | mutex_unlock(&server->srv_mutex); | ||
504 | |||
505 | cFYI(1, "issued NT_CANCEL for mid %u, rc = %d", | ||
506 | in_buf->Mid, rc); | ||
507 | |||
508 | return rc; | ||
509 | } | ||
510 | |||
385 | int | 511 | int |
386 | SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, | 512 | SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, |
387 | struct kvec *iov, int n_vec, int *pRespBufType /* ret */, | 513 | struct kvec *iov, int n_vec, int *pRespBufType /* ret */, |
@@ -390,7 +516,6 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, | |||
390 | int rc = 0; | 516 | int rc = 0; |
391 | int long_op; | 517 | int long_op; |
392 | unsigned int receive_len; | 518 | unsigned int receive_len; |
393 | unsigned long timeout; | ||
394 | struct mid_q_entry *midQ; | 519 | struct mid_q_entry *midQ; |
395 | struct smb_hdr *in_buf = iov[0].iov_base; | 520 | struct smb_hdr *in_buf = iov[0].iov_base; |
396 | 521 | ||
@@ -413,7 +538,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, | |||
413 | to the same server. We may make this configurable later or | 538 | to the same server. We may make this configurable later or |
414 | use ses->maxReq */ | 539 | use ses->maxReq */ |
415 | 540 | ||
416 | rc = wait_for_free_request(ses, long_op); | 541 | rc = wait_for_free_request(ses->server, long_op); |
417 | if (rc) { | 542 | if (rc) { |
418 | cifs_small_buf_release(in_buf); | 543 | cifs_small_buf_release(in_buf); |
419 | return rc; | 544 | return rc; |
@@ -452,70 +577,41 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, | |||
452 | #endif | 577 | #endif |
453 | 578 | ||
454 | mutex_unlock(&ses->server->srv_mutex); | 579 | mutex_unlock(&ses->server->srv_mutex); |
455 | cifs_small_buf_release(in_buf); | ||
456 | 580 | ||
457 | if (rc < 0) | 581 | if (rc < 0) { |
458 | goto out; | 582 | cifs_small_buf_release(in_buf); |
459 | |||
460 | if (long_op == CIFS_STD_OP) | ||
461 | timeout = 15 * HZ; | ||
462 | else if (long_op == CIFS_VLONG_OP) /* e.g. slow writes past EOF */ | ||
463 | timeout = 180 * HZ; | ||
464 | else if (long_op == CIFS_LONG_OP) | ||
465 | timeout = 45 * HZ; /* should be greater than | ||
466 | servers oplock break timeout (about 43 seconds) */ | ||
467 | else if (long_op == CIFS_ASYNC_OP) | ||
468 | goto out; | ||
469 | else if (long_op == CIFS_BLOCKING_OP) | ||
470 | timeout = 0x7FFFFFFF; /* large, but not so large as to wrap */ | ||
471 | else { | ||
472 | cERROR(1, "unknown timeout flag %d", long_op); | ||
473 | rc = -EIO; | ||
474 | goto out; | 583 | goto out; |
475 | } | 584 | } |
476 | 585 | ||
477 | /* wait for 15 seconds or until woken up due to response arriving or | 586 | if (long_op == CIFS_ASYNC_OP) { |
478 | due to last connection to this server being unmounted */ | 587 | cifs_small_buf_release(in_buf); |
479 | if (signal_pending(current)) { | 588 | goto out; |
480 | /* if signal pending do not hold up user for full smb timeout | ||
481 | but we still give response a chance to complete */ | ||
482 | timeout = 2 * HZ; | ||
483 | } | 589 | } |
484 | 590 | ||
485 | /* No user interrupts in wait - wreaks havoc with performance */ | 591 | rc = wait_for_response(ses->server, midQ); |
486 | wait_for_response(ses, midQ, timeout, 10 * HZ); | 592 | if (rc != 0) { |
487 | 593 | send_nt_cancel(ses->server, in_buf, midQ); | |
488 | spin_lock(&GlobalMid_Lock); | 594 | spin_lock(&GlobalMid_Lock); |
489 | |||
490 | if (midQ->resp_buf == NULL) { | ||
491 | cERROR(1, "No response to cmd %d mid %d", | ||
492 | midQ->command, midQ->mid); | ||
493 | if (midQ->midState == MID_REQUEST_SUBMITTED) { | 595 | if (midQ->midState == MID_REQUEST_SUBMITTED) { |
494 | if (ses->server->tcpStatus == CifsExiting) | 596 | midQ->callback = DeleteMidQEntry; |
495 | rc = -EHOSTDOWN; | 597 | spin_unlock(&GlobalMid_Lock); |
496 | else { | 598 | cifs_small_buf_release(in_buf); |
497 | ses->server->tcpStatus = CifsNeedReconnect; | 599 | atomic_dec(&ses->server->inFlight); |
498 | midQ->midState = MID_RETRY_NEEDED; | 600 | wake_up(&ses->server->request_q); |
499 | } | 601 | return rc; |
500 | } | ||
501 | |||
502 | if (rc != -EHOSTDOWN) { | ||
503 | if (midQ->midState == MID_RETRY_NEEDED) { | ||
504 | rc = -EAGAIN; | ||
505 | cFYI(1, "marking request for retry"); | ||
506 | } else { | ||
507 | rc = -EIO; | ||
508 | } | ||
509 | } | 602 | } |
510 | spin_unlock(&GlobalMid_Lock); | 603 | spin_unlock(&GlobalMid_Lock); |
511 | DeleteMidQEntry(midQ); | 604 | } |
512 | /* Update # of requests on wire to server */ | 605 | |
606 | cifs_small_buf_release(in_buf); | ||
607 | |||
608 | rc = sync_mid_result(midQ, ses->server); | ||
609 | if (rc != 0) { | ||
513 | atomic_dec(&ses->server->inFlight); | 610 | atomic_dec(&ses->server->inFlight); |
514 | wake_up(&ses->server->request_q); | 611 | wake_up(&ses->server->request_q); |
515 | return rc; | 612 | return rc; |
516 | } | 613 | } |
517 | 614 | ||
518 | spin_unlock(&GlobalMid_Lock); | ||
519 | receive_len = midQ->resp_buf->smb_buf_length; | 615 | receive_len = midQ->resp_buf->smb_buf_length; |
520 | 616 | ||
521 | if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) { | 617 | if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) { |
@@ -559,19 +655,18 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, | |||
559 | if (receive_len >= sizeof(struct smb_hdr) - 4 | 655 | if (receive_len >= sizeof(struct smb_hdr) - 4 |
560 | /* do not count RFC1001 header */ + | 656 | /* do not count RFC1001 header */ + |
561 | (2 * midQ->resp_buf->WordCount) + 2 /* bcc */ ) | 657 | (2 * midQ->resp_buf->WordCount) + 2 /* bcc */ ) |
562 | BCC(midQ->resp_buf) = | 658 | put_bcc(get_bcc_le(midQ->resp_buf), midQ->resp_buf); |
563 | le16_to_cpu(BCC_LE(midQ->resp_buf)); | ||
564 | if ((flags & CIFS_NO_RESP) == 0) | 659 | if ((flags & CIFS_NO_RESP) == 0) |
565 | midQ->resp_buf = NULL; /* mark it so buf will | 660 | midQ->resp_buf = NULL; /* mark it so buf will |
566 | not be freed by | 661 | not be freed by |
567 | DeleteMidQEntry */ | 662 | delete_mid */ |
568 | } else { | 663 | } else { |
569 | rc = -EIO; | 664 | rc = -EIO; |
570 | cFYI(1, "Bad MID state?"); | 665 | cFYI(1, "Bad MID state?"); |
571 | } | 666 | } |
572 | 667 | ||
573 | out: | 668 | out: |
574 | DeleteMidQEntry(midQ); | 669 | delete_mid(midQ); |
575 | atomic_dec(&ses->server->inFlight); | 670 | atomic_dec(&ses->server->inFlight); |
576 | wake_up(&ses->server->request_q); | 671 | wake_up(&ses->server->request_q); |
577 | 672 | ||
@@ -585,7 +680,6 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, | |||
585 | { | 680 | { |
586 | int rc = 0; | 681 | int rc = 0; |
587 | unsigned int receive_len; | 682 | unsigned int receive_len; |
588 | unsigned long timeout; | ||
589 | struct mid_q_entry *midQ; | 683 | struct mid_q_entry *midQ; |
590 | 684 | ||
591 | if (ses == NULL) { | 685 | if (ses == NULL) { |
@@ -610,7 +704,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, | |||
610 | return -EIO; | 704 | return -EIO; |
611 | } | 705 | } |
612 | 706 | ||
613 | rc = wait_for_free_request(ses, long_op); | 707 | rc = wait_for_free_request(ses->server, long_op); |
614 | if (rc) | 708 | if (rc) |
615 | return rc; | 709 | return rc; |
616 | 710 | ||
@@ -649,64 +743,31 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, | |||
649 | if (rc < 0) | 743 | if (rc < 0) |
650 | goto out; | 744 | goto out; |
651 | 745 | ||
652 | if (long_op == CIFS_STD_OP) | 746 | if (long_op == CIFS_ASYNC_OP) |
653 | timeout = 15 * HZ; | ||
654 | /* wait for 15 seconds or until woken up due to response arriving or | ||
655 | due to last connection to this server being unmounted */ | ||
656 | else if (long_op == CIFS_ASYNC_OP) | ||
657 | goto out; | ||
658 | else if (long_op == CIFS_VLONG_OP) /* writes past EOF can be slow */ | ||
659 | timeout = 180 * HZ; | ||
660 | else if (long_op == CIFS_LONG_OP) | ||
661 | timeout = 45 * HZ; /* should be greater than | ||
662 | servers oplock break timeout (about 43 seconds) */ | ||
663 | else if (long_op == CIFS_BLOCKING_OP) | ||
664 | timeout = 0x7FFFFFFF; /* large but no so large as to wrap */ | ||
665 | else { | ||
666 | cERROR(1, "unknown timeout flag %d", long_op); | ||
667 | rc = -EIO; | ||
668 | goto out; | 747 | goto out; |
669 | } | ||
670 | |||
671 | if (signal_pending(current)) { | ||
672 | /* if signal pending do not hold up user for full smb timeout | ||
673 | but we still give response a chance to complete */ | ||
674 | timeout = 2 * HZ; | ||
675 | } | ||
676 | |||
677 | /* No user interrupts in wait - wreaks havoc with performance */ | ||
678 | wait_for_response(ses, midQ, timeout, 10 * HZ); | ||
679 | 748 | ||
680 | spin_lock(&GlobalMid_Lock); | 749 | rc = wait_for_response(ses->server, midQ); |
681 | if (midQ->resp_buf == NULL) { | 750 | if (rc != 0) { |
682 | cERROR(1, "No response for cmd %d mid %d", | 751 | send_nt_cancel(ses->server, in_buf, midQ); |
683 | midQ->command, midQ->mid); | 752 | spin_lock(&GlobalMid_Lock); |
684 | if (midQ->midState == MID_REQUEST_SUBMITTED) { | 753 | if (midQ->midState == MID_REQUEST_SUBMITTED) { |
685 | if (ses->server->tcpStatus == CifsExiting) | 754 | /* no longer considered to be "in-flight" */ |
686 | rc = -EHOSTDOWN; | 755 | midQ->callback = DeleteMidQEntry; |
687 | else { | 756 | spin_unlock(&GlobalMid_Lock); |
688 | ses->server->tcpStatus = CifsNeedReconnect; | 757 | atomic_dec(&ses->server->inFlight); |
689 | midQ->midState = MID_RETRY_NEEDED; | 758 | wake_up(&ses->server->request_q); |
690 | } | 759 | return rc; |
691 | } | ||
692 | |||
693 | if (rc != -EHOSTDOWN) { | ||
694 | if (midQ->midState == MID_RETRY_NEEDED) { | ||
695 | rc = -EAGAIN; | ||
696 | cFYI(1, "marking request for retry"); | ||
697 | } else { | ||
698 | rc = -EIO; | ||
699 | } | ||
700 | } | 760 | } |
701 | spin_unlock(&GlobalMid_Lock); | 761 | spin_unlock(&GlobalMid_Lock); |
702 | DeleteMidQEntry(midQ); | 762 | } |
703 | /* Update # of requests on wire to server */ | 763 | |
764 | rc = sync_mid_result(midQ, ses->server); | ||
765 | if (rc != 0) { | ||
704 | atomic_dec(&ses->server->inFlight); | 766 | atomic_dec(&ses->server->inFlight); |
705 | wake_up(&ses->server->request_q); | 767 | wake_up(&ses->server->request_q); |
706 | return rc; | 768 | return rc; |
707 | } | 769 | } |
708 | 770 | ||
709 | spin_unlock(&GlobalMid_Lock); | ||
710 | receive_len = midQ->resp_buf->smb_buf_length; | 771 | receive_len = midQ->resp_buf->smb_buf_length; |
711 | 772 | ||
712 | if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) { | 773 | if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) { |
@@ -748,43 +809,20 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, | |||
748 | if (receive_len >= sizeof(struct smb_hdr) - 4 | 809 | if (receive_len >= sizeof(struct smb_hdr) - 4 |
749 | /* do not count RFC1001 header */ + | 810 | /* do not count RFC1001 header */ + |
750 | (2 * out_buf->WordCount) + 2 /* bcc */ ) | 811 | (2 * out_buf->WordCount) + 2 /* bcc */ ) |
751 | BCC(out_buf) = le16_to_cpu(BCC_LE(out_buf)); | 812 | put_bcc(get_bcc_le(midQ->resp_buf), midQ->resp_buf); |
752 | } else { | 813 | } else { |
753 | rc = -EIO; | 814 | rc = -EIO; |
754 | cERROR(1, "Bad MID state?"); | 815 | cERROR(1, "Bad MID state?"); |
755 | } | 816 | } |
756 | 817 | ||
757 | out: | 818 | out: |
758 | DeleteMidQEntry(midQ); | 819 | delete_mid(midQ); |
759 | atomic_dec(&ses->server->inFlight); | 820 | atomic_dec(&ses->server->inFlight); |
760 | wake_up(&ses->server->request_q); | 821 | wake_up(&ses->server->request_q); |
761 | 822 | ||
762 | return rc; | 823 | return rc; |
763 | } | 824 | } |
764 | 825 | ||
765 | /* Send an NT_CANCEL SMB to cause the POSIX blocking lock to return. */ | ||
766 | |||
767 | static int | ||
768 | send_nt_cancel(struct cifsTconInfo *tcon, struct smb_hdr *in_buf, | ||
769 | struct mid_q_entry *midQ) | ||
770 | { | ||
771 | int rc = 0; | ||
772 | struct cifsSesInfo *ses = tcon->ses; | ||
773 | __u16 mid = in_buf->Mid; | ||
774 | |||
775 | header_assemble(in_buf, SMB_COM_NT_CANCEL, tcon, 0); | ||
776 | in_buf->Mid = mid; | ||
777 | mutex_lock(&ses->server->srv_mutex); | ||
778 | rc = cifs_sign_smb(in_buf, ses->server, &midQ->sequence_number); | ||
779 | if (rc) { | ||
780 | mutex_unlock(&ses->server->srv_mutex); | ||
781 | return rc; | ||
782 | } | ||
783 | rc = smb_send(ses->server, in_buf, in_buf->smb_buf_length); | ||
784 | mutex_unlock(&ses->server->srv_mutex); | ||
785 | return rc; | ||
786 | } | ||
787 | |||
788 | /* We send a LOCKINGX_CANCEL_LOCK to cause the Windows | 826 | /* We send a LOCKINGX_CANCEL_LOCK to cause the Windows |
789 | blocking lock to return. */ | 827 | blocking lock to return. */ |
790 | 828 | ||
@@ -807,7 +845,7 @@ send_lock_cancel(const unsigned int xid, struct cifsTconInfo *tcon, | |||
807 | pSMB->hdr.Mid = GetNextMid(ses->server); | 845 | pSMB->hdr.Mid = GetNextMid(ses->server); |
808 | 846 | ||
809 | return SendReceive(xid, ses, in_buf, out_buf, | 847 | return SendReceive(xid, ses, in_buf, out_buf, |
810 | &bytes_returned, CIFS_STD_OP); | 848 | &bytes_returned, 0); |
811 | } | 849 | } |
812 | 850 | ||
813 | int | 851 | int |
@@ -845,7 +883,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon, | |||
845 | return -EIO; | 883 | return -EIO; |
846 | } | 884 | } |
847 | 885 | ||
848 | rc = wait_for_free_request(ses, CIFS_BLOCKING_OP); | 886 | rc = wait_for_free_request(ses->server, CIFS_BLOCKING_OP); |
849 | if (rc) | 887 | if (rc) |
850 | return rc; | 888 | return rc; |
851 | 889 | ||
@@ -863,7 +901,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon, | |||
863 | 901 | ||
864 | rc = cifs_sign_smb(in_buf, ses->server, &midQ->sequence_number); | 902 | rc = cifs_sign_smb(in_buf, ses->server, &midQ->sequence_number); |
865 | if (rc) { | 903 | if (rc) { |
866 | DeleteMidQEntry(midQ); | 904 | delete_mid(midQ); |
867 | mutex_unlock(&ses->server->srv_mutex); | 905 | mutex_unlock(&ses->server->srv_mutex); |
868 | return rc; | 906 | return rc; |
869 | } | 907 | } |
@@ -880,7 +918,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon, | |||
880 | mutex_unlock(&ses->server->srv_mutex); | 918 | mutex_unlock(&ses->server->srv_mutex); |
881 | 919 | ||
882 | if (rc < 0) { | 920 | if (rc < 0) { |
883 | DeleteMidQEntry(midQ); | 921 | delete_mid(midQ); |
884 | return rc; | 922 | return rc; |
885 | } | 923 | } |
886 | 924 | ||
@@ -899,10 +937,9 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon, | |||
899 | if (in_buf->Command == SMB_COM_TRANSACTION2) { | 937 | if (in_buf->Command == SMB_COM_TRANSACTION2) { |
900 | /* POSIX lock. We send a NT_CANCEL SMB to cause the | 938 | /* POSIX lock. We send a NT_CANCEL SMB to cause the |
901 | blocking lock to return. */ | 939 | blocking lock to return. */ |
902 | 940 | rc = send_nt_cancel(ses->server, in_buf, midQ); | |
903 | rc = send_nt_cancel(tcon, in_buf, midQ); | ||
904 | if (rc) { | 941 | if (rc) { |
905 | DeleteMidQEntry(midQ); | 942 | delete_mid(midQ); |
906 | return rc; | 943 | return rc; |
907 | } | 944 | } |
908 | } else { | 945 | } else { |
@@ -914,47 +951,33 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon, | |||
914 | /* If we get -ENOLCK back the lock may have | 951 | /* If we get -ENOLCK back the lock may have |
915 | already been removed. Don't exit in this case. */ | 952 | already been removed. Don't exit in this case. */ |
916 | if (rc && rc != -ENOLCK) { | 953 | if (rc && rc != -ENOLCK) { |
917 | DeleteMidQEntry(midQ); | 954 | delete_mid(midQ); |
918 | return rc; | 955 | return rc; |
919 | } | 956 | } |
920 | } | 957 | } |
921 | 958 | ||
922 | /* Wait 5 seconds for the response. */ | 959 | rc = wait_for_response(ses->server, midQ); |
923 | if (wait_for_response(ses, midQ, 5 * HZ, 5 * HZ) == 0) { | 960 | if (rc) { |
924 | /* We got the response - restart system call. */ | 961 | send_nt_cancel(ses->server, in_buf, midQ); |
925 | rstart = 1; | 962 | spin_lock(&GlobalMid_Lock); |
926 | } | 963 | if (midQ->midState == MID_REQUEST_SUBMITTED) { |
927 | } | 964 | /* no longer considered to be "in-flight" */ |
928 | 965 | midQ->callback = DeleteMidQEntry; | |
929 | spin_lock(&GlobalMid_Lock); | 966 | spin_unlock(&GlobalMid_Lock); |
930 | if (midQ->resp_buf) { | 967 | return rc; |
931 | spin_unlock(&GlobalMid_Lock); | ||
932 | receive_len = midQ->resp_buf->smb_buf_length; | ||
933 | } else { | ||
934 | cERROR(1, "No response for cmd %d mid %d", | ||
935 | midQ->command, midQ->mid); | ||
936 | if (midQ->midState == MID_REQUEST_SUBMITTED) { | ||
937 | if (ses->server->tcpStatus == CifsExiting) | ||
938 | rc = -EHOSTDOWN; | ||
939 | else { | ||
940 | ses->server->tcpStatus = CifsNeedReconnect; | ||
941 | midQ->midState = MID_RETRY_NEEDED; | ||
942 | } | 968 | } |
969 | spin_unlock(&GlobalMid_Lock); | ||
943 | } | 970 | } |
944 | 971 | ||
945 | if (rc != -EHOSTDOWN) { | 972 | /* We got the response - restart system call. */ |
946 | if (midQ->midState == MID_RETRY_NEEDED) { | 973 | rstart = 1; |
947 | rc = -EAGAIN; | ||
948 | cFYI(1, "marking request for retry"); | ||
949 | } else { | ||
950 | rc = -EIO; | ||
951 | } | ||
952 | } | ||
953 | spin_unlock(&GlobalMid_Lock); | ||
954 | DeleteMidQEntry(midQ); | ||
955 | return rc; | ||
956 | } | 974 | } |
957 | 975 | ||
976 | rc = sync_mid_result(midQ, ses->server); | ||
977 | if (rc != 0) | ||
978 | return rc; | ||
979 | |||
980 | receive_len = midQ->resp_buf->smb_buf_length; | ||
958 | if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) { | 981 | if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) { |
959 | cERROR(1, "Frame too large received. Length: %d Xid: %d", | 982 | cERROR(1, "Frame too large received. Length: %d Xid: %d", |
960 | receive_len, xid); | 983 | receive_len, xid); |
@@ -998,10 +1021,10 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon, | |||
998 | if (receive_len >= sizeof(struct smb_hdr) - 4 | 1021 | if (receive_len >= sizeof(struct smb_hdr) - 4 |
999 | /* do not count RFC1001 header */ + | 1022 | /* do not count RFC1001 header */ + |
1000 | (2 * out_buf->WordCount) + 2 /* bcc */ ) | 1023 | (2 * out_buf->WordCount) + 2 /* bcc */ ) |
1001 | BCC(out_buf) = le16_to_cpu(BCC_LE(out_buf)); | 1024 | put_bcc(get_bcc_le(out_buf), out_buf); |
1002 | 1025 | ||
1003 | out: | 1026 | out: |
1004 | DeleteMidQEntry(midQ); | 1027 | delete_mid(midQ); |
1005 | if (rstart && rc == -EACCES) | 1028 | if (rstart && rc == -EACCES) |
1006 | return -ERESTARTSYS; | 1029 | return -ERESTARTSYS; |
1007 | return rc; | 1030 | return rc; |
diff --git a/fs/compat.c b/fs/compat.c index eb1740ac8c0a..f6fd0a00e6cc 100644 --- a/fs/compat.c +++ b/fs/compat.c | |||
@@ -257,7 +257,7 @@ static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs * | |||
257 | } | 257 | } |
258 | 258 | ||
259 | /* | 259 | /* |
260 | * The following statfs calls are copies of code from fs/open.c and | 260 | * The following statfs calls are copies of code from fs/statfs.c and |
261 | * should be checked against those from time to time | 261 | * should be checked against those from time to time |
262 | */ | 262 | */ |
263 | asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf) | 263 | asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf) |
@@ -320,7 +320,9 @@ static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstat | |||
320 | __put_user(kbuf->f_namelen, &ubuf->f_namelen) || | 320 | __put_user(kbuf->f_namelen, &ubuf->f_namelen) || |
321 | __put_user(kbuf->f_fsid.val[0], &ubuf->f_fsid.val[0]) || | 321 | __put_user(kbuf->f_fsid.val[0], &ubuf->f_fsid.val[0]) || |
322 | __put_user(kbuf->f_fsid.val[1], &ubuf->f_fsid.val[1]) || | 322 | __put_user(kbuf->f_fsid.val[1], &ubuf->f_fsid.val[1]) || |
323 | __put_user(kbuf->f_frsize, &ubuf->f_frsize)) | 323 | __put_user(kbuf->f_frsize, &ubuf->f_frsize) || |
324 | __put_user(kbuf->f_flags, &ubuf->f_flags) || | ||
325 | __clear_user(ubuf->f_spare, sizeof(ubuf->f_spare))) | ||
324 | return -EFAULT; | 326 | return -EFAULT; |
325 | return 0; | 327 | return 0; |
326 | } | 328 | } |
@@ -597,10 +599,8 @@ ssize_t compat_rw_copy_check_uvector(int type, | |||
597 | if (nr_segs > fast_segs) { | 599 | if (nr_segs > fast_segs) { |
598 | ret = -ENOMEM; | 600 | ret = -ENOMEM; |
599 | iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); | 601 | iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); |
600 | if (iov == NULL) { | 602 | if (iov == NULL) |
601 | *ret_pointer = fast_pointer; | ||
602 | goto out; | 603 | goto out; |
603 | } | ||
604 | } | 604 | } |
605 | *ret_pointer = iov; | 605 | *ret_pointer = iov; |
606 | 606 | ||
diff --git a/fs/configfs/Kconfig b/fs/configfs/Kconfig index 13587cc97a0b..9febcdefdfdc 100644 --- a/fs/configfs/Kconfig +++ b/fs/configfs/Kconfig | |||
@@ -1,8 +1,8 @@ | |||
1 | config CONFIGFS_FS | 1 | config CONFIGFS_FS |
2 | tristate "Userspace-driven configuration filesystem" | 2 | tristate "Userspace-driven configuration filesystem" |
3 | depends on SYSFS | 3 | select SYSFS |
4 | help | 4 | help |
5 | configfs is a ram-based filesystem that provides the converse | 5 | configfs is a RAM-based filesystem that provides the converse |
6 | of sysfs's functionality. Where sysfs is a filesystem-based | 6 | of sysfs's functionality. Where sysfs is a filesystem-based |
7 | view of kernel objects, configfs is a filesystem-based manager | 7 | view of kernel objects, configfs is a filesystem-based manager |
8 | of kernel objects, or config_items. | 8 | of kernel objects, or config_items. |
diff --git a/fs/dcache.c b/fs/dcache.c index 0c6d5c549d84..2a6bd9a4ae97 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -176,6 +176,7 @@ static void d_free(struct dentry *dentry) | |||
176 | 176 | ||
177 | /** | 177 | /** |
178 | * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups | 178 | * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups |
179 | * @dentry: the target dentry | ||
179 | * After this call, in-progress rcu-walk path lookup will fail. This | 180 | * After this call, in-progress rcu-walk path lookup will fail. This |
180 | * should be called after unhashing, and after changing d_inode (if | 181 | * should be called after unhashing, and after changing d_inode (if |
181 | * the dentry has not already been unhashed). | 182 | * the dentry has not already been unhashed). |
@@ -281,6 +282,7 @@ static void dentry_lru_move_tail(struct dentry *dentry) | |||
281 | /** | 282 | /** |
282 | * d_kill - kill dentry and return parent | 283 | * d_kill - kill dentry and return parent |
283 | * @dentry: dentry to kill | 284 | * @dentry: dentry to kill |
285 | * @parent: parent dentry | ||
284 | * | 286 | * |
285 | * The dentry must already be unhashed and removed from the LRU. | 287 | * The dentry must already be unhashed and removed from the LRU. |
286 | * | 288 | * |
@@ -1357,8 +1359,8 @@ EXPORT_SYMBOL(d_alloc_name); | |||
1357 | 1359 | ||
1358 | void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op) | 1360 | void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op) |
1359 | { | 1361 | { |
1360 | BUG_ON(dentry->d_op); | 1362 | WARN_ON_ONCE(dentry->d_op); |
1361 | BUG_ON(dentry->d_flags & (DCACHE_OP_HASH | | 1363 | WARN_ON_ONCE(dentry->d_flags & (DCACHE_OP_HASH | |
1362 | DCACHE_OP_COMPARE | | 1364 | DCACHE_OP_COMPARE | |
1363 | DCACHE_OP_REVALIDATE | | 1365 | DCACHE_OP_REVALIDATE | |
1364 | DCACHE_OP_DELETE )); | 1366 | DCACHE_OP_DELETE )); |
@@ -1380,8 +1382,11 @@ EXPORT_SYMBOL(d_set_d_op); | |||
1380 | static void __d_instantiate(struct dentry *dentry, struct inode *inode) | 1382 | static void __d_instantiate(struct dentry *dentry, struct inode *inode) |
1381 | { | 1383 | { |
1382 | spin_lock(&dentry->d_lock); | 1384 | spin_lock(&dentry->d_lock); |
1383 | if (inode) | 1385 | if (inode) { |
1386 | if (unlikely(IS_AUTOMOUNT(inode))) | ||
1387 | dentry->d_flags |= DCACHE_NEED_AUTOMOUNT; | ||
1384 | list_add(&dentry->d_alias, &inode->i_dentry); | 1388 | list_add(&dentry->d_alias, &inode->i_dentry); |
1389 | } | ||
1385 | dentry->d_inode = inode; | 1390 | dentry->d_inode = inode; |
1386 | dentry_rcuwalk_barrier(dentry); | 1391 | dentry_rcuwalk_barrier(dentry); |
1387 | spin_unlock(&dentry->d_lock); | 1392 | spin_unlock(&dentry->d_lock); |
@@ -1970,7 +1975,7 @@ out: | |||
1970 | /** | 1975 | /** |
1971 | * d_validate - verify dentry provided from insecure source (deprecated) | 1976 | * d_validate - verify dentry provided from insecure source (deprecated) |
1972 | * @dentry: The dentry alleged to be valid child of @dparent | 1977 | * @dentry: The dentry alleged to be valid child of @dparent |
1973 | * @parent: The parent dentry (known to be valid) | 1978 | * @dparent: The parent dentry (known to be valid) |
1974 | * | 1979 | * |
1975 | * An insecure source has sent us a dentry, here we verify it and dget() it. | 1980 | * An insecure source has sent us a dentry, here we verify it and dget() it. |
1976 | * This is used by ncpfs in its readdir implementation. | 1981 | * This is used by ncpfs in its readdir implementation. |
diff --git a/fs/direct-io.c b/fs/direct-io.c index 8201c2558d85..dcb5577cde1d 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c | |||
@@ -325,12 +325,16 @@ void dio_end_io(struct bio *bio, int error) | |||
325 | } | 325 | } |
326 | EXPORT_SYMBOL_GPL(dio_end_io); | 326 | EXPORT_SYMBOL_GPL(dio_end_io); |
327 | 327 | ||
328 | static int | 328 | static void |
329 | dio_bio_alloc(struct dio *dio, struct block_device *bdev, | 329 | dio_bio_alloc(struct dio *dio, struct block_device *bdev, |
330 | sector_t first_sector, int nr_vecs) | 330 | sector_t first_sector, int nr_vecs) |
331 | { | 331 | { |
332 | struct bio *bio; | 332 | struct bio *bio; |
333 | 333 | ||
334 | /* | ||
335 | * bio_alloc() is guaranteed to return a bio when called with | ||
336 | * __GFP_WAIT and we request a valid number of vectors. | ||
337 | */ | ||
334 | bio = bio_alloc(GFP_KERNEL, nr_vecs); | 338 | bio = bio_alloc(GFP_KERNEL, nr_vecs); |
335 | 339 | ||
336 | bio->bi_bdev = bdev; | 340 | bio->bi_bdev = bdev; |
@@ -342,7 +346,6 @@ dio_bio_alloc(struct dio *dio, struct block_device *bdev, | |||
342 | 346 | ||
343 | dio->bio = bio; | 347 | dio->bio = bio; |
344 | dio->logical_offset_in_bio = dio->cur_page_fs_offset; | 348 | dio->logical_offset_in_bio = dio->cur_page_fs_offset; |
345 | return 0; | ||
346 | } | 349 | } |
347 | 350 | ||
348 | /* | 351 | /* |
@@ -583,8 +586,9 @@ static int dio_new_bio(struct dio *dio, sector_t start_sector) | |||
583 | goto out; | 586 | goto out; |
584 | sector = start_sector << (dio->blkbits - 9); | 587 | sector = start_sector << (dio->blkbits - 9); |
585 | nr_pages = min(dio->pages_in_io, bio_get_nr_vecs(dio->map_bh.b_bdev)); | 588 | nr_pages = min(dio->pages_in_io, bio_get_nr_vecs(dio->map_bh.b_bdev)); |
589 | nr_pages = min(nr_pages, BIO_MAX_PAGES); | ||
586 | BUG_ON(nr_pages <= 0); | 590 | BUG_ON(nr_pages <= 0); |
587 | ret = dio_bio_alloc(dio, dio->map_bh.b_bdev, sector, nr_pages); | 591 | dio_bio_alloc(dio, dio->map_bh.b_bdev, sector, nr_pages); |
588 | dio->boundary = 0; | 592 | dio->boundary = 0; |
589 | out: | 593 | out: |
590 | return ret; | 594 | return ret; |
diff --git a/fs/dlm/Kconfig b/fs/dlm/Kconfig index 2dbb422e8116..1897eb1b4b6a 100644 --- a/fs/dlm/Kconfig +++ b/fs/dlm/Kconfig | |||
@@ -1,8 +1,7 @@ | |||
1 | menuconfig DLM | 1 | menuconfig DLM |
2 | tristate "Distributed Lock Manager (DLM)" | 2 | tristate "Distributed Lock Manager (DLM)" |
3 | depends on EXPERIMENTAL && INET | 3 | depends on EXPERIMENTAL && INET |
4 | depends on SYSFS && (IPV6 || IPV6=n) | 4 | depends on SYSFS && CONFIGFS_FS && (IPV6 || IPV6=n) |
5 | select CONFIGFS_FS | ||
6 | select IP_SCTP | 5 | select IP_SCTP |
7 | help | 6 | help |
8 | A general purpose distributed lock manager for kernel or userspace | 7 | A general purpose distributed lock manager for kernel or userspace |
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 9c64ae9e4c1a..2d8c87b951c2 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c | |||
@@ -1468,15 +1468,13 @@ static void work_stop(void) | |||
1468 | 1468 | ||
1469 | static int work_start(void) | 1469 | static int work_start(void) |
1470 | { | 1470 | { |
1471 | recv_workqueue = alloc_workqueue("dlm_recv", WQ_MEM_RECLAIM | | 1471 | recv_workqueue = create_singlethread_workqueue("dlm_recv"); |
1472 | WQ_HIGHPRI | WQ_FREEZEABLE, 0); | ||
1473 | if (!recv_workqueue) { | 1472 | if (!recv_workqueue) { |
1474 | log_print("can't start dlm_recv"); | 1473 | log_print("can't start dlm_recv"); |
1475 | return -ENOMEM; | 1474 | return -ENOMEM; |
1476 | } | 1475 | } |
1477 | 1476 | ||
1478 | send_workqueue = alloc_workqueue("dlm_send", WQ_MEM_RECLAIM | | 1477 | send_workqueue = create_singlethread_workqueue("dlm_send"); |
1479 | WQ_HIGHPRI | WQ_FREEZEABLE, 0); | ||
1480 | if (!send_workqueue) { | 1478 | if (!send_workqueue) { |
1481 | log_print("can't start dlm_send"); | 1479 | log_print("can't start dlm_send"); |
1482 | destroy_workqueue(recv_workqueue); | 1480 | destroy_workqueue(recv_workqueue); |
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index cbadc1bee6e7..bfd8b680e648 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c | |||
@@ -348,7 +348,7 @@ static int encrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, | |||
348 | BUG_ON(!crypt_stat || !crypt_stat->tfm | 348 | BUG_ON(!crypt_stat || !crypt_stat->tfm |
349 | || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED)); | 349 | || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED)); |
350 | if (unlikely(ecryptfs_verbosity > 0)) { | 350 | if (unlikely(ecryptfs_verbosity > 0)) { |
351 | ecryptfs_printk(KERN_DEBUG, "Key size [%d]; key:\n", | 351 | ecryptfs_printk(KERN_DEBUG, "Key size [%zd]; key:\n", |
352 | crypt_stat->key_size); | 352 | crypt_stat->key_size); |
353 | ecryptfs_dump_hex(crypt_stat->key, | 353 | ecryptfs_dump_hex(crypt_stat->key, |
354 | crypt_stat->key_size); | 354 | crypt_stat->key_size); |
@@ -413,10 +413,9 @@ static int ecryptfs_encrypt_extent(struct page *enc_extent_page, | |||
413 | rc = ecryptfs_derive_iv(extent_iv, crypt_stat, | 413 | rc = ecryptfs_derive_iv(extent_iv, crypt_stat, |
414 | (extent_base + extent_offset)); | 414 | (extent_base + extent_offset)); |
415 | if (rc) { | 415 | if (rc) { |
416 | ecryptfs_printk(KERN_ERR, "Error attempting to " | 416 | ecryptfs_printk(KERN_ERR, "Error attempting to derive IV for " |
417 | "derive IV for extent [0x%.16x]; " | 417 | "extent [0x%.16llx]; rc = [%d]\n", |
418 | "rc = [%d]\n", (extent_base + extent_offset), | 418 | (unsigned long long)(extent_base + extent_offset), rc); |
419 | rc); | ||
420 | goto out; | 419 | goto out; |
421 | } | 420 | } |
422 | if (unlikely(ecryptfs_verbosity > 0)) { | 421 | if (unlikely(ecryptfs_verbosity > 0)) { |
@@ -443,9 +442,9 @@ static int ecryptfs_encrypt_extent(struct page *enc_extent_page, | |||
443 | } | 442 | } |
444 | rc = 0; | 443 | rc = 0; |
445 | if (unlikely(ecryptfs_verbosity > 0)) { | 444 | if (unlikely(ecryptfs_verbosity > 0)) { |
446 | ecryptfs_printk(KERN_DEBUG, "Encrypt extent [0x%.16x]; " | 445 | ecryptfs_printk(KERN_DEBUG, "Encrypt extent [0x%.16llx]; " |
447 | "rc = [%d]\n", (extent_base + extent_offset), | 446 | "rc = [%d]\n", |
448 | rc); | 447 | (unsigned long long)(extent_base + extent_offset), rc); |
449 | ecryptfs_printk(KERN_DEBUG, "First 8 bytes after " | 448 | ecryptfs_printk(KERN_DEBUG, "First 8 bytes after " |
450 | "encryption:\n"); | 449 | "encryption:\n"); |
451 | ecryptfs_dump_hex((char *)(page_address(enc_extent_page)), 8); | 450 | ecryptfs_dump_hex((char *)(page_address(enc_extent_page)), 8); |
@@ -540,10 +539,9 @@ static int ecryptfs_decrypt_extent(struct page *page, | |||
540 | rc = ecryptfs_derive_iv(extent_iv, crypt_stat, | 539 | rc = ecryptfs_derive_iv(extent_iv, crypt_stat, |
541 | (extent_base + extent_offset)); | 540 | (extent_base + extent_offset)); |
542 | if (rc) { | 541 | if (rc) { |
543 | ecryptfs_printk(KERN_ERR, "Error attempting to " | 542 | ecryptfs_printk(KERN_ERR, "Error attempting to derive IV for " |
544 | "derive IV for extent [0x%.16x]; " | 543 | "extent [0x%.16llx]; rc = [%d]\n", |
545 | "rc = [%d]\n", (extent_base + extent_offset), | 544 | (unsigned long long)(extent_base + extent_offset), rc); |
546 | rc); | ||
547 | goto out; | 545 | goto out; |
548 | } | 546 | } |
549 | if (unlikely(ecryptfs_verbosity > 0)) { | 547 | if (unlikely(ecryptfs_verbosity > 0)) { |
@@ -571,9 +569,9 @@ static int ecryptfs_decrypt_extent(struct page *page, | |||
571 | } | 569 | } |
572 | rc = 0; | 570 | rc = 0; |
573 | if (unlikely(ecryptfs_verbosity > 0)) { | 571 | if (unlikely(ecryptfs_verbosity > 0)) { |
574 | ecryptfs_printk(KERN_DEBUG, "Decrypt extent [0x%.16x]; " | 572 | ecryptfs_printk(KERN_DEBUG, "Decrypt extent [0x%.16llx]; " |
575 | "rc = [%d]\n", (extent_base + extent_offset), | 573 | "rc = [%d]\n", |
576 | rc); | 574 | (unsigned long long)(extent_base + extent_offset), rc); |
577 | ecryptfs_printk(KERN_DEBUG, "First 8 bytes after " | 575 | ecryptfs_printk(KERN_DEBUG, "First 8 bytes after " |
578 | "decryption:\n"); | 576 | "decryption:\n"); |
579 | ecryptfs_dump_hex((char *)(page_address(page) | 577 | ecryptfs_dump_hex((char *)(page_address(page) |
@@ -780,7 +778,7 @@ int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat) | |||
780 | } | 778 | } |
781 | ecryptfs_printk(KERN_DEBUG, | 779 | ecryptfs_printk(KERN_DEBUG, |
782 | "Initializing cipher [%s]; strlen = [%d]; " | 780 | "Initializing cipher [%s]; strlen = [%d]; " |
783 | "key_size_bits = [%d]\n", | 781 | "key_size_bits = [%zd]\n", |
784 | crypt_stat->cipher, (int)strlen(crypt_stat->cipher), | 782 | crypt_stat->cipher, (int)strlen(crypt_stat->cipher), |
785 | crypt_stat->key_size << 3); | 783 | crypt_stat->key_size << 3); |
786 | if (crypt_stat->tfm) { | 784 | if (crypt_stat->tfm) { |
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 413a3c48f0bb..dbc84ed96336 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h | |||
@@ -192,7 +192,6 @@ ecryptfs_get_key_payload_data(struct key *key) | |||
192 | (((struct user_key_payload*)key->payload.data)->data); | 192 | (((struct user_key_payload*)key->payload.data)->data); |
193 | } | 193 | } |
194 | 194 | ||
195 | #define ECRYPTFS_SUPER_MAGIC 0xf15f | ||
196 | #define ECRYPTFS_MAX_KEYSET_SIZE 1024 | 195 | #define ECRYPTFS_MAX_KEYSET_SIZE 1024 |
197 | #define ECRYPTFS_MAX_CIPHER_NAME_SIZE 32 | 196 | #define ECRYPTFS_MAX_CIPHER_NAME_SIZE 32 |
198 | #define ECRYPTFS_MAX_NUM_ENC_KEYS 64 | 197 | #define ECRYPTFS_MAX_NUM_ENC_KEYS 64 |
@@ -584,6 +583,7 @@ ecryptfs_set_dentry_lower_mnt(struct dentry *dentry, struct vfsmount *lower_mnt) | |||
584 | 583 | ||
585 | #define ecryptfs_printk(type, fmt, arg...) \ | 584 | #define ecryptfs_printk(type, fmt, arg...) \ |
586 | __ecryptfs_printk(type "%s: " fmt, __func__, ## arg); | 585 | __ecryptfs_printk(type "%s: " fmt, __func__, ## arg); |
586 | __attribute__ ((format(printf, 1, 2))) | ||
587 | void __ecryptfs_printk(const char *fmt, ...); | 587 | void __ecryptfs_printk(const char *fmt, ...); |
588 | 588 | ||
589 | extern const struct file_operations ecryptfs_main_fops; | 589 | extern const struct file_operations ecryptfs_main_fops; |
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 91da02987bff..81e10e6a9443 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c | |||
@@ -47,7 +47,7 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb, | |||
47 | const struct iovec *iov, | 47 | const struct iovec *iov, |
48 | unsigned long nr_segs, loff_t pos) | 48 | unsigned long nr_segs, loff_t pos) |
49 | { | 49 | { |
50 | int rc; | 50 | ssize_t rc; |
51 | struct dentry *lower_dentry; | 51 | struct dentry *lower_dentry; |
52 | struct vfsmount *lower_vfsmount; | 52 | struct vfsmount *lower_vfsmount; |
53 | struct file *file = iocb->ki_filp; | 53 | struct file *file = iocb->ki_filp; |
@@ -191,18 +191,16 @@ static int ecryptfs_open(struct inode *inode, struct file *file) | |||
191 | | ECRYPTFS_ENCRYPTED); | 191 | | ECRYPTFS_ENCRYPTED); |
192 | } | 192 | } |
193 | mutex_unlock(&crypt_stat->cs_mutex); | 193 | mutex_unlock(&crypt_stat->cs_mutex); |
194 | if (!ecryptfs_inode_to_private(inode)->lower_file) { | 194 | rc = ecryptfs_init_persistent_file(ecryptfs_dentry); |
195 | rc = ecryptfs_init_persistent_file(ecryptfs_dentry); | 195 | if (rc) { |
196 | if (rc) { | 196 | printk(KERN_ERR "%s: Error attempting to initialize " |
197 | printk(KERN_ERR "%s: Error attempting to initialize " | 197 | "the persistent file for the dentry with name " |
198 | "the persistent file for the dentry with name " | 198 | "[%s]; rc = [%d]\n", __func__, |
199 | "[%s]; rc = [%d]\n", __func__, | 199 | ecryptfs_dentry->d_name.name, rc); |
200 | ecryptfs_dentry->d_name.name, rc); | 200 | goto out_free; |
201 | goto out_free; | ||
202 | } | ||
203 | } | 201 | } |
204 | if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_RDONLY) | 202 | if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_ACCMODE) |
205 | && !(file->f_flags & O_RDONLY)) { | 203 | == O_RDONLY && (file->f_flags & O_ACCMODE) != O_RDONLY) { |
206 | rc = -EPERM; | 204 | rc = -EPERM; |
207 | printk(KERN_WARNING "%s: Lower persistent file is RO; eCryptfs " | 205 | printk(KERN_WARNING "%s: Lower persistent file is RO; eCryptfs " |
208 | "file must hence be opened RO\n", __func__); | 206 | "file must hence be opened RO\n", __func__); |
@@ -243,9 +241,9 @@ static int ecryptfs_open(struct inode *inode, struct file *file) | |||
243 | } | 241 | } |
244 | } | 242 | } |
245 | mutex_unlock(&crypt_stat->cs_mutex); | 243 | mutex_unlock(&crypt_stat->cs_mutex); |
246 | ecryptfs_printk(KERN_DEBUG, "inode w/ addr = [0x%p], i_ino = [0x%.16x] " | 244 | ecryptfs_printk(KERN_DEBUG, "inode w/ addr = [0x%p], i_ino = " |
247 | "size: [0x%.16x]\n", inode, inode->i_ino, | 245 | "[0x%.16lx] size: [0x%.16llx]\n", inode, inode->i_ino, |
248 | i_size_read(inode)); | 246 | (unsigned long long)i_size_read(inode)); |
249 | goto out; | 247 | goto out; |
250 | out_free: | 248 | out_free: |
251 | kmem_cache_free(ecryptfs_file_info_cache, | 249 | kmem_cache_free(ecryptfs_file_info_cache, |
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 64ff02330752..bd33f87a1907 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c | |||
@@ -185,15 +185,13 @@ static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry) | |||
185 | "context; rc = [%d]\n", rc); | 185 | "context; rc = [%d]\n", rc); |
186 | goto out; | 186 | goto out; |
187 | } | 187 | } |
188 | if (!ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->lower_file) { | 188 | rc = ecryptfs_init_persistent_file(ecryptfs_dentry); |
189 | rc = ecryptfs_init_persistent_file(ecryptfs_dentry); | 189 | if (rc) { |
190 | if (rc) { | 190 | printk(KERN_ERR "%s: Error attempting to initialize " |
191 | printk(KERN_ERR "%s: Error attempting to initialize " | 191 | "the persistent file for the dentry with name " |
192 | "the persistent file for the dentry with name " | 192 | "[%s]; rc = [%d]\n", __func__, |
193 | "[%s]; rc = [%d]\n", __func__, | 193 | ecryptfs_dentry->d_name.name, rc); |
194 | ecryptfs_dentry->d_name.name, rc); | 194 | goto out; |
195 | goto out; | ||
196 | } | ||
197 | } | 195 | } |
198 | rc = ecryptfs_write_metadata(ecryptfs_dentry); | 196 | rc = ecryptfs_write_metadata(ecryptfs_dentry); |
199 | if (rc) { | 197 | if (rc) { |
@@ -302,15 +300,13 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry, | |||
302 | rc = -ENOMEM; | 300 | rc = -ENOMEM; |
303 | goto out; | 301 | goto out; |
304 | } | 302 | } |
305 | if (!ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->lower_file) { | 303 | rc = ecryptfs_init_persistent_file(ecryptfs_dentry); |
306 | rc = ecryptfs_init_persistent_file(ecryptfs_dentry); | 304 | if (rc) { |
307 | if (rc) { | 305 | printk(KERN_ERR "%s: Error attempting to initialize " |
308 | printk(KERN_ERR "%s: Error attempting to initialize " | 306 | "the persistent file for the dentry with name " |
309 | "the persistent file for the dentry with name " | 307 | "[%s]; rc = [%d]\n", __func__, |
310 | "[%s]; rc = [%d]\n", __func__, | 308 | ecryptfs_dentry->d_name.name, rc); |
311 | ecryptfs_dentry->d_name.name, rc); | 309 | goto out_free_kmem; |
312 | goto out_free_kmem; | ||
313 | } | ||
314 | } | 310 | } |
315 | crypt_stat = &ecryptfs_inode_to_private( | 311 | crypt_stat = &ecryptfs_inode_to_private( |
316 | ecryptfs_dentry->d_inode)->crypt_stat; | 312 | ecryptfs_dentry->d_inode)->crypt_stat; |
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index b1f6858a5223..c1436cff6f2d 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c | |||
@@ -59,7 +59,7 @@ static int process_request_key_err(long err_code) | |||
59 | break; | 59 | break; |
60 | default: | 60 | default: |
61 | ecryptfs_printk(KERN_WARNING, "Unknown error code: " | 61 | ecryptfs_printk(KERN_WARNING, "Unknown error code: " |
62 | "[0x%.16x]\n", err_code); | 62 | "[0x%.16lx]\n", err_code); |
63 | rc = -EINVAL; | 63 | rc = -EINVAL; |
64 | } | 64 | } |
65 | return rc; | 65 | return rc; |
@@ -130,7 +130,7 @@ int ecryptfs_write_packet_length(char *dest, size_t size, | |||
130 | } else { | 130 | } else { |
131 | rc = -EINVAL; | 131 | rc = -EINVAL; |
132 | ecryptfs_printk(KERN_WARNING, | 132 | ecryptfs_printk(KERN_WARNING, |
133 | "Unsupported packet size: [%d]\n", size); | 133 | "Unsupported packet size: [%zd]\n", size); |
134 | } | 134 | } |
135 | return rc; | 135 | return rc; |
136 | } | 136 | } |
@@ -1672,7 +1672,7 @@ decrypt_passphrase_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, | |||
1672 | auth_tok->session_key.decrypted_key_size); | 1672 | auth_tok->session_key.decrypted_key_size); |
1673 | crypt_stat->flags |= ECRYPTFS_KEY_VALID; | 1673 | crypt_stat->flags |= ECRYPTFS_KEY_VALID; |
1674 | if (unlikely(ecryptfs_verbosity > 0)) { | 1674 | if (unlikely(ecryptfs_verbosity > 0)) { |
1675 | ecryptfs_printk(KERN_DEBUG, "FEK of size [%d]:\n", | 1675 | ecryptfs_printk(KERN_DEBUG, "FEK of size [%zd]:\n", |
1676 | crypt_stat->key_size); | 1676 | crypt_stat->key_size); |
1677 | ecryptfs_dump_hex(crypt_stat->key, | 1677 | ecryptfs_dump_hex(crypt_stat->key, |
1678 | crypt_stat->key_size); | 1678 | crypt_stat->key_size); |
@@ -1754,7 +1754,7 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat, | |||
1754 | if (ECRYPTFS_SIG_SIZE != tag_11_contents_size) { | 1754 | if (ECRYPTFS_SIG_SIZE != tag_11_contents_size) { |
1755 | ecryptfs_printk(KERN_ERR, "Expected " | 1755 | ecryptfs_printk(KERN_ERR, "Expected " |
1756 | "signature of size [%d]; " | 1756 | "signature of size [%d]; " |
1757 | "read size [%d]\n", | 1757 | "read size [%zd]\n", |
1758 | ECRYPTFS_SIG_SIZE, | 1758 | ECRYPTFS_SIG_SIZE, |
1759 | tag_11_contents_size); | 1759 | tag_11_contents_size); |
1760 | rc = -EIO; | 1760 | rc = -EIO; |
@@ -1787,8 +1787,8 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat, | |||
1787 | goto out_wipe_list; | 1787 | goto out_wipe_list; |
1788 | break; | 1788 | break; |
1789 | default: | 1789 | default: |
1790 | ecryptfs_printk(KERN_DEBUG, "No packet at offset " | 1790 | ecryptfs_printk(KERN_DEBUG, "No packet at offset [%zd] " |
1791 | "[%d] of the file header; hex value of " | 1791 | "of the file header; hex value of " |
1792 | "character is [0x%.2x]\n", i, src[i]); | 1792 | "character is [0x%.2x]\n", i, src[i]); |
1793 | next_packet_is_auth_tok_packet = 0; | 1793 | next_packet_is_auth_tok_packet = 0; |
1794 | } | 1794 | } |
@@ -1864,8 +1864,8 @@ found_matching_auth_tok: | |||
1864 | "session key for authentication token with sig " | 1864 | "session key for authentication token with sig " |
1865 | "[%.*s]; rc = [%d]. Removing auth tok " | 1865 | "[%.*s]; rc = [%d]. Removing auth tok " |
1866 | "candidate from the list and searching for " | 1866 | "candidate from the list and searching for " |
1867 | "the next match.\n", candidate_auth_tok_sig, | 1867 | "the next match.\n", ECRYPTFS_SIG_SIZE_HEX, |
1868 | ECRYPTFS_SIG_SIZE_HEX, rc); | 1868 | candidate_auth_tok_sig, rc); |
1869 | list_for_each_entry_safe(auth_tok_list_item, | 1869 | list_for_each_entry_safe(auth_tok_list_item, |
1870 | auth_tok_list_item_tmp, | 1870 | auth_tok_list_item_tmp, |
1871 | &auth_tok_list, list) { | 1871 | &auth_tok_list, list) { |
@@ -2168,7 +2168,7 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes, | |||
2168 | if (encrypted_session_key_valid) { | 2168 | if (encrypted_session_key_valid) { |
2169 | ecryptfs_printk(KERN_DEBUG, "encrypted_session_key_valid != 0; " | 2169 | ecryptfs_printk(KERN_DEBUG, "encrypted_session_key_valid != 0; " |
2170 | "using auth_tok->session_key.encrypted_key, " | 2170 | "using auth_tok->session_key.encrypted_key, " |
2171 | "where key_rec->enc_key_size = [%d]\n", | 2171 | "where key_rec->enc_key_size = [%zd]\n", |
2172 | key_rec->enc_key_size); | 2172 | key_rec->enc_key_size); |
2173 | memcpy(key_rec->enc_key, | 2173 | memcpy(key_rec->enc_key, |
2174 | auth_tok->session_key.encrypted_key, | 2174 | auth_tok->session_key.encrypted_key, |
@@ -2198,7 +2198,7 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes, | |||
2198 | if (rc < 1 || rc > 2) { | 2198 | if (rc < 1 || rc > 2) { |
2199 | ecryptfs_printk(KERN_ERR, "Error generating scatterlist " | 2199 | ecryptfs_printk(KERN_ERR, "Error generating scatterlist " |
2200 | "for crypt_stat session key; expected rc = 1; " | 2200 | "for crypt_stat session key; expected rc = 1; " |
2201 | "got rc = [%d]. key_rec->enc_key_size = [%d]\n", | 2201 | "got rc = [%d]. key_rec->enc_key_size = [%zd]\n", |
2202 | rc, key_rec->enc_key_size); | 2202 | rc, key_rec->enc_key_size); |
2203 | rc = -ENOMEM; | 2203 | rc = -ENOMEM; |
2204 | goto out; | 2204 | goto out; |
@@ -2209,7 +2209,7 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes, | |||
2209 | ecryptfs_printk(KERN_ERR, "Error generating scatterlist " | 2209 | ecryptfs_printk(KERN_ERR, "Error generating scatterlist " |
2210 | "for crypt_stat encrypted session key; " | 2210 | "for crypt_stat encrypted session key; " |
2211 | "expected rc = 1; got rc = [%d]. " | 2211 | "expected rc = 1; got rc = [%d]. " |
2212 | "key_rec->enc_key_size = [%d]\n", rc, | 2212 | "key_rec->enc_key_size = [%zd]\n", rc, |
2213 | key_rec->enc_key_size); | 2213 | key_rec->enc_key_size); |
2214 | rc = -ENOMEM; | 2214 | rc = -ENOMEM; |
2215 | goto out; | 2215 | goto out; |
@@ -2224,7 +2224,7 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes, | |||
2224 | goto out; | 2224 | goto out; |
2225 | } | 2225 | } |
2226 | rc = 0; | 2226 | rc = 0; |
2227 | ecryptfs_printk(KERN_DEBUG, "Encrypting [%d] bytes of the key\n", | 2227 | ecryptfs_printk(KERN_DEBUG, "Encrypting [%zd] bytes of the key\n", |
2228 | crypt_stat->key_size); | 2228 | crypt_stat->key_size); |
2229 | rc = crypto_blkcipher_encrypt(&desc, dst_sg, src_sg, | 2229 | rc = crypto_blkcipher_encrypt(&desc, dst_sg, src_sg, |
2230 | (*key_rec).enc_key_size); | 2230 | (*key_rec).enc_key_size); |
@@ -2235,7 +2235,7 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes, | |||
2235 | } | 2235 | } |
2236 | ecryptfs_printk(KERN_DEBUG, "This should be the encrypted key:\n"); | 2236 | ecryptfs_printk(KERN_DEBUG, "This should be the encrypted key:\n"); |
2237 | if (ecryptfs_verbosity > 0) { | 2237 | if (ecryptfs_verbosity > 0) { |
2238 | ecryptfs_printk(KERN_DEBUG, "EFEK of size [%d]:\n", | 2238 | ecryptfs_printk(KERN_DEBUG, "EFEK of size [%zd]:\n", |
2239 | key_rec->enc_key_size); | 2239 | key_rec->enc_key_size); |
2240 | ecryptfs_dump_hex(key_rec->enc_key, | 2240 | ecryptfs_dump_hex(key_rec->enc_key, |
2241 | key_rec->enc_key_size); | 2241 | key_rec->enc_key_size); |
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 9ed476906327..758323a0f09a 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <linux/parser.h> | 36 | #include <linux/parser.h> |
37 | #include <linux/fs_stack.h> | 37 | #include <linux/fs_stack.h> |
38 | #include <linux/slab.h> | 38 | #include <linux/slab.h> |
39 | #include <linux/magic.h> | ||
39 | #include "ecryptfs_kernel.h" | 40 | #include "ecryptfs_kernel.h" |
40 | 41 | ||
41 | /** | 42 | /** |
@@ -141,13 +142,12 @@ int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry) | |||
141 | return rc; | 142 | return rc; |
142 | } | 143 | } |
143 | 144 | ||
144 | static inode *ecryptfs_get_inode(struct inode *lower_inode, | 145 | static struct inode *ecryptfs_get_inode(struct inode *lower_inode, |
145 | struct super_block *sb) | 146 | struct super_block *sb) |
146 | { | 147 | { |
147 | struct inode *inode; | 148 | struct inode *inode; |
148 | int rc = 0; | 149 | int rc = 0; |
149 | 150 | ||
150 | lower_inode = lower_dentry->d_inode; | ||
151 | if (lower_inode->i_sb != ecryptfs_superblock_to_lower(sb)) { | 151 | if (lower_inode->i_sb != ecryptfs_superblock_to_lower(sb)) { |
152 | rc = -EXDEV; | 152 | rc = -EXDEV; |
153 | goto out; | 153 | goto out; |
@@ -202,7 +202,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry, | |||
202 | { | 202 | { |
203 | struct inode *lower_inode = lower_dentry->d_inode; | 203 | struct inode *lower_inode = lower_dentry->d_inode; |
204 | struct inode *inode = ecryptfs_get_inode(lower_inode, sb); | 204 | struct inode *inode = ecryptfs_get_inode(lower_inode, sb); |
205 | if (IS_ERR(inode) | 205 | if (IS_ERR(inode)) |
206 | return PTR_ERR(inode); | 206 | return PTR_ERR(inode); |
207 | if (flags & ECRYPTFS_INTERPOSE_FLAG_D_ADD) | 207 | if (flags & ECRYPTFS_INTERPOSE_FLAG_D_ADD) |
208 | d_add(dentry, inode); | 208 | d_add(dentry, inode); |
@@ -565,6 +565,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags | |||
565 | ecryptfs_set_superblock_lower(s, path.dentry->d_sb); | 565 | ecryptfs_set_superblock_lower(s, path.dentry->d_sb); |
566 | s->s_maxbytes = path.dentry->d_sb->s_maxbytes; | 566 | s->s_maxbytes = path.dentry->d_sb->s_maxbytes; |
567 | s->s_blocksize = path.dentry->d_sb->s_blocksize; | 567 | s->s_blocksize = path.dentry->d_sb->s_blocksize; |
568 | s->s_magic = ECRYPTFS_SUPER_MAGIC; | ||
568 | 569 | ||
569 | inode = ecryptfs_get_inode(path.dentry->d_inode, s); | 570 | inode = ecryptfs_get_inode(path.dentry->d_inode, s); |
570 | rc = PTR_ERR(inode); | 571 | rc = PTR_ERR(inode); |
@@ -809,9 +810,10 @@ static int __init ecryptfs_init(void) | |||
809 | ecryptfs_printk(KERN_ERR, "The eCryptfs extent size is " | 810 | ecryptfs_printk(KERN_ERR, "The eCryptfs extent size is " |
810 | "larger than the host's page size, and so " | 811 | "larger than the host's page size, and so " |
811 | "eCryptfs cannot run on this system. The " | 812 | "eCryptfs cannot run on this system. The " |
812 | "default eCryptfs extent size is [%d] bytes; " | 813 | "default eCryptfs extent size is [%u] bytes; " |
813 | "the page size is [%d] bytes.\n", | 814 | "the page size is [%lu] bytes.\n", |
814 | ECRYPTFS_DEFAULT_EXTENT_SIZE, PAGE_CACHE_SIZE); | 815 | ECRYPTFS_DEFAULT_EXTENT_SIZE, |
816 | (unsigned long)PAGE_CACHE_SIZE); | ||
815 | goto out; | 817 | goto out; |
816 | } | 818 | } |
817 | rc = ecryptfs_init_kmem_caches(); | 819 | rc = ecryptfs_init_kmem_caches(); |
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index b1d82756544b..cc64fca89f8d 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c | |||
@@ -65,7 +65,7 @@ static int ecryptfs_writepage(struct page *page, struct writeback_control *wbc) | |||
65 | rc = ecryptfs_encrypt_page(page); | 65 | rc = ecryptfs_encrypt_page(page); |
66 | if (rc) { | 66 | if (rc) { |
67 | ecryptfs_printk(KERN_WARNING, "Error encrypting " | 67 | ecryptfs_printk(KERN_WARNING, "Error encrypting " |
68 | "page (upper index [0x%.16x])\n", page->index); | 68 | "page (upper index [0x%.16lx])\n", page->index); |
69 | ClearPageUptodate(page); | 69 | ClearPageUptodate(page); |
70 | goto out; | 70 | goto out; |
71 | } | 71 | } |
@@ -237,7 +237,7 @@ out: | |||
237 | ClearPageUptodate(page); | 237 | ClearPageUptodate(page); |
238 | else | 238 | else |
239 | SetPageUptodate(page); | 239 | SetPageUptodate(page); |
240 | ecryptfs_printk(KERN_DEBUG, "Unlocking page with index = [0x%.16x]\n", | 240 | ecryptfs_printk(KERN_DEBUG, "Unlocking page with index = [0x%.16lx]\n", |
241 | page->index); | 241 | page->index); |
242 | unlock_page(page); | 242 | unlock_page(page); |
243 | return rc; | 243 | return rc; |
@@ -290,6 +290,7 @@ static int ecryptfs_write_begin(struct file *file, | |||
290 | return -ENOMEM; | 290 | return -ENOMEM; |
291 | *pagep = page; | 291 | *pagep = page; |
292 | 292 | ||
293 | prev_page_end_size = ((loff_t)index << PAGE_CACHE_SHIFT); | ||
293 | if (!PageUptodate(page)) { | 294 | if (!PageUptodate(page)) { |
294 | struct ecryptfs_crypt_stat *crypt_stat = | 295 | struct ecryptfs_crypt_stat *crypt_stat = |
295 | &ecryptfs_inode_to_private(mapping->host)->crypt_stat; | 296 | &ecryptfs_inode_to_private(mapping->host)->crypt_stat; |
@@ -335,18 +336,23 @@ static int ecryptfs_write_begin(struct file *file, | |||
335 | SetPageUptodate(page); | 336 | SetPageUptodate(page); |
336 | } | 337 | } |
337 | } else { | 338 | } else { |
338 | rc = ecryptfs_decrypt_page(page); | 339 | if (prev_page_end_size |
339 | if (rc) { | 340 | >= i_size_read(page->mapping->host)) { |
340 | printk(KERN_ERR "%s: Error decrypting page " | 341 | zero_user(page, 0, PAGE_CACHE_SIZE); |
341 | "at index [%ld]; rc = [%d]\n", | 342 | } else { |
342 | __func__, page->index, rc); | 343 | rc = ecryptfs_decrypt_page(page); |
343 | ClearPageUptodate(page); | 344 | if (rc) { |
344 | goto out; | 345 | printk(KERN_ERR "%s: Error decrypting " |
346 | "page at index [%ld]; " | ||
347 | "rc = [%d]\n", | ||
348 | __func__, page->index, rc); | ||
349 | ClearPageUptodate(page); | ||
350 | goto out; | ||
351 | } | ||
345 | } | 352 | } |
346 | SetPageUptodate(page); | 353 | SetPageUptodate(page); |
347 | } | 354 | } |
348 | } | 355 | } |
349 | prev_page_end_size = ((loff_t)index << PAGE_CACHE_SHIFT); | ||
350 | /* If creating a page or more of holes, zero them out via truncate. | 356 | /* If creating a page or more of holes, zero them out via truncate. |
351 | * Note, this will increase i_size. */ | 357 | * Note, this will increase i_size. */ |
352 | if (index != 0) { | 358 | if (index != 0) { |
@@ -488,7 +494,7 @@ static int ecryptfs_write_end(struct file *file, | |||
488 | } else | 494 | } else |
489 | ecryptfs_printk(KERN_DEBUG, "Not a new file\n"); | 495 | ecryptfs_printk(KERN_DEBUG, "Not a new file\n"); |
490 | ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page" | 496 | ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page" |
491 | "(page w/ index = [0x%.16x], to = [%d])\n", index, to); | 497 | "(page w/ index = [0x%.16lx], to = [%d])\n", index, to); |
492 | if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { | 498 | if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { |
493 | rc = ecryptfs_write_lower_page_segment(ecryptfs_inode, page, 0, | 499 | rc = ecryptfs_write_lower_page_segment(ecryptfs_inode, page, 0, |
494 | to); | 500 | to); |
@@ -503,19 +509,20 @@ static int ecryptfs_write_end(struct file *file, | |||
503 | rc = fill_zeros_to_end_of_page(page, to); | 509 | rc = fill_zeros_to_end_of_page(page, to); |
504 | if (rc) { | 510 | if (rc) { |
505 | ecryptfs_printk(KERN_WARNING, "Error attempting to fill " | 511 | ecryptfs_printk(KERN_WARNING, "Error attempting to fill " |
506 | "zeros in page with index = [0x%.16x]\n", index); | 512 | "zeros in page with index = [0x%.16lx]\n", index); |
507 | goto out; | 513 | goto out; |
508 | } | 514 | } |
509 | rc = ecryptfs_encrypt_page(page); | 515 | rc = ecryptfs_encrypt_page(page); |
510 | if (rc) { | 516 | if (rc) { |
511 | ecryptfs_printk(KERN_WARNING, "Error encrypting page (upper " | 517 | ecryptfs_printk(KERN_WARNING, "Error encrypting page (upper " |
512 | "index [0x%.16x])\n", index); | 518 | "index [0x%.16lx])\n", index); |
513 | goto out; | 519 | goto out; |
514 | } | 520 | } |
515 | if (pos + copied > i_size_read(ecryptfs_inode)) { | 521 | if (pos + copied > i_size_read(ecryptfs_inode)) { |
516 | i_size_write(ecryptfs_inode, pos + copied); | 522 | i_size_write(ecryptfs_inode, pos + copied); |
517 | ecryptfs_printk(KERN_DEBUG, "Expanded file size to " | 523 | ecryptfs_printk(KERN_DEBUG, "Expanded file size to " |
518 | "[0x%.16x]\n", i_size_read(ecryptfs_inode)); | 524 | "[0x%.16llx]\n", |
525 | (unsigned long long)i_size_read(ecryptfs_inode)); | ||
519 | } | 526 | } |
520 | rc = ecryptfs_write_inode_size_to_metadata(ecryptfs_inode); | 527 | rc = ecryptfs_write_inode_size_to_metadata(ecryptfs_inode); |
521 | if (rc) | 528 | if (rc) |
diff --git a/fs/eventpoll.c b/fs/eventpoll.c index cc8a9b7d6064..267d0ada4541 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c | |||
@@ -1114,6 +1114,17 @@ static int ep_send_events(struct eventpoll *ep, | |||
1114 | return ep_scan_ready_list(ep, ep_send_events_proc, &esed); | 1114 | return ep_scan_ready_list(ep, ep_send_events_proc, &esed); |
1115 | } | 1115 | } |
1116 | 1116 | ||
1117 | static inline struct timespec ep_set_mstimeout(long ms) | ||
1118 | { | ||
1119 | struct timespec now, ts = { | ||
1120 | .tv_sec = ms / MSEC_PER_SEC, | ||
1121 | .tv_nsec = NSEC_PER_MSEC * (ms % MSEC_PER_SEC), | ||
1122 | }; | ||
1123 | |||
1124 | ktime_get_ts(&now); | ||
1125 | return timespec_add_safe(now, ts); | ||
1126 | } | ||
1127 | |||
1117 | static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, | 1128 | static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, |
1118 | int maxevents, long timeout) | 1129 | int maxevents, long timeout) |
1119 | { | 1130 | { |
@@ -1121,12 +1132,11 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, | |||
1121 | unsigned long flags; | 1132 | unsigned long flags; |
1122 | long slack; | 1133 | long slack; |
1123 | wait_queue_t wait; | 1134 | wait_queue_t wait; |
1124 | struct timespec end_time; | ||
1125 | ktime_t expires, *to = NULL; | 1135 | ktime_t expires, *to = NULL; |
1126 | 1136 | ||
1127 | if (timeout > 0) { | 1137 | if (timeout > 0) { |
1128 | ktime_get_ts(&end_time); | 1138 | struct timespec end_time = ep_set_mstimeout(timeout); |
1129 | timespec_add_ns(&end_time, (u64)timeout * NSEC_PER_MSEC); | 1139 | |
1130 | slack = select_estimate_accuracy(&end_time); | 1140 | slack = select_estimate_accuracy(&end_time); |
1131 | to = &expires; | 1141 | to = &expires; |
1132 | *to = timespec_to_ktime(end_time); | 1142 | *to = timespec_to_ktime(end_time); |
@@ -120,7 +120,7 @@ SYSCALL_DEFINE1(uselib, const char __user *, library) | |||
120 | goto out; | 120 | goto out; |
121 | 121 | ||
122 | file = do_filp_open(AT_FDCWD, tmp, | 122 | file = do_filp_open(AT_FDCWD, tmp, |
123 | O_LARGEFILE | O_RDONLY | FMODE_EXEC, 0, | 123 | O_LARGEFILE | O_RDONLY | __FMODE_EXEC, 0, |
124 | MAY_READ | MAY_EXEC | MAY_OPEN); | 124 | MAY_READ | MAY_EXEC | MAY_OPEN); |
125 | putname(tmp); | 125 | putname(tmp); |
126 | error = PTR_ERR(file); | 126 | error = PTR_ERR(file); |
@@ -723,7 +723,7 @@ struct file *open_exec(const char *name) | |||
723 | int err; | 723 | int err; |
724 | 724 | ||
725 | file = do_filp_open(AT_FDCWD, name, | 725 | file = do_filp_open(AT_FDCWD, name, |
726 | O_LARGEFILE | O_RDONLY | FMODE_EXEC, 0, | 726 | O_LARGEFILE | O_RDONLY | __FMODE_EXEC, 0, |
727 | MAY_EXEC | MAY_OPEN); | 727 | MAY_EXEC | MAY_OPEN); |
728 | if (IS_ERR(file)) | 728 | if (IS_ERR(file)) |
729 | goto out; | 729 | goto out; |
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index 42685424817b..a7555238c41a 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c | |||
@@ -1030,7 +1030,6 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino) | |||
1030 | memcpy(oi->i_data, fcb.i_data, sizeof(fcb.i_data)); | 1030 | memcpy(oi->i_data, fcb.i_data, sizeof(fcb.i_data)); |
1031 | } | 1031 | } |
1032 | 1032 | ||
1033 | inode->i_mapping->backing_dev_info = sb->s_bdi; | ||
1034 | if (S_ISREG(inode->i_mode)) { | 1033 | if (S_ISREG(inode->i_mode)) { |
1035 | inode->i_op = &exofs_file_inode_operations; | 1034 | inode->i_op = &exofs_file_inode_operations; |
1036 | inode->i_fop = &exofs_file_operations; | 1035 | inode->i_fop = &exofs_file_operations; |
@@ -1131,7 +1130,6 @@ struct inode *exofs_new_inode(struct inode *dir, int mode) | |||
1131 | 1130 | ||
1132 | sbi = sb->s_fs_info; | 1131 | sbi = sb->s_fs_info; |
1133 | 1132 | ||
1134 | inode->i_mapping->backing_dev_info = sb->s_bdi; | ||
1135 | sb->s_dirt = 1; | 1133 | sb->s_dirt = 1; |
1136 | inode_init_owner(inode, dir, mode); | 1134 | inode_init_owner(inode, dir, mode); |
1137 | inode->i_ino = sbi->s_nextid++; | 1135 | inode->i_ino = sbi->s_nextid++; |
diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 7aa767d4f06f..85c8cc8f2473 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c | |||
@@ -754,7 +754,7 @@ static int ext3_release_dquot(struct dquot *dquot); | |||
754 | static int ext3_mark_dquot_dirty(struct dquot *dquot); | 754 | static int ext3_mark_dquot_dirty(struct dquot *dquot); |
755 | static int ext3_write_info(struct super_block *sb, int type); | 755 | static int ext3_write_info(struct super_block *sb, int type); |
756 | static int ext3_quota_on(struct super_block *sb, int type, int format_id, | 756 | static int ext3_quota_on(struct super_block *sb, int type, int format_id, |
757 | char *path); | 757 | struct path *path); |
758 | static int ext3_quota_on_mount(struct super_block *sb, int type); | 758 | static int ext3_quota_on_mount(struct super_block *sb, int type); |
759 | static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data, | 759 | static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data, |
760 | size_t len, loff_t off); | 760 | size_t len, loff_t off); |
@@ -2877,27 +2877,20 @@ static int ext3_quota_on_mount(struct super_block *sb, int type) | |||
2877 | * Standard function to be called on quota_on | 2877 | * Standard function to be called on quota_on |
2878 | */ | 2878 | */ |
2879 | static int ext3_quota_on(struct super_block *sb, int type, int format_id, | 2879 | static int ext3_quota_on(struct super_block *sb, int type, int format_id, |
2880 | char *name) | 2880 | struct path *path) |
2881 | { | 2881 | { |
2882 | int err; | 2882 | int err; |
2883 | struct path path; | ||
2884 | 2883 | ||
2885 | if (!test_opt(sb, QUOTA)) | 2884 | if (!test_opt(sb, QUOTA)) |
2886 | return -EINVAL; | 2885 | return -EINVAL; |
2887 | 2886 | ||
2888 | err = kern_path(name, LOOKUP_FOLLOW, &path); | ||
2889 | if (err) | ||
2890 | return err; | ||
2891 | |||
2892 | /* Quotafile not on the same filesystem? */ | 2887 | /* Quotafile not on the same filesystem? */ |
2893 | if (path.mnt->mnt_sb != sb) { | 2888 | if (path->mnt->mnt_sb != sb) |
2894 | path_put(&path); | ||
2895 | return -EXDEV; | 2889 | return -EXDEV; |
2896 | } | ||
2897 | /* Journaling quota? */ | 2890 | /* Journaling quota? */ |
2898 | if (EXT3_SB(sb)->s_qf_names[type]) { | 2891 | if (EXT3_SB(sb)->s_qf_names[type]) { |
2899 | /* Quotafile not of fs root? */ | 2892 | /* Quotafile not of fs root? */ |
2900 | if (path.dentry->d_parent != sb->s_root) | 2893 | if (path->dentry->d_parent != sb->s_root) |
2901 | ext3_msg(sb, KERN_WARNING, | 2894 | ext3_msg(sb, KERN_WARNING, |
2902 | "warning: Quota file not on filesystem root. " | 2895 | "warning: Quota file not on filesystem root. " |
2903 | "Journaled quota will not work."); | 2896 | "Journaled quota will not work."); |
@@ -2907,7 +2900,7 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id, | |||
2907 | * When we journal data on quota file, we have to flush journal to see | 2900 | * When we journal data on quota file, we have to flush journal to see |
2908 | * all updates to the file when we bypass pagecache... | 2901 | * all updates to the file when we bypass pagecache... |
2909 | */ | 2902 | */ |
2910 | if (ext3_should_journal_data(path.dentry->d_inode)) { | 2903 | if (ext3_should_journal_data(path->dentry->d_inode)) { |
2911 | /* | 2904 | /* |
2912 | * We don't need to lock updates but journal_flush() could | 2905 | * We don't need to lock updates but journal_flush() could |
2913 | * otherwise be livelocked... | 2906 | * otherwise be livelocked... |
@@ -2915,15 +2908,11 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id, | |||
2915 | journal_lock_updates(EXT3_SB(sb)->s_journal); | 2908 | journal_lock_updates(EXT3_SB(sb)->s_journal); |
2916 | err = journal_flush(EXT3_SB(sb)->s_journal); | 2909 | err = journal_flush(EXT3_SB(sb)->s_journal); |
2917 | journal_unlock_updates(EXT3_SB(sb)->s_journal); | 2910 | journal_unlock_updates(EXT3_SB(sb)->s_journal); |
2918 | if (err) { | 2911 | if (err) |
2919 | path_put(&path); | ||
2920 | return err; | 2912 | return err; |
2921 | } | ||
2922 | } | 2913 | } |
2923 | 2914 | ||
2924 | err = dquot_quota_on_path(sb, type, format_id, &path); | 2915 | return dquot_quota_on(sb, type, format_id, path); |
2925 | path_put(&path); | ||
2926 | return err; | ||
2927 | } | 2916 | } |
2928 | 2917 | ||
2929 | /* Read data from quotafile - avoid pagecache and such because we cannot afford | 2918 | /* Read data from quotafile - avoid pagecache and such because we cannot afford |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 1de65f572033..3aa0b72b3b94 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -848,6 +848,7 @@ struct ext4_inode_info { | |||
848 | atomic_t i_ioend_count; /* Number of outstanding io_end structs */ | 848 | atomic_t i_ioend_count; /* Number of outstanding io_end structs */ |
849 | /* current io_end structure for async DIO write*/ | 849 | /* current io_end structure for async DIO write*/ |
850 | ext4_io_end_t *cur_aio_dio; | 850 | ext4_io_end_t *cur_aio_dio; |
851 | atomic_t i_aiodio_unwritten; /* Nr. of inflight conversions pending */ | ||
851 | 852 | ||
852 | spinlock_t i_block_reservation_lock; | 853 | spinlock_t i_block_reservation_lock; |
853 | 854 | ||
@@ -2065,7 +2066,7 @@ extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
2065 | extern void ext4_ext_truncate(struct inode *); | 2066 | extern void ext4_ext_truncate(struct inode *); |
2066 | extern void ext4_ext_init(struct super_block *); | 2067 | extern void ext4_ext_init(struct super_block *); |
2067 | extern void ext4_ext_release(struct super_block *); | 2068 | extern void ext4_ext_release(struct super_block *); |
2068 | extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset, | 2069 | extern long ext4_fallocate(struct file *file, int mode, loff_t offset, |
2069 | loff_t len); | 2070 | loff_t len); |
2070 | extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, | 2071 | extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, |
2071 | ssize_t len); | 2072 | ssize_t len); |
@@ -2119,6 +2120,15 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh) | |||
2119 | 2120 | ||
2120 | #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) | 2121 | #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) |
2121 | 2122 | ||
2123 | /* For ioend & aio unwritten conversion wait queues */ | ||
2124 | #define EXT4_WQ_HASH_SZ 37 | ||
2125 | #define ext4_ioend_wq(v) (&ext4__ioend_wq[((unsigned long)(v)) %\ | ||
2126 | EXT4_WQ_HASH_SZ]) | ||
2127 | #define ext4_aio_mutex(v) (&ext4__aio_mutex[((unsigned long)(v)) %\ | ||
2128 | EXT4_WQ_HASH_SZ]) | ||
2129 | extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ]; | ||
2130 | extern struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ]; | ||
2131 | |||
2122 | #endif /* __KERNEL__ */ | 2132 | #endif /* __KERNEL__ */ |
2123 | 2133 | ||
2124 | #endif /* _EXT4_H */ | 2134 | #endif /* _EXT4_H */ |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 6b90b6825d32..686240e89df1 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -3174,9 +3174,10 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | |||
3174 | * that this IO needs to convertion to written when IO is | 3174 | * that this IO needs to convertion to written when IO is |
3175 | * completed | 3175 | * completed |
3176 | */ | 3176 | */ |
3177 | if (io) | 3177 | if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) { |
3178 | io->flag = EXT4_IO_END_UNWRITTEN; | 3178 | io->flag = EXT4_IO_END_UNWRITTEN; |
3179 | else | 3179 | atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten); |
3180 | } else | ||
3180 | ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); | 3181 | ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); |
3181 | if (ext4_should_dioread_nolock(inode)) | 3182 | if (ext4_should_dioread_nolock(inode)) |
3182 | map->m_flags |= EXT4_MAP_UNINIT; | 3183 | map->m_flags |= EXT4_MAP_UNINIT; |
@@ -3463,9 +3464,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3463 | * that we need to perform convertion when IO is done. | 3464 | * that we need to perform convertion when IO is done. |
3464 | */ | 3465 | */ |
3465 | if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { | 3466 | if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { |
3466 | if (io) | 3467 | if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) { |
3467 | io->flag = EXT4_IO_END_UNWRITTEN; | 3468 | io->flag = EXT4_IO_END_UNWRITTEN; |
3468 | else | 3469 | atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten); |
3470 | } else | ||
3469 | ext4_set_inode_state(inode, | 3471 | ext4_set_inode_state(inode, |
3470 | EXT4_STATE_DIO_UNWRITTEN); | 3472 | EXT4_STATE_DIO_UNWRITTEN); |
3471 | } | 3473 | } |
@@ -3627,14 +3629,15 @@ static void ext4_falloc_update_inode(struct inode *inode, | |||
3627 | } | 3629 | } |
3628 | 3630 | ||
3629 | /* | 3631 | /* |
3630 | * preallocate space for a file. This implements ext4's fallocate inode | 3632 | * preallocate space for a file. This implements ext4's fallocate file |
3631 | * operation, which gets called from sys_fallocate system call. | 3633 | * operation, which gets called from sys_fallocate system call. |
3632 | * For block-mapped files, posix_fallocate should fall back to the method | 3634 | * For block-mapped files, posix_fallocate should fall back to the method |
3633 | * of writing zeroes to the required new blocks (the same behavior which is | 3635 | * of writing zeroes to the required new blocks (the same behavior which is |
3634 | * expected for file systems which do not support fallocate() system call). | 3636 | * expected for file systems which do not support fallocate() system call). |
3635 | */ | 3637 | */ |
3636 | long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) | 3638 | long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) |
3637 | { | 3639 | { |
3640 | struct inode *inode = file->f_path.dentry->d_inode; | ||
3638 | handle_t *handle; | 3641 | handle_t *handle; |
3639 | loff_t new_size; | 3642 | loff_t new_size; |
3640 | unsigned int max_blocks; | 3643 | unsigned int max_blocks; |
@@ -3645,7 +3648,7 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) | |||
3645 | unsigned int credits, blkbits = inode->i_blkbits; | 3648 | unsigned int credits, blkbits = inode->i_blkbits; |
3646 | 3649 | ||
3647 | /* We only support the FALLOC_FL_KEEP_SIZE mode */ | 3650 | /* We only support the FALLOC_FL_KEEP_SIZE mode */ |
3648 | if (mode && (mode != FALLOC_FL_KEEP_SIZE)) | 3651 | if (mode & ~FALLOC_FL_KEEP_SIZE) |
3649 | return -EOPNOTSUPP; | 3652 | return -EOPNOTSUPP; |
3650 | 3653 | ||
3651 | /* | 3654 | /* |
@@ -3655,10 +3658,6 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) | |||
3655 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) | 3658 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) |
3656 | return -EOPNOTSUPP; | 3659 | return -EOPNOTSUPP; |
3657 | 3660 | ||
3658 | /* preallocation to directories is currently not supported */ | ||
3659 | if (S_ISDIR(inode->i_mode)) | ||
3660 | return -ENODEV; | ||
3661 | |||
3662 | map.m_lblk = offset >> blkbits; | 3661 | map.m_lblk = offset >> blkbits; |
3663 | /* | 3662 | /* |
3664 | * We can't just convert len to max_blocks because | 3663 | * We can't just convert len to max_blocks because |
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index bb003dc9ffff..7b80d543b89e 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
@@ -55,11 +55,47 @@ static int ext4_release_file(struct inode *inode, struct file *filp) | |||
55 | return 0; | 55 | return 0; |
56 | } | 56 | } |
57 | 57 | ||
58 | static void ext4_aiodio_wait(struct inode *inode) | ||
59 | { | ||
60 | wait_queue_head_t *wq = ext4_ioend_wq(inode); | ||
61 | |||
62 | wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_aiodio_unwritten) == 0)); | ||
63 | } | ||
64 | |||
65 | /* | ||
66 | * This tests whether the IO in question is block-aligned or not. | ||
67 | * Ext4 utilizes unwritten extents when hole-filling during direct IO, and they | ||
68 | * are converted to written only after the IO is complete. Until they are | ||
69 | * mapped, these blocks appear as holes, so dio_zero_block() will assume that | ||
70 | * it needs to zero out portions of the start and/or end block. If 2 AIO | ||
71 | * threads are at work on the same unwritten block, they must be synchronized | ||
72 | * or one thread will zero the other's data, causing corruption. | ||
73 | */ | ||
74 | static int | ||
75 | ext4_unaligned_aio(struct inode *inode, const struct iovec *iov, | ||
76 | unsigned long nr_segs, loff_t pos) | ||
77 | { | ||
78 | struct super_block *sb = inode->i_sb; | ||
79 | int blockmask = sb->s_blocksize - 1; | ||
80 | size_t count = iov_length(iov, nr_segs); | ||
81 | loff_t final_size = pos + count; | ||
82 | |||
83 | if (pos >= inode->i_size) | ||
84 | return 0; | ||
85 | |||
86 | if ((pos & blockmask) || (final_size & blockmask)) | ||
87 | return 1; | ||
88 | |||
89 | return 0; | ||
90 | } | ||
91 | |||
58 | static ssize_t | 92 | static ssize_t |
59 | ext4_file_write(struct kiocb *iocb, const struct iovec *iov, | 93 | ext4_file_write(struct kiocb *iocb, const struct iovec *iov, |
60 | unsigned long nr_segs, loff_t pos) | 94 | unsigned long nr_segs, loff_t pos) |
61 | { | 95 | { |
62 | struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; | 96 | struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; |
97 | int unaligned_aio = 0; | ||
98 | int ret; | ||
63 | 99 | ||
64 | /* | 100 | /* |
65 | * If we have encountered a bitmap-format file, the size limit | 101 | * If we have encountered a bitmap-format file, the size limit |
@@ -78,9 +114,31 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, | |||
78 | nr_segs = iov_shorten((struct iovec *)iov, nr_segs, | 114 | nr_segs = iov_shorten((struct iovec *)iov, nr_segs, |
79 | sbi->s_bitmap_maxbytes - pos); | 115 | sbi->s_bitmap_maxbytes - pos); |
80 | } | 116 | } |
117 | } else if (unlikely((iocb->ki_filp->f_flags & O_DIRECT) && | ||
118 | !is_sync_kiocb(iocb))) { | ||
119 | unaligned_aio = ext4_unaligned_aio(inode, iov, nr_segs, pos); | ||
120 | } | ||
121 | |||
122 | /* Unaligned direct AIO must be serialized; see comment above */ | ||
123 | if (unaligned_aio) { | ||
124 | static unsigned long unaligned_warn_time; | ||
125 | |||
126 | /* Warn about this once per day */ | ||
127 | if (printk_timed_ratelimit(&unaligned_warn_time, 60*60*24*HZ)) | ||
128 | ext4_msg(inode->i_sb, KERN_WARNING, | ||
129 | "Unaligned AIO/DIO on inode %ld by %s; " | ||
130 | "performance will be poor.", | ||
131 | inode->i_ino, current->comm); | ||
132 | mutex_lock(ext4_aio_mutex(inode)); | ||
133 | ext4_aiodio_wait(inode); | ||
81 | } | 134 | } |
82 | 135 | ||
83 | return generic_file_aio_write(iocb, iov, nr_segs, pos); | 136 | ret = generic_file_aio_write(iocb, iov, nr_segs, pos); |
137 | |||
138 | if (unaligned_aio) | ||
139 | mutex_unlock(ext4_aio_mutex(inode)); | ||
140 | |||
141 | return ret; | ||
84 | } | 142 | } |
85 | 143 | ||
86 | static const struct vm_operations_struct ext4_file_vm_ops = { | 144 | static const struct vm_operations_struct ext4_file_vm_ops = { |
@@ -210,6 +268,7 @@ const struct file_operations ext4_file_operations = { | |||
210 | .fsync = ext4_sync_file, | 268 | .fsync = ext4_sync_file, |
211 | .splice_read = generic_file_splice_read, | 269 | .splice_read = generic_file_splice_read, |
212 | .splice_write = generic_file_splice_write, | 270 | .splice_write = generic_file_splice_write, |
271 | .fallocate = ext4_fallocate, | ||
213 | }; | 272 | }; |
214 | 273 | ||
215 | const struct inode_operations ext4_file_inode_operations = { | 274 | const struct inode_operations ext4_file_inode_operations = { |
@@ -223,7 +282,6 @@ const struct inode_operations ext4_file_inode_operations = { | |||
223 | .removexattr = generic_removexattr, | 282 | .removexattr = generic_removexattr, |
224 | #endif | 283 | #endif |
225 | .check_acl = ext4_check_acl, | 284 | .check_acl = ext4_check_acl, |
226 | .fallocate = ext4_fallocate, | ||
227 | .fiemap = ext4_fiemap, | 285 | .fiemap = ext4_fiemap, |
228 | }; | 286 | }; |
229 | 287 | ||
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 851f49b2f9d2..d1fe09aea73d 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -342,10 +342,15 @@ static struct kmem_cache *ext4_free_ext_cachep; | |||
342 | /* We create slab caches for groupinfo data structures based on the | 342 | /* We create slab caches for groupinfo data structures based on the |
343 | * superblock block size. There will be one per mounted filesystem for | 343 | * superblock block size. There will be one per mounted filesystem for |
344 | * each unique s_blocksize_bits */ | 344 | * each unique s_blocksize_bits */ |
345 | #define NR_GRPINFO_CACHES \ | 345 | #define NR_GRPINFO_CACHES 8 |
346 | (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE + 1) | ||
347 | static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES]; | 346 | static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES]; |
348 | 347 | ||
348 | static const char *ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = { | ||
349 | "ext4_groupinfo_1k", "ext4_groupinfo_2k", "ext4_groupinfo_4k", | ||
350 | "ext4_groupinfo_8k", "ext4_groupinfo_16k", "ext4_groupinfo_32k", | ||
351 | "ext4_groupinfo_64k", "ext4_groupinfo_128k" | ||
352 | }; | ||
353 | |||
349 | static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, | 354 | static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, |
350 | ext4_group_t group); | 355 | ext4_group_t group); |
351 | static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, | 356 | static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, |
@@ -2414,6 +2419,55 @@ err_freesgi: | |||
2414 | return -ENOMEM; | 2419 | return -ENOMEM; |
2415 | } | 2420 | } |
2416 | 2421 | ||
2422 | static void ext4_groupinfo_destroy_slabs(void) | ||
2423 | { | ||
2424 | int i; | ||
2425 | |||
2426 | for (i = 0; i < NR_GRPINFO_CACHES; i++) { | ||
2427 | if (ext4_groupinfo_caches[i]) | ||
2428 | kmem_cache_destroy(ext4_groupinfo_caches[i]); | ||
2429 | ext4_groupinfo_caches[i] = NULL; | ||
2430 | } | ||
2431 | } | ||
2432 | |||
2433 | static int ext4_groupinfo_create_slab(size_t size) | ||
2434 | { | ||
2435 | static DEFINE_MUTEX(ext4_grpinfo_slab_create_mutex); | ||
2436 | int slab_size; | ||
2437 | int blocksize_bits = order_base_2(size); | ||
2438 | int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE; | ||
2439 | struct kmem_cache *cachep; | ||
2440 | |||
2441 | if (cache_index >= NR_GRPINFO_CACHES) | ||
2442 | return -EINVAL; | ||
2443 | |||
2444 | if (unlikely(cache_index < 0)) | ||
2445 | cache_index = 0; | ||
2446 | |||
2447 | mutex_lock(&ext4_grpinfo_slab_create_mutex); | ||
2448 | if (ext4_groupinfo_caches[cache_index]) { | ||
2449 | mutex_unlock(&ext4_grpinfo_slab_create_mutex); | ||
2450 | return 0; /* Already created */ | ||
2451 | } | ||
2452 | |||
2453 | slab_size = offsetof(struct ext4_group_info, | ||
2454 | bb_counters[blocksize_bits + 2]); | ||
2455 | |||
2456 | cachep = kmem_cache_create(ext4_groupinfo_slab_names[cache_index], | ||
2457 | slab_size, 0, SLAB_RECLAIM_ACCOUNT, | ||
2458 | NULL); | ||
2459 | |||
2460 | mutex_unlock(&ext4_grpinfo_slab_create_mutex); | ||
2461 | if (!cachep) { | ||
2462 | printk(KERN_EMERG "EXT4: no memory for groupinfo slab cache\n"); | ||
2463 | return -ENOMEM; | ||
2464 | } | ||
2465 | |||
2466 | ext4_groupinfo_caches[cache_index] = cachep; | ||
2467 | |||
2468 | return 0; | ||
2469 | } | ||
2470 | |||
2417 | int ext4_mb_init(struct super_block *sb, int needs_recovery) | 2471 | int ext4_mb_init(struct super_block *sb, int needs_recovery) |
2418 | { | 2472 | { |
2419 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2473 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
@@ -2421,9 +2475,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2421 | unsigned offset; | 2475 | unsigned offset; |
2422 | unsigned max; | 2476 | unsigned max; |
2423 | int ret; | 2477 | int ret; |
2424 | int cache_index; | ||
2425 | struct kmem_cache *cachep; | ||
2426 | char *namep = NULL; | ||
2427 | 2478 | ||
2428 | i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets); | 2479 | i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets); |
2429 | 2480 | ||
@@ -2440,30 +2491,9 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2440 | goto out; | 2491 | goto out; |
2441 | } | 2492 | } |
2442 | 2493 | ||
2443 | cache_index = sb->s_blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE; | 2494 | ret = ext4_groupinfo_create_slab(sb->s_blocksize); |
2444 | cachep = ext4_groupinfo_caches[cache_index]; | 2495 | if (ret < 0) |
2445 | if (!cachep) { | 2496 | goto out; |
2446 | char name[32]; | ||
2447 | int len = offsetof(struct ext4_group_info, | ||
2448 | bb_counters[sb->s_blocksize_bits + 2]); | ||
2449 | |||
2450 | sprintf(name, "ext4_groupinfo_%d", sb->s_blocksize_bits); | ||
2451 | namep = kstrdup(name, GFP_KERNEL); | ||
2452 | if (!namep) { | ||
2453 | ret = -ENOMEM; | ||
2454 | goto out; | ||
2455 | } | ||
2456 | |||
2457 | /* Need to free the kmem_cache_name() when we | ||
2458 | * destroy the slab */ | ||
2459 | cachep = kmem_cache_create(namep, len, 0, | ||
2460 | SLAB_RECLAIM_ACCOUNT, NULL); | ||
2461 | if (!cachep) { | ||
2462 | ret = -ENOMEM; | ||
2463 | goto out; | ||
2464 | } | ||
2465 | ext4_groupinfo_caches[cache_index] = cachep; | ||
2466 | } | ||
2467 | 2497 | ||
2468 | /* order 0 is regular bitmap */ | 2498 | /* order 0 is regular bitmap */ |
2469 | sbi->s_mb_maxs[0] = sb->s_blocksize << 3; | 2499 | sbi->s_mb_maxs[0] = sb->s_blocksize << 3; |
@@ -2520,7 +2550,6 @@ out: | |||
2520 | if (ret) { | 2550 | if (ret) { |
2521 | kfree(sbi->s_mb_offsets); | 2551 | kfree(sbi->s_mb_offsets); |
2522 | kfree(sbi->s_mb_maxs); | 2552 | kfree(sbi->s_mb_maxs); |
2523 | kfree(namep); | ||
2524 | } | 2553 | } |
2525 | return ret; | 2554 | return ret; |
2526 | } | 2555 | } |
@@ -2734,7 +2763,6 @@ int __init ext4_init_mballoc(void) | |||
2734 | 2763 | ||
2735 | void ext4_exit_mballoc(void) | 2764 | void ext4_exit_mballoc(void) |
2736 | { | 2765 | { |
2737 | int i; | ||
2738 | /* | 2766 | /* |
2739 | * Wait for completion of call_rcu()'s on ext4_pspace_cachep | 2767 | * Wait for completion of call_rcu()'s on ext4_pspace_cachep |
2740 | * before destroying the slab cache. | 2768 | * before destroying the slab cache. |
@@ -2743,15 +2771,7 @@ void ext4_exit_mballoc(void) | |||
2743 | kmem_cache_destroy(ext4_pspace_cachep); | 2771 | kmem_cache_destroy(ext4_pspace_cachep); |
2744 | kmem_cache_destroy(ext4_ac_cachep); | 2772 | kmem_cache_destroy(ext4_ac_cachep); |
2745 | kmem_cache_destroy(ext4_free_ext_cachep); | 2773 | kmem_cache_destroy(ext4_free_ext_cachep); |
2746 | 2774 | ext4_groupinfo_destroy_slabs(); | |
2747 | for (i = 0; i < NR_GRPINFO_CACHES; i++) { | ||
2748 | struct kmem_cache *cachep = ext4_groupinfo_caches[i]; | ||
2749 | if (cachep) { | ||
2750 | char *name = (char *)kmem_cache_name(cachep); | ||
2751 | kmem_cache_destroy(cachep); | ||
2752 | kfree(name); | ||
2753 | } | ||
2754 | } | ||
2755 | ext4_remove_debugfs_entry(); | 2775 | ext4_remove_debugfs_entry(); |
2756 | } | 2776 | } |
2757 | 2777 | ||
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 7270dcfca92a..955cc309142f 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c | |||
@@ -32,14 +32,8 @@ | |||
32 | 32 | ||
33 | static struct kmem_cache *io_page_cachep, *io_end_cachep; | 33 | static struct kmem_cache *io_page_cachep, *io_end_cachep; |
34 | 34 | ||
35 | #define WQ_HASH_SZ 37 | ||
36 | #define to_ioend_wq(v) (&ioend_wq[((unsigned long)v) % WQ_HASH_SZ]) | ||
37 | static wait_queue_head_t ioend_wq[WQ_HASH_SZ]; | ||
38 | |||
39 | int __init ext4_init_pageio(void) | 35 | int __init ext4_init_pageio(void) |
40 | { | 36 | { |
41 | int i; | ||
42 | |||
43 | io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT); | 37 | io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT); |
44 | if (io_page_cachep == NULL) | 38 | if (io_page_cachep == NULL) |
45 | return -ENOMEM; | 39 | return -ENOMEM; |
@@ -48,9 +42,6 @@ int __init ext4_init_pageio(void) | |||
48 | kmem_cache_destroy(io_page_cachep); | 42 | kmem_cache_destroy(io_page_cachep); |
49 | return -ENOMEM; | 43 | return -ENOMEM; |
50 | } | 44 | } |
51 | for (i = 0; i < WQ_HASH_SZ; i++) | ||
52 | init_waitqueue_head(&ioend_wq[i]); | ||
53 | |||
54 | return 0; | 45 | return 0; |
55 | } | 46 | } |
56 | 47 | ||
@@ -62,7 +53,7 @@ void ext4_exit_pageio(void) | |||
62 | 53 | ||
63 | void ext4_ioend_wait(struct inode *inode) | 54 | void ext4_ioend_wait(struct inode *inode) |
64 | { | 55 | { |
65 | wait_queue_head_t *wq = to_ioend_wq(inode); | 56 | wait_queue_head_t *wq = ext4_ioend_wq(inode); |
66 | 57 | ||
67 | wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0)); | 58 | wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0)); |
68 | } | 59 | } |
@@ -87,7 +78,7 @@ void ext4_free_io_end(ext4_io_end_t *io) | |||
87 | for (i = 0; i < io->num_io_pages; i++) | 78 | for (i = 0; i < io->num_io_pages; i++) |
88 | put_io_page(io->pages[i]); | 79 | put_io_page(io->pages[i]); |
89 | io->num_io_pages = 0; | 80 | io->num_io_pages = 0; |
90 | wq = to_ioend_wq(io->inode); | 81 | wq = ext4_ioend_wq(io->inode); |
91 | if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count) && | 82 | if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count) && |
92 | waitqueue_active(wq)) | 83 | waitqueue_active(wq)) |
93 | wake_up_all(wq); | 84 | wake_up_all(wq); |
@@ -102,6 +93,7 @@ int ext4_end_io_nolock(ext4_io_end_t *io) | |||
102 | struct inode *inode = io->inode; | 93 | struct inode *inode = io->inode; |
103 | loff_t offset = io->offset; | 94 | loff_t offset = io->offset; |
104 | ssize_t size = io->size; | 95 | ssize_t size = io->size; |
96 | wait_queue_head_t *wq; | ||
105 | int ret = 0; | 97 | int ret = 0; |
106 | 98 | ||
107 | ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," | 99 | ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," |
@@ -126,7 +118,16 @@ int ext4_end_io_nolock(ext4_io_end_t *io) | |||
126 | if (io->iocb) | 118 | if (io->iocb) |
127 | aio_complete(io->iocb, io->result, 0); | 119 | aio_complete(io->iocb, io->result, 0); |
128 | /* clear the DIO AIO unwritten flag */ | 120 | /* clear the DIO AIO unwritten flag */ |
129 | io->flag &= ~EXT4_IO_END_UNWRITTEN; | 121 | if (io->flag & EXT4_IO_END_UNWRITTEN) { |
122 | io->flag &= ~EXT4_IO_END_UNWRITTEN; | ||
123 | /* Wake up anyone waiting on unwritten extent conversion */ | ||
124 | wq = ext4_ioend_wq(io->inode); | ||
125 | if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten) && | ||
126 | waitqueue_active(wq)) { | ||
127 | wake_up_all(wq); | ||
128 | } | ||
129 | } | ||
130 | |||
130 | return ret; | 131 | return ret; |
131 | } | 132 | } |
132 | 133 | ||
@@ -190,6 +191,7 @@ static void ext4_end_bio(struct bio *bio, int error) | |||
190 | struct inode *inode; | 191 | struct inode *inode; |
191 | unsigned long flags; | 192 | unsigned long flags; |
192 | int i; | 193 | int i; |
194 | sector_t bi_sector = bio->bi_sector; | ||
193 | 195 | ||
194 | BUG_ON(!io_end); | 196 | BUG_ON(!io_end); |
195 | bio->bi_private = NULL; | 197 | bio->bi_private = NULL; |
@@ -207,9 +209,7 @@ static void ext4_end_bio(struct bio *bio, int error) | |||
207 | if (error) | 209 | if (error) |
208 | SetPageError(page); | 210 | SetPageError(page); |
209 | BUG_ON(!head); | 211 | BUG_ON(!head); |
210 | if (head->b_size == PAGE_CACHE_SIZE) | 212 | if (head->b_size != PAGE_CACHE_SIZE) { |
211 | clear_buffer_dirty(head); | ||
212 | else { | ||
213 | loff_t offset; | 213 | loff_t offset; |
214 | loff_t io_end_offset = io_end->offset + io_end->size; | 214 | loff_t io_end_offset = io_end->offset + io_end->size; |
215 | 215 | ||
@@ -221,7 +221,6 @@ static void ext4_end_bio(struct bio *bio, int error) | |||
221 | if (error) | 221 | if (error) |
222 | buffer_io_error(bh); | 222 | buffer_io_error(bh); |
223 | 223 | ||
224 | clear_buffer_dirty(bh); | ||
225 | } | 224 | } |
226 | if (buffer_delay(bh)) | 225 | if (buffer_delay(bh)) |
227 | partial_write = 1; | 226 | partial_write = 1; |
@@ -257,7 +256,7 @@ static void ext4_end_bio(struct bio *bio, int error) | |||
257 | (unsigned long long) io_end->offset, | 256 | (unsigned long long) io_end->offset, |
258 | (long) io_end->size, | 257 | (long) io_end->size, |
259 | (unsigned long long) | 258 | (unsigned long long) |
260 | bio->bi_sector >> (inode->i_blkbits - 9)); | 259 | bi_sector >> (inode->i_blkbits - 9)); |
261 | } | 260 | } |
262 | 261 | ||
263 | /* Add the io_end to per-inode completed io list*/ | 262 | /* Add the io_end to per-inode completed io list*/ |
@@ -380,6 +379,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, | |||
380 | 379 | ||
381 | blocksize = 1 << inode->i_blkbits; | 380 | blocksize = 1 << inode->i_blkbits; |
382 | 381 | ||
382 | BUG_ON(!PageLocked(page)); | ||
383 | BUG_ON(PageWriteback(page)); | 383 | BUG_ON(PageWriteback(page)); |
384 | set_page_writeback(page); | 384 | set_page_writeback(page); |
385 | ClearPageError(page); | 385 | ClearPageError(page); |
@@ -397,12 +397,14 @@ int ext4_bio_write_page(struct ext4_io_submit *io, | |||
397 | for (bh = head = page_buffers(page), block_start = 0; | 397 | for (bh = head = page_buffers(page), block_start = 0; |
398 | bh != head || !block_start; | 398 | bh != head || !block_start; |
399 | block_start = block_end, bh = bh->b_this_page) { | 399 | block_start = block_end, bh = bh->b_this_page) { |
400 | |||
400 | block_end = block_start + blocksize; | 401 | block_end = block_start + blocksize; |
401 | if (block_start >= len) { | 402 | if (block_start >= len) { |
402 | clear_buffer_dirty(bh); | 403 | clear_buffer_dirty(bh); |
403 | set_buffer_uptodate(bh); | 404 | set_buffer_uptodate(bh); |
404 | continue; | 405 | continue; |
405 | } | 406 | } |
407 | clear_buffer_dirty(bh); | ||
406 | ret = io_submit_add_bh(io, io_page, inode, wbc, bh); | 408 | ret = io_submit_add_bh(io, io_page, inode, wbc, bh); |
407 | if (ret) { | 409 | if (ret) { |
408 | /* | 410 | /* |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index cb10a06775e4..f6a318f836b2 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -77,6 +77,7 @@ static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, | |||
77 | const char *dev_name, void *data); | 77 | const char *dev_name, void *data); |
78 | static void ext4_destroy_lazyinit_thread(void); | 78 | static void ext4_destroy_lazyinit_thread(void); |
79 | static void ext4_unregister_li_request(struct super_block *sb); | 79 | static void ext4_unregister_li_request(struct super_block *sb); |
80 | static void ext4_clear_request_list(void); | ||
80 | 81 | ||
81 | #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) | 82 | #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) |
82 | static struct file_system_type ext3_fs_type = { | 83 | static struct file_system_type ext3_fs_type = { |
@@ -832,6 +833,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) | |||
832 | ei->i_sync_tid = 0; | 833 | ei->i_sync_tid = 0; |
833 | ei->i_datasync_tid = 0; | 834 | ei->i_datasync_tid = 0; |
834 | atomic_set(&ei->i_ioend_count, 0); | 835 | atomic_set(&ei->i_ioend_count, 0); |
836 | atomic_set(&ei->i_aiodio_unwritten, 0); | ||
835 | 837 | ||
836 | return &ei->vfs_inode; | 838 | return &ei->vfs_inode; |
837 | } | 839 | } |
@@ -1161,7 +1163,7 @@ static int ext4_release_dquot(struct dquot *dquot); | |||
1161 | static int ext4_mark_dquot_dirty(struct dquot *dquot); | 1163 | static int ext4_mark_dquot_dirty(struct dquot *dquot); |
1162 | static int ext4_write_info(struct super_block *sb, int type); | 1164 | static int ext4_write_info(struct super_block *sb, int type); |
1163 | static int ext4_quota_on(struct super_block *sb, int type, int format_id, | 1165 | static int ext4_quota_on(struct super_block *sb, int type, int format_id, |
1164 | char *path); | 1166 | struct path *path); |
1165 | static int ext4_quota_off(struct super_block *sb, int type); | 1167 | static int ext4_quota_off(struct super_block *sb, int type); |
1166 | static int ext4_quota_on_mount(struct super_block *sb, int type); | 1168 | static int ext4_quota_on_mount(struct super_block *sb, int type); |
1167 | static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, | 1169 | static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, |
@@ -2716,6 +2718,8 @@ static void ext4_unregister_li_request(struct super_block *sb) | |||
2716 | mutex_unlock(&ext4_li_info->li_list_mtx); | 2718 | mutex_unlock(&ext4_li_info->li_list_mtx); |
2717 | } | 2719 | } |
2718 | 2720 | ||
2721 | static struct task_struct *ext4_lazyinit_task; | ||
2722 | |||
2719 | /* | 2723 | /* |
2720 | * This is the function where ext4lazyinit thread lives. It walks | 2724 | * This is the function where ext4lazyinit thread lives. It walks |
2721 | * through the request list searching for next scheduled filesystem. | 2725 | * through the request list searching for next scheduled filesystem. |
@@ -2784,6 +2788,10 @@ cont_thread: | |||
2784 | if (time_before(jiffies, next_wakeup)) | 2788 | if (time_before(jiffies, next_wakeup)) |
2785 | schedule(); | 2789 | schedule(); |
2786 | finish_wait(&eli->li_wait_daemon, &wait); | 2790 | finish_wait(&eli->li_wait_daemon, &wait); |
2791 | if (kthread_should_stop()) { | ||
2792 | ext4_clear_request_list(); | ||
2793 | goto exit_thread; | ||
2794 | } | ||
2787 | } | 2795 | } |
2788 | 2796 | ||
2789 | exit_thread: | 2797 | exit_thread: |
@@ -2808,6 +2816,7 @@ exit_thread: | |||
2808 | wake_up(&eli->li_wait_task); | 2816 | wake_up(&eli->li_wait_task); |
2809 | 2817 | ||
2810 | kfree(ext4_li_info); | 2818 | kfree(ext4_li_info); |
2819 | ext4_lazyinit_task = NULL; | ||
2811 | ext4_li_info = NULL; | 2820 | ext4_li_info = NULL; |
2812 | mutex_unlock(&ext4_li_mtx); | 2821 | mutex_unlock(&ext4_li_mtx); |
2813 | 2822 | ||
@@ -2830,11 +2839,10 @@ static void ext4_clear_request_list(void) | |||
2830 | 2839 | ||
2831 | static int ext4_run_lazyinit_thread(void) | 2840 | static int ext4_run_lazyinit_thread(void) |
2832 | { | 2841 | { |
2833 | struct task_struct *t; | 2842 | ext4_lazyinit_task = kthread_run(ext4_lazyinit_thread, |
2834 | 2843 | ext4_li_info, "ext4lazyinit"); | |
2835 | t = kthread_run(ext4_lazyinit_thread, ext4_li_info, "ext4lazyinit"); | 2844 | if (IS_ERR(ext4_lazyinit_task)) { |
2836 | if (IS_ERR(t)) { | 2845 | int err = PTR_ERR(ext4_lazyinit_task); |
2837 | int err = PTR_ERR(t); | ||
2838 | ext4_clear_request_list(); | 2846 | ext4_clear_request_list(); |
2839 | del_timer_sync(&ext4_li_info->li_timer); | 2847 | del_timer_sync(&ext4_li_info->li_timer); |
2840 | kfree(ext4_li_info); | 2848 | kfree(ext4_li_info); |
@@ -2985,16 +2993,10 @@ static void ext4_destroy_lazyinit_thread(void) | |||
2985 | * If thread exited earlier | 2993 | * If thread exited earlier |
2986 | * there's nothing to be done. | 2994 | * there's nothing to be done. |
2987 | */ | 2995 | */ |
2988 | if (!ext4_li_info) | 2996 | if (!ext4_li_info || !ext4_lazyinit_task) |
2989 | return; | 2997 | return; |
2990 | 2998 | ||
2991 | ext4_clear_request_list(); | 2999 | kthread_stop(ext4_lazyinit_task); |
2992 | |||
2993 | while (ext4_li_info->li_task) { | ||
2994 | wake_up(&ext4_li_info->li_wait_daemon); | ||
2995 | wait_event(ext4_li_info->li_wait_task, | ||
2996 | ext4_li_info->li_task == NULL); | ||
2997 | } | ||
2998 | } | 3000 | } |
2999 | 3001 | ||
3000 | static int ext4_fill_super(struct super_block *sb, void *data, int silent) | 3002 | static int ext4_fill_super(struct super_block *sb, void *data, int silent) |
@@ -4558,27 +4560,20 @@ static int ext4_quota_on_mount(struct super_block *sb, int type) | |||
4558 | * Standard function to be called on quota_on | 4560 | * Standard function to be called on quota_on |
4559 | */ | 4561 | */ |
4560 | static int ext4_quota_on(struct super_block *sb, int type, int format_id, | 4562 | static int ext4_quota_on(struct super_block *sb, int type, int format_id, |
4561 | char *name) | 4563 | struct path *path) |
4562 | { | 4564 | { |
4563 | int err; | 4565 | int err; |
4564 | struct path path; | ||
4565 | 4566 | ||
4566 | if (!test_opt(sb, QUOTA)) | 4567 | if (!test_opt(sb, QUOTA)) |
4567 | return -EINVAL; | 4568 | return -EINVAL; |
4568 | 4569 | ||
4569 | err = kern_path(name, LOOKUP_FOLLOW, &path); | ||
4570 | if (err) | ||
4571 | return err; | ||
4572 | |||
4573 | /* Quotafile not on the same filesystem? */ | 4570 | /* Quotafile not on the same filesystem? */ |
4574 | if (path.mnt->mnt_sb != sb) { | 4571 | if (path->mnt->mnt_sb != sb) |
4575 | path_put(&path); | ||
4576 | return -EXDEV; | 4572 | return -EXDEV; |
4577 | } | ||
4578 | /* Journaling quota? */ | 4573 | /* Journaling quota? */ |
4579 | if (EXT4_SB(sb)->s_qf_names[type]) { | 4574 | if (EXT4_SB(sb)->s_qf_names[type]) { |
4580 | /* Quotafile not in fs root? */ | 4575 | /* Quotafile not in fs root? */ |
4581 | if (path.dentry->d_parent != sb->s_root) | 4576 | if (path->dentry->d_parent != sb->s_root) |
4582 | ext4_msg(sb, KERN_WARNING, | 4577 | ext4_msg(sb, KERN_WARNING, |
4583 | "Quota file not on filesystem root. " | 4578 | "Quota file not on filesystem root. " |
4584 | "Journaled quota will not work"); | 4579 | "Journaled quota will not work"); |
@@ -4589,7 +4584,7 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, | |||
4589 | * all updates to the file when we bypass pagecache... | 4584 | * all updates to the file when we bypass pagecache... |
4590 | */ | 4585 | */ |
4591 | if (EXT4_SB(sb)->s_journal && | 4586 | if (EXT4_SB(sb)->s_journal && |
4592 | ext4_should_journal_data(path.dentry->d_inode)) { | 4587 | ext4_should_journal_data(path->dentry->d_inode)) { |
4593 | /* | 4588 | /* |
4594 | * We don't need to lock updates but journal_flush() could | 4589 | * We don't need to lock updates but journal_flush() could |
4595 | * otherwise be livelocked... | 4590 | * otherwise be livelocked... |
@@ -4597,15 +4592,11 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, | |||
4597 | jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); | 4592 | jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); |
4598 | err = jbd2_journal_flush(EXT4_SB(sb)->s_journal); | 4593 | err = jbd2_journal_flush(EXT4_SB(sb)->s_journal); |
4599 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); | 4594 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); |
4600 | if (err) { | 4595 | if (err) |
4601 | path_put(&path); | ||
4602 | return err; | 4596 | return err; |
4603 | } | ||
4604 | } | 4597 | } |
4605 | 4598 | ||
4606 | err = dquot_quota_on_path(sb, type, format_id, &path); | 4599 | return dquot_quota_on(sb, type, format_id, path); |
4607 | path_put(&path); | ||
4608 | return err; | ||
4609 | } | 4600 | } |
4610 | 4601 | ||
4611 | static int ext4_quota_off(struct super_block *sb, int type) | 4602 | static int ext4_quota_off(struct super_block *sb, int type) |
@@ -4779,7 +4770,7 @@ static struct file_system_type ext4_fs_type = { | |||
4779 | .fs_flags = FS_REQUIRES_DEV, | 4770 | .fs_flags = FS_REQUIRES_DEV, |
4780 | }; | 4771 | }; |
4781 | 4772 | ||
4782 | int __init ext4_init_feat_adverts(void) | 4773 | static int __init ext4_init_feat_adverts(void) |
4783 | { | 4774 | { |
4784 | struct ext4_features *ef; | 4775 | struct ext4_features *ef; |
4785 | int ret = -ENOMEM; | 4776 | int ret = -ENOMEM; |
@@ -4803,23 +4794,44 @@ out: | |||
4803 | return ret; | 4794 | return ret; |
4804 | } | 4795 | } |
4805 | 4796 | ||
4797 | static void ext4_exit_feat_adverts(void) | ||
4798 | { | ||
4799 | kobject_put(&ext4_feat->f_kobj); | ||
4800 | wait_for_completion(&ext4_feat->f_kobj_unregister); | ||
4801 | kfree(ext4_feat); | ||
4802 | } | ||
4803 | |||
4804 | /* Shared across all ext4 file systems */ | ||
4805 | wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ]; | ||
4806 | struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ]; | ||
4807 | |||
4806 | static int __init ext4_init_fs(void) | 4808 | static int __init ext4_init_fs(void) |
4807 | { | 4809 | { |
4808 | int err; | 4810 | int i, err; |
4809 | 4811 | ||
4810 | ext4_check_flag_values(); | 4812 | ext4_check_flag_values(); |
4813 | |||
4814 | for (i = 0; i < EXT4_WQ_HASH_SZ; i++) { | ||
4815 | mutex_init(&ext4__aio_mutex[i]); | ||
4816 | init_waitqueue_head(&ext4__ioend_wq[i]); | ||
4817 | } | ||
4818 | |||
4811 | err = ext4_init_pageio(); | 4819 | err = ext4_init_pageio(); |
4812 | if (err) | 4820 | if (err) |
4813 | return err; | 4821 | return err; |
4814 | err = ext4_init_system_zone(); | 4822 | err = ext4_init_system_zone(); |
4815 | if (err) | 4823 | if (err) |
4816 | goto out5; | 4824 | goto out7; |
4817 | ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); | 4825 | ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); |
4818 | if (!ext4_kset) | 4826 | if (!ext4_kset) |
4819 | goto out4; | 4827 | goto out6; |
4820 | ext4_proc_root = proc_mkdir("fs/ext4", NULL); | 4828 | ext4_proc_root = proc_mkdir("fs/ext4", NULL); |
4829 | if (!ext4_proc_root) | ||
4830 | goto out5; | ||
4821 | 4831 | ||
4822 | err = ext4_init_feat_adverts(); | 4832 | err = ext4_init_feat_adverts(); |
4833 | if (err) | ||
4834 | goto out4; | ||
4823 | 4835 | ||
4824 | err = ext4_init_mballoc(); | 4836 | err = ext4_init_mballoc(); |
4825 | if (err) | 4837 | if (err) |
@@ -4849,12 +4861,14 @@ out1: | |||
4849 | out2: | 4861 | out2: |
4850 | ext4_exit_mballoc(); | 4862 | ext4_exit_mballoc(); |
4851 | out3: | 4863 | out3: |
4852 | kfree(ext4_feat); | 4864 | ext4_exit_feat_adverts(); |
4865 | out4: | ||
4853 | remove_proc_entry("fs/ext4", NULL); | 4866 | remove_proc_entry("fs/ext4", NULL); |
4867 | out5: | ||
4854 | kset_unregister(ext4_kset); | 4868 | kset_unregister(ext4_kset); |
4855 | out4: | 4869 | out6: |
4856 | ext4_exit_system_zone(); | 4870 | ext4_exit_system_zone(); |
4857 | out5: | 4871 | out7: |
4858 | ext4_exit_pageio(); | 4872 | ext4_exit_pageio(); |
4859 | return err; | 4873 | return err; |
4860 | } | 4874 | } |
@@ -4868,6 +4882,7 @@ static void __exit ext4_exit_fs(void) | |||
4868 | destroy_inodecache(); | 4882 | destroy_inodecache(); |
4869 | ext4_exit_xattr(); | 4883 | ext4_exit_xattr(); |
4870 | ext4_exit_mballoc(); | 4884 | ext4_exit_mballoc(); |
4885 | ext4_exit_feat_adverts(); | ||
4871 | remove_proc_entry("fs/ext4", NULL); | 4886 | remove_proc_entry("fs/ext4", NULL); |
4872 | kset_unregister(ext4_kset); | 4887 | kset_unregister(ext4_kset); |
4873 | ext4_exit_system_zone(); | 4888 | ext4_exit_system_zone(); |
diff --git a/fs/fcntl.c b/fs/fcntl.c index ecc8b3954ed6..cb1026181bdc 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c | |||
@@ -815,7 +815,7 @@ static int __init fcntl_init(void) | |||
815 | __O_SYNC | O_DSYNC | FASYNC | | 815 | __O_SYNC | O_DSYNC | FASYNC | |
816 | O_DIRECT | O_LARGEFILE | O_DIRECTORY | | 816 | O_DIRECT | O_LARGEFILE | O_DIRECTORY | |
817 | O_NOFOLLOW | O_NOATIME | O_CLOEXEC | | 817 | O_NOFOLLOW | O_NOATIME | O_CLOEXEC | |
818 | FMODE_EXEC | 818 | __FMODE_EXEC |
819 | )); | 819 | )); |
820 | 820 | ||
821 | fasync_cache = kmem_cache_create("fasync_cache", | 821 | fasync_cache = kmem_cache_create("fasync_cache", |
diff --git a/fs/file_table.c b/fs/file_table.c index c3dee381f1b4..eb36b6b17e26 100644 --- a/fs/file_table.c +++ b/fs/file_table.c | |||
@@ -125,13 +125,13 @@ struct file *get_empty_filp(void) | |||
125 | goto fail; | 125 | goto fail; |
126 | 126 | ||
127 | percpu_counter_inc(&nr_files); | 127 | percpu_counter_inc(&nr_files); |
128 | f->f_cred = get_cred(cred); | ||
128 | if (security_file_alloc(f)) | 129 | if (security_file_alloc(f)) |
129 | goto fail_sec; | 130 | goto fail_sec; |
130 | 131 | ||
131 | INIT_LIST_HEAD(&f->f_u.fu_list); | 132 | INIT_LIST_HEAD(&f->f_u.fu_list); |
132 | atomic_long_set(&f->f_count, 1); | 133 | atomic_long_set(&f->f_count, 1); |
133 | rwlock_init(&f->f_owner.lock); | 134 | rwlock_init(&f->f_owner.lock); |
134 | f->f_cred = get_cred(cred); | ||
135 | spin_lock_init(&f->f_lock); | 135 | spin_lock_init(&f->f_lock); |
136 | eventpoll_init_file(f); | 136 | eventpoll_init_file(f); |
137 | /* f->f_version: 0 */ | 137 | /* f->f_version: 0 */ |
@@ -311,7 +311,7 @@ struct file *fget_light(unsigned int fd, int *fput_needed) | |||
311 | struct files_struct *files = current->files; | 311 | struct files_struct *files = current->files; |
312 | 312 | ||
313 | *fput_needed = 0; | 313 | *fput_needed = 0; |
314 | if (likely((atomic_read(&files->count) == 1))) { | 314 | if (atomic_read(&files->count) == 1) { |
315 | file = fcheck_files(files, fd); | 315 | file = fcheck_files(files, fd); |
316 | } else { | 316 | } else { |
317 | rcu_read_lock(); | 317 | rcu_read_lock(); |
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 3d06ccc953aa..59c6e4956786 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -84,13 +84,9 @@ static inline struct inode *wb_inode(struct list_head *head) | |||
84 | return list_entry(head, struct inode, i_wb_list); | 84 | return list_entry(head, struct inode, i_wb_list); |
85 | } | 85 | } |
86 | 86 | ||
87 | static void bdi_queue_work(struct backing_dev_info *bdi, | 87 | /* Wakeup flusher thread or forker thread to fork it. Requires bdi->wb_lock. */ |
88 | struct wb_writeback_work *work) | 88 | static void bdi_wakeup_flusher(struct backing_dev_info *bdi) |
89 | { | 89 | { |
90 | trace_writeback_queue(bdi, work); | ||
91 | |||
92 | spin_lock_bh(&bdi->wb_lock); | ||
93 | list_add_tail(&work->list, &bdi->work_list); | ||
94 | if (bdi->wb.task) { | 90 | if (bdi->wb.task) { |
95 | wake_up_process(bdi->wb.task); | 91 | wake_up_process(bdi->wb.task); |
96 | } else { | 92 | } else { |
@@ -98,15 +94,26 @@ static void bdi_queue_work(struct backing_dev_info *bdi, | |||
98 | * The bdi thread isn't there, wake up the forker thread which | 94 | * The bdi thread isn't there, wake up the forker thread which |
99 | * will create and run it. | 95 | * will create and run it. |
100 | */ | 96 | */ |
101 | trace_writeback_nothread(bdi, work); | ||
102 | wake_up_process(default_backing_dev_info.wb.task); | 97 | wake_up_process(default_backing_dev_info.wb.task); |
103 | } | 98 | } |
99 | } | ||
100 | |||
101 | static void bdi_queue_work(struct backing_dev_info *bdi, | ||
102 | struct wb_writeback_work *work) | ||
103 | { | ||
104 | trace_writeback_queue(bdi, work); | ||
105 | |||
106 | spin_lock_bh(&bdi->wb_lock); | ||
107 | list_add_tail(&work->list, &bdi->work_list); | ||
108 | if (!bdi->wb.task) | ||
109 | trace_writeback_nothread(bdi, work); | ||
110 | bdi_wakeup_flusher(bdi); | ||
104 | spin_unlock_bh(&bdi->wb_lock); | 111 | spin_unlock_bh(&bdi->wb_lock); |
105 | } | 112 | } |
106 | 113 | ||
107 | static void | 114 | static void |
108 | __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, | 115 | __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, |
109 | bool range_cyclic, bool for_background) | 116 | bool range_cyclic) |
110 | { | 117 | { |
111 | struct wb_writeback_work *work; | 118 | struct wb_writeback_work *work; |
112 | 119 | ||
@@ -126,7 +133,6 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, | |||
126 | work->sync_mode = WB_SYNC_NONE; | 133 | work->sync_mode = WB_SYNC_NONE; |
127 | work->nr_pages = nr_pages; | 134 | work->nr_pages = nr_pages; |
128 | work->range_cyclic = range_cyclic; | 135 | work->range_cyclic = range_cyclic; |
129 | work->for_background = for_background; | ||
130 | 136 | ||
131 | bdi_queue_work(bdi, work); | 137 | bdi_queue_work(bdi, work); |
132 | } | 138 | } |
@@ -144,7 +150,7 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, | |||
144 | */ | 150 | */ |
145 | void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages) | 151 | void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages) |
146 | { | 152 | { |
147 | __bdi_start_writeback(bdi, nr_pages, true, false); | 153 | __bdi_start_writeback(bdi, nr_pages, true); |
148 | } | 154 | } |
149 | 155 | ||
150 | /** | 156 | /** |
@@ -152,13 +158,21 @@ void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages) | |||
152 | * @bdi: the backing device to write from | 158 | * @bdi: the backing device to write from |
153 | * | 159 | * |
154 | * Description: | 160 | * Description: |
155 | * This does WB_SYNC_NONE background writeback. The IO is only | 161 | * This makes sure WB_SYNC_NONE background writeback happens. When |
156 | * started when this function returns, we make no guarentees on | 162 | * this function returns, it is only guaranteed that for given BDI |
157 | * completion. Caller need not hold sb s_umount semaphore. | 163 | * some IO is happening if we are over background dirty threshold. |
164 | * Caller need not hold sb s_umount semaphore. | ||
158 | */ | 165 | */ |
159 | void bdi_start_background_writeback(struct backing_dev_info *bdi) | 166 | void bdi_start_background_writeback(struct backing_dev_info *bdi) |
160 | { | 167 | { |
161 | __bdi_start_writeback(bdi, LONG_MAX, true, true); | 168 | /* |
169 | * We just wake up the flusher thread. It will perform background | ||
170 | * writeback as soon as there is no other work to do. | ||
171 | */ | ||
172 | trace_writeback_wake_background(bdi); | ||
173 | spin_lock_bh(&bdi->wb_lock); | ||
174 | bdi_wakeup_flusher(bdi); | ||
175 | spin_unlock_bh(&bdi->wb_lock); | ||
162 | } | 176 | } |
163 | 177 | ||
164 | /* | 178 | /* |
@@ -616,6 +630,7 @@ static long wb_writeback(struct bdi_writeback *wb, | |||
616 | }; | 630 | }; |
617 | unsigned long oldest_jif; | 631 | unsigned long oldest_jif; |
618 | long wrote = 0; | 632 | long wrote = 0; |
633 | long write_chunk; | ||
619 | struct inode *inode; | 634 | struct inode *inode; |
620 | 635 | ||
621 | if (wbc.for_kupdate) { | 636 | if (wbc.for_kupdate) { |
@@ -628,6 +643,24 @@ static long wb_writeback(struct bdi_writeback *wb, | |||
628 | wbc.range_end = LLONG_MAX; | 643 | wbc.range_end = LLONG_MAX; |
629 | } | 644 | } |
630 | 645 | ||
646 | /* | ||
647 | * WB_SYNC_ALL mode does livelock avoidance by syncing dirty | ||
648 | * inodes/pages in one big loop. Setting wbc.nr_to_write=LONG_MAX | ||
649 | * here avoids calling into writeback_inodes_wb() more than once. | ||
650 | * | ||
651 | * The intended call sequence for WB_SYNC_ALL writeback is: | ||
652 | * | ||
653 | * wb_writeback() | ||
654 | * __writeback_inodes_sb() <== called only once | ||
655 | * write_cache_pages() <== called once for each inode | ||
656 | * (quickly) tag currently dirty pages | ||
657 | * (maybe slowly) sync all tagged pages | ||
658 | */ | ||
659 | if (wbc.sync_mode == WB_SYNC_NONE) | ||
660 | write_chunk = MAX_WRITEBACK_PAGES; | ||
661 | else | ||
662 | write_chunk = LONG_MAX; | ||
663 | |||
631 | wbc.wb_start = jiffies; /* livelock avoidance */ | 664 | wbc.wb_start = jiffies; /* livelock avoidance */ |
632 | for (;;) { | 665 | for (;;) { |
633 | /* | 666 | /* |
@@ -637,6 +670,16 @@ static long wb_writeback(struct bdi_writeback *wb, | |||
637 | break; | 670 | break; |
638 | 671 | ||
639 | /* | 672 | /* |
673 | * Background writeout and kupdate-style writeback may | ||
674 | * run forever. Stop them if there is other work to do | ||
675 | * so that e.g. sync can proceed. They'll be restarted | ||
676 | * after the other works are all done. | ||
677 | */ | ||
678 | if ((work->for_background || work->for_kupdate) && | ||
679 | !list_empty(&wb->bdi->work_list)) | ||
680 | break; | ||
681 | |||
682 | /* | ||
640 | * For background writeout, stop when we are below the | 683 | * For background writeout, stop when we are below the |
641 | * background dirty threshold | 684 | * background dirty threshold |
642 | */ | 685 | */ |
@@ -644,7 +687,7 @@ static long wb_writeback(struct bdi_writeback *wb, | |||
644 | break; | 687 | break; |
645 | 688 | ||
646 | wbc.more_io = 0; | 689 | wbc.more_io = 0; |
647 | wbc.nr_to_write = MAX_WRITEBACK_PAGES; | 690 | wbc.nr_to_write = write_chunk; |
648 | wbc.pages_skipped = 0; | 691 | wbc.pages_skipped = 0; |
649 | 692 | ||
650 | trace_wbc_writeback_start(&wbc, wb->bdi); | 693 | trace_wbc_writeback_start(&wbc, wb->bdi); |
@@ -654,8 +697,8 @@ static long wb_writeback(struct bdi_writeback *wb, | |||
654 | writeback_inodes_wb(wb, &wbc); | 697 | writeback_inodes_wb(wb, &wbc); |
655 | trace_wbc_writeback_written(&wbc, wb->bdi); | 698 | trace_wbc_writeback_written(&wbc, wb->bdi); |
656 | 699 | ||
657 | work->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; | 700 | work->nr_pages -= write_chunk - wbc.nr_to_write; |
658 | wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write; | 701 | wrote += write_chunk - wbc.nr_to_write; |
659 | 702 | ||
660 | /* | 703 | /* |
661 | * If we consumed everything, see if we have more | 704 | * If we consumed everything, see if we have more |
@@ -670,7 +713,7 @@ static long wb_writeback(struct bdi_writeback *wb, | |||
670 | /* | 713 | /* |
671 | * Did we write something? Try for more | 714 | * Did we write something? Try for more |
672 | */ | 715 | */ |
673 | if (wbc.nr_to_write < MAX_WRITEBACK_PAGES) | 716 | if (wbc.nr_to_write < write_chunk) |
674 | continue; | 717 | continue; |
675 | /* | 718 | /* |
676 | * Nothing written. Wait for some inode to | 719 | * Nothing written. Wait for some inode to |
@@ -718,6 +761,23 @@ static unsigned long get_nr_dirty_pages(void) | |||
718 | get_nr_dirty_inodes(); | 761 | get_nr_dirty_inodes(); |
719 | } | 762 | } |
720 | 763 | ||
764 | static long wb_check_background_flush(struct bdi_writeback *wb) | ||
765 | { | ||
766 | if (over_bground_thresh()) { | ||
767 | |||
768 | struct wb_writeback_work work = { | ||
769 | .nr_pages = LONG_MAX, | ||
770 | .sync_mode = WB_SYNC_NONE, | ||
771 | .for_background = 1, | ||
772 | .range_cyclic = 1, | ||
773 | }; | ||
774 | |||
775 | return wb_writeback(wb, &work); | ||
776 | } | ||
777 | |||
778 | return 0; | ||
779 | } | ||
780 | |||
721 | static long wb_check_old_data_flush(struct bdi_writeback *wb) | 781 | static long wb_check_old_data_flush(struct bdi_writeback *wb) |
722 | { | 782 | { |
723 | unsigned long expired; | 783 | unsigned long expired; |
@@ -787,6 +847,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) | |||
787 | * Check for periodic writeback, kupdated() style | 847 | * Check for periodic writeback, kupdated() style |
788 | */ | 848 | */ |
789 | wrote += wb_check_old_data_flush(wb); | 849 | wrote += wb_check_old_data_flush(wb); |
850 | wrote += wb_check_background_flush(wb); | ||
790 | clear_bit(BDI_writeback_running, &wb->bdi->state); | 851 | clear_bit(BDI_writeback_running, &wb->bdi->state); |
791 | 852 | ||
792 | return wrote; | 853 | return wrote; |
@@ -873,7 +934,7 @@ void wakeup_flusher_threads(long nr_pages) | |||
873 | list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) { | 934 | list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) { |
874 | if (!bdi_has_dirty_io(bdi)) | 935 | if (!bdi_has_dirty_io(bdi)) |
875 | continue; | 936 | continue; |
876 | __bdi_start_writeback(bdi, nr_pages, false, false); | 937 | __bdi_start_writeback(bdi, nr_pages, false); |
877 | } | 938 | } |
878 | rcu_read_unlock(); | 939 | rcu_read_unlock(); |
879 | } | 940 | } |
@@ -1164,7 +1225,7 @@ EXPORT_SYMBOL(writeback_inodes_sb_nr_if_idle); | |||
1164 | * @sb: the superblock | 1225 | * @sb: the superblock |
1165 | * | 1226 | * |
1166 | * This function writes and waits on any dirty inode belonging to this | 1227 | * This function writes and waits on any dirty inode belonging to this |
1167 | * super_block. The number of pages synced is returned. | 1228 | * super_block. |
1168 | */ | 1229 | */ |
1169 | void sync_inodes_sb(struct super_block *sb) | 1230 | void sync_inodes_sb(struct super_block *sb) |
1170 | { | 1231 | { |
@@ -1242,11 +1303,11 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc) | |||
1242 | EXPORT_SYMBOL(sync_inode); | 1303 | EXPORT_SYMBOL(sync_inode); |
1243 | 1304 | ||
1244 | /** | 1305 | /** |
1245 | * sync_inode - write an inode to disk | 1306 | * sync_inode_metadata - write an inode to disk |
1246 | * @inode: the inode to sync | 1307 | * @inode: the inode to sync |
1247 | * @wait: wait for I/O to complete. | 1308 | * @wait: wait for I/O to complete. |
1248 | * | 1309 | * |
1249 | * Write an inode to disk and adjust it's dirty state after completion. | 1310 | * Write an inode to disk and adjust its dirty state after completion. |
1250 | * | 1311 | * |
1251 | * Note: only writes the actual inode, no associated data or other metadata. | 1312 | * Note: only writes the actual inode, no associated data or other metadata. |
1252 | */ | 1313 | */ |
diff --git a/fs/fs_struct.c b/fs/fs_struct.c index 68ca487bedb1..78b519c13536 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c | |||
@@ -4,6 +4,19 @@ | |||
4 | #include <linux/path.h> | 4 | #include <linux/path.h> |
5 | #include <linux/slab.h> | 5 | #include <linux/slab.h> |
6 | #include <linux/fs_struct.h> | 6 | #include <linux/fs_struct.h> |
7 | #include "internal.h" | ||
8 | |||
9 | static inline void path_get_longterm(struct path *path) | ||
10 | { | ||
11 | path_get(path); | ||
12 | mnt_make_longterm(path->mnt); | ||
13 | } | ||
14 | |||
15 | static inline void path_put_longterm(struct path *path) | ||
16 | { | ||
17 | mnt_make_shortterm(path->mnt); | ||
18 | path_put(path); | ||
19 | } | ||
7 | 20 | ||
8 | /* | 21 | /* |
9 | * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values. | 22 | * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values. |
@@ -17,11 +30,11 @@ void set_fs_root(struct fs_struct *fs, struct path *path) | |||
17 | write_seqcount_begin(&fs->seq); | 30 | write_seqcount_begin(&fs->seq); |
18 | old_root = fs->root; | 31 | old_root = fs->root; |
19 | fs->root = *path; | 32 | fs->root = *path; |
20 | path_get_long(path); | 33 | path_get_longterm(path); |
21 | write_seqcount_end(&fs->seq); | 34 | write_seqcount_end(&fs->seq); |
22 | spin_unlock(&fs->lock); | 35 | spin_unlock(&fs->lock); |
23 | if (old_root.dentry) | 36 | if (old_root.dentry) |
24 | path_put_long(&old_root); | 37 | path_put_longterm(&old_root); |
25 | } | 38 | } |
26 | 39 | ||
27 | /* | 40 | /* |
@@ -36,12 +49,12 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path) | |||
36 | write_seqcount_begin(&fs->seq); | 49 | write_seqcount_begin(&fs->seq); |
37 | old_pwd = fs->pwd; | 50 | old_pwd = fs->pwd; |
38 | fs->pwd = *path; | 51 | fs->pwd = *path; |
39 | path_get_long(path); | 52 | path_get_longterm(path); |
40 | write_seqcount_end(&fs->seq); | 53 | write_seqcount_end(&fs->seq); |
41 | spin_unlock(&fs->lock); | 54 | spin_unlock(&fs->lock); |
42 | 55 | ||
43 | if (old_pwd.dentry) | 56 | if (old_pwd.dentry) |
44 | path_put_long(&old_pwd); | 57 | path_put_longterm(&old_pwd); |
45 | } | 58 | } |
46 | 59 | ||
47 | void chroot_fs_refs(struct path *old_root, struct path *new_root) | 60 | void chroot_fs_refs(struct path *old_root, struct path *new_root) |
@@ -59,13 +72,13 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root) | |||
59 | write_seqcount_begin(&fs->seq); | 72 | write_seqcount_begin(&fs->seq); |
60 | if (fs->root.dentry == old_root->dentry | 73 | if (fs->root.dentry == old_root->dentry |
61 | && fs->root.mnt == old_root->mnt) { | 74 | && fs->root.mnt == old_root->mnt) { |
62 | path_get_long(new_root); | 75 | path_get_longterm(new_root); |
63 | fs->root = *new_root; | 76 | fs->root = *new_root; |
64 | count++; | 77 | count++; |
65 | } | 78 | } |
66 | if (fs->pwd.dentry == old_root->dentry | 79 | if (fs->pwd.dentry == old_root->dentry |
67 | && fs->pwd.mnt == old_root->mnt) { | 80 | && fs->pwd.mnt == old_root->mnt) { |
68 | path_get_long(new_root); | 81 | path_get_longterm(new_root); |
69 | fs->pwd = *new_root; | 82 | fs->pwd = *new_root; |
70 | count++; | 83 | count++; |
71 | } | 84 | } |
@@ -76,13 +89,13 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root) | |||
76 | } while_each_thread(g, p); | 89 | } while_each_thread(g, p); |
77 | read_unlock(&tasklist_lock); | 90 | read_unlock(&tasklist_lock); |
78 | while (count--) | 91 | while (count--) |
79 | path_put_long(old_root); | 92 | path_put_longterm(old_root); |
80 | } | 93 | } |
81 | 94 | ||
82 | void free_fs_struct(struct fs_struct *fs) | 95 | void free_fs_struct(struct fs_struct *fs) |
83 | { | 96 | { |
84 | path_put_long(&fs->root); | 97 | path_put_longterm(&fs->root); |
85 | path_put_long(&fs->pwd); | 98 | path_put_longterm(&fs->pwd); |
86 | kmem_cache_free(fs_cachep, fs); | 99 | kmem_cache_free(fs_cachep, fs); |
87 | } | 100 | } |
88 | 101 | ||
@@ -118,9 +131,9 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old) | |||
118 | 131 | ||
119 | spin_lock(&old->lock); | 132 | spin_lock(&old->lock); |
120 | fs->root = old->root; | 133 | fs->root = old->root; |
121 | path_get_long(&fs->root); | 134 | path_get_longterm(&fs->root); |
122 | fs->pwd = old->pwd; | 135 | fs->pwd = old->pwd; |
123 | path_get_long(&fs->pwd); | 136 | path_get_longterm(&fs->pwd); |
124 | spin_unlock(&old->lock); | 137 | spin_unlock(&old->lock); |
125 | } | 138 | } |
126 | return fs; | 139 | return fs; |
diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index b9f34eaede09..48a18f184d50 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c | |||
@@ -101,7 +101,7 @@ int fscache_submit_exclusive_op(struct fscache_object *object, | |||
101 | object->n_ops++; | 101 | object->n_ops++; |
102 | object->n_exclusive++; /* reads and writes must wait */ | 102 | object->n_exclusive++; /* reads and writes must wait */ |
103 | 103 | ||
104 | if (object->n_ops > 0) { | 104 | if (object->n_ops > 1) { |
105 | atomic_inc(&op->usage); | 105 | atomic_inc(&op->usage); |
106 | list_add_tail(&op->pend_link, &object->pending_ops); | 106 | list_add_tail(&op->pend_link, &object->pending_ops); |
107 | fscache_stat(&fscache_n_op_pend); | 107 | fscache_stat(&fscache_n_op_pend); |
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index fca6689e12e6..7cfdcb913363 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c | |||
@@ -19,6 +19,8 @@ | |||
19 | #include <linux/fs.h> | 19 | #include <linux/fs.h> |
20 | #include <linux/gfs2_ondisk.h> | 20 | #include <linux/gfs2_ondisk.h> |
21 | #include <linux/ext2_fs.h> | 21 | #include <linux/ext2_fs.h> |
22 | #include <linux/falloc.h> | ||
23 | #include <linux/swap.h> | ||
22 | #include <linux/crc32.h> | 24 | #include <linux/crc32.h> |
23 | #include <linux/writeback.h> | 25 | #include <linux/writeback.h> |
24 | #include <asm/uaccess.h> | 26 | #include <asm/uaccess.h> |
@@ -610,6 +612,260 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
610 | return generic_file_aio_write(iocb, iov, nr_segs, pos); | 612 | return generic_file_aio_write(iocb, iov, nr_segs, pos); |
611 | } | 613 | } |
612 | 614 | ||
615 | static void empty_write_end(struct page *page, unsigned from, | ||
616 | unsigned to) | ||
617 | { | ||
618 | struct gfs2_inode *ip = GFS2_I(page->mapping->host); | ||
619 | |||
620 | page_zero_new_buffers(page, from, to); | ||
621 | flush_dcache_page(page); | ||
622 | mark_page_accessed(page); | ||
623 | |||
624 | if (!gfs2_is_writeback(ip)) | ||
625 | gfs2_page_add_databufs(ip, page, from, to); | ||
626 | |||
627 | block_commit_write(page, from, to); | ||
628 | } | ||
629 | |||
630 | static int write_empty_blocks(struct page *page, unsigned from, unsigned to) | ||
631 | { | ||
632 | unsigned start, end, next; | ||
633 | struct buffer_head *bh, *head; | ||
634 | int error; | ||
635 | |||
636 | if (!page_has_buffers(page)) { | ||
637 | error = __block_write_begin(page, from, to - from, gfs2_block_map); | ||
638 | if (unlikely(error)) | ||
639 | return error; | ||
640 | |||
641 | empty_write_end(page, from, to); | ||
642 | return 0; | ||
643 | } | ||
644 | |||
645 | bh = head = page_buffers(page); | ||
646 | next = end = 0; | ||
647 | while (next < from) { | ||
648 | next += bh->b_size; | ||
649 | bh = bh->b_this_page; | ||
650 | } | ||
651 | start = next; | ||
652 | do { | ||
653 | next += bh->b_size; | ||
654 | if (buffer_mapped(bh)) { | ||
655 | if (end) { | ||
656 | error = __block_write_begin(page, start, end - start, | ||
657 | gfs2_block_map); | ||
658 | if (unlikely(error)) | ||
659 | return error; | ||
660 | empty_write_end(page, start, end); | ||
661 | end = 0; | ||
662 | } | ||
663 | start = next; | ||
664 | } | ||
665 | else | ||
666 | end = next; | ||
667 | bh = bh->b_this_page; | ||
668 | } while (next < to); | ||
669 | |||
670 | if (end) { | ||
671 | error = __block_write_begin(page, start, end - start, gfs2_block_map); | ||
672 | if (unlikely(error)) | ||
673 | return error; | ||
674 | empty_write_end(page, start, end); | ||
675 | } | ||
676 | |||
677 | return 0; | ||
678 | } | ||
679 | |||
680 | static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, | ||
681 | int mode) | ||
682 | { | ||
683 | struct gfs2_inode *ip = GFS2_I(inode); | ||
684 | struct buffer_head *dibh; | ||
685 | int error; | ||
686 | u64 start = offset >> PAGE_CACHE_SHIFT; | ||
687 | unsigned int start_offset = offset & ~PAGE_CACHE_MASK; | ||
688 | u64 end = (offset + len - 1) >> PAGE_CACHE_SHIFT; | ||
689 | pgoff_t curr; | ||
690 | struct page *page; | ||
691 | unsigned int end_offset = (offset + len) & ~PAGE_CACHE_MASK; | ||
692 | unsigned int from, to; | ||
693 | |||
694 | if (!end_offset) | ||
695 | end_offset = PAGE_CACHE_SIZE; | ||
696 | |||
697 | error = gfs2_meta_inode_buffer(ip, &dibh); | ||
698 | if (unlikely(error)) | ||
699 | goto out; | ||
700 | |||
701 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
702 | |||
703 | if (gfs2_is_stuffed(ip)) { | ||
704 | error = gfs2_unstuff_dinode(ip, NULL); | ||
705 | if (unlikely(error)) | ||
706 | goto out; | ||
707 | } | ||
708 | |||
709 | curr = start; | ||
710 | offset = start << PAGE_CACHE_SHIFT; | ||
711 | from = start_offset; | ||
712 | to = PAGE_CACHE_SIZE; | ||
713 | while (curr <= end) { | ||
714 | page = grab_cache_page_write_begin(inode->i_mapping, curr, | ||
715 | AOP_FLAG_NOFS); | ||
716 | if (unlikely(!page)) { | ||
717 | error = -ENOMEM; | ||
718 | goto out; | ||
719 | } | ||
720 | |||
721 | if (curr == end) | ||
722 | to = end_offset; | ||
723 | error = write_empty_blocks(page, from, to); | ||
724 | if (!error && offset + to > inode->i_size && | ||
725 | !(mode & FALLOC_FL_KEEP_SIZE)) { | ||
726 | i_size_write(inode, offset + to); | ||
727 | } | ||
728 | unlock_page(page); | ||
729 | page_cache_release(page); | ||
730 | if (error) | ||
731 | goto out; | ||
732 | curr++; | ||
733 | offset += PAGE_CACHE_SIZE; | ||
734 | from = 0; | ||
735 | } | ||
736 | |||
737 | gfs2_dinode_out(ip, dibh->b_data); | ||
738 | mark_inode_dirty(inode); | ||
739 | |||
740 | brelse(dibh); | ||
741 | |||
742 | out: | ||
743 | return error; | ||
744 | } | ||
745 | |||
746 | static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len, | ||
747 | unsigned int *data_blocks, unsigned int *ind_blocks) | ||
748 | { | ||
749 | const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
750 | unsigned int max_blocks = ip->i_alloc->al_rgd->rd_free_clone; | ||
751 | unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1); | ||
752 | |||
753 | for (tmp = max_data; tmp > sdp->sd_diptrs;) { | ||
754 | tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs); | ||
755 | max_data -= tmp; | ||
756 | } | ||
757 | /* This calculation isn't the exact reverse of gfs2_write_calc_reserve, | ||
758 | so it might end up with fewer data blocks */ | ||
759 | if (max_data <= *data_blocks) | ||
760 | return; | ||
761 | *data_blocks = max_data; | ||
762 | *ind_blocks = max_blocks - max_data; | ||
763 | *len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift; | ||
764 | if (*len > max) { | ||
765 | *len = max; | ||
766 | gfs2_write_calc_reserv(ip, max, data_blocks, ind_blocks); | ||
767 | } | ||
768 | } | ||
769 | |||
770 | static long gfs2_fallocate(struct file *file, int mode, loff_t offset, | ||
771 | loff_t len) | ||
772 | { | ||
773 | struct inode *inode = file->f_path.dentry->d_inode; | ||
774 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
775 | struct gfs2_inode *ip = GFS2_I(inode); | ||
776 | unsigned int data_blocks = 0, ind_blocks = 0, rblocks; | ||
777 | loff_t bytes, max_bytes; | ||
778 | struct gfs2_alloc *al; | ||
779 | int error; | ||
780 | loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift; | ||
781 | next = (next + 1) << sdp->sd_sb.sb_bsize_shift; | ||
782 | |||
783 | /* We only support the FALLOC_FL_KEEP_SIZE mode */ | ||
784 | if (mode & ~FALLOC_FL_KEEP_SIZE) | ||
785 | return -EOPNOTSUPP; | ||
786 | |||
787 | offset = (offset >> sdp->sd_sb.sb_bsize_shift) << | ||
788 | sdp->sd_sb.sb_bsize_shift; | ||
789 | |||
790 | len = next - offset; | ||
791 | bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2; | ||
792 | if (!bytes) | ||
793 | bytes = UINT_MAX; | ||
794 | |||
795 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); | ||
796 | error = gfs2_glock_nq(&ip->i_gh); | ||
797 | if (unlikely(error)) | ||
798 | goto out_uninit; | ||
799 | |||
800 | if (!gfs2_write_alloc_required(ip, offset, len)) | ||
801 | goto out_unlock; | ||
802 | |||
803 | while (len > 0) { | ||
804 | if (len < bytes) | ||
805 | bytes = len; | ||
806 | al = gfs2_alloc_get(ip); | ||
807 | if (!al) { | ||
808 | error = -ENOMEM; | ||
809 | goto out_unlock; | ||
810 | } | ||
811 | |||
812 | error = gfs2_quota_lock_check(ip); | ||
813 | if (error) | ||
814 | goto out_alloc_put; | ||
815 | |||
816 | retry: | ||
817 | gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); | ||
818 | |||
819 | al->al_requested = data_blocks + ind_blocks; | ||
820 | error = gfs2_inplace_reserve(ip); | ||
821 | if (error) { | ||
822 | if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) { | ||
823 | bytes >>= 1; | ||
824 | goto retry; | ||
825 | } | ||
826 | goto out_qunlock; | ||
827 | } | ||
828 | max_bytes = bytes; | ||
829 | calc_max_reserv(ip, len, &max_bytes, &data_blocks, &ind_blocks); | ||
830 | al->al_requested = data_blocks + ind_blocks; | ||
831 | |||
832 | rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA + | ||
833 | RES_RG_HDR + gfs2_rg_blocks(al); | ||
834 | if (gfs2_is_jdata(ip)) | ||
835 | rblocks += data_blocks ? data_blocks : 1; | ||
836 | |||
837 | error = gfs2_trans_begin(sdp, rblocks, | ||
838 | PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize); | ||
839 | if (error) | ||
840 | goto out_trans_fail; | ||
841 | |||
842 | error = fallocate_chunk(inode, offset, max_bytes, mode); | ||
843 | gfs2_trans_end(sdp); | ||
844 | |||
845 | if (error) | ||
846 | goto out_trans_fail; | ||
847 | |||
848 | len -= max_bytes; | ||
849 | offset += max_bytes; | ||
850 | gfs2_inplace_release(ip); | ||
851 | gfs2_quota_unlock(ip); | ||
852 | gfs2_alloc_put(ip); | ||
853 | } | ||
854 | goto out_unlock; | ||
855 | |||
856 | out_trans_fail: | ||
857 | gfs2_inplace_release(ip); | ||
858 | out_qunlock: | ||
859 | gfs2_quota_unlock(ip); | ||
860 | out_alloc_put: | ||
861 | gfs2_alloc_put(ip); | ||
862 | out_unlock: | ||
863 | gfs2_glock_dq(&ip->i_gh); | ||
864 | out_uninit: | ||
865 | gfs2_holder_uninit(&ip->i_gh); | ||
866 | return error; | ||
867 | } | ||
868 | |||
613 | #ifdef CONFIG_GFS2_FS_LOCKING_DLM | 869 | #ifdef CONFIG_GFS2_FS_LOCKING_DLM |
614 | 870 | ||
615 | /** | 871 | /** |
@@ -765,6 +1021,7 @@ const struct file_operations gfs2_file_fops = { | |||
765 | .splice_read = generic_file_splice_read, | 1021 | .splice_read = generic_file_splice_read, |
766 | .splice_write = generic_file_splice_write, | 1022 | .splice_write = generic_file_splice_write, |
767 | .setlease = gfs2_setlease, | 1023 | .setlease = gfs2_setlease, |
1024 | .fallocate = gfs2_fallocate, | ||
768 | }; | 1025 | }; |
769 | 1026 | ||
770 | const struct file_operations gfs2_dir_fops = { | 1027 | const struct file_operations gfs2_dir_fops = { |
@@ -794,6 +1051,7 @@ const struct file_operations gfs2_file_fops_nolock = { | |||
794 | .splice_read = generic_file_splice_read, | 1051 | .splice_read = generic_file_splice_read, |
795 | .splice_write = generic_file_splice_write, | 1052 | .splice_write = generic_file_splice_write, |
796 | .setlease = generic_setlease, | 1053 | .setlease = generic_setlease, |
1054 | .fallocate = gfs2_fallocate, | ||
797 | }; | 1055 | }; |
798 | 1056 | ||
799 | const struct file_operations gfs2_dir_fops_nolock = { | 1057 | const struct file_operations gfs2_dir_fops_nolock = { |
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 2232b3c780bd..7aa7d4f8984a 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c | |||
@@ -74,16 +74,14 @@ static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr) | |||
74 | } | 74 | } |
75 | 75 | ||
76 | /** | 76 | /** |
77 | * GFS2 lookup code fills in vfs inode contents based on info obtained | 77 | * gfs2_set_iop - Sets inode operations |
78 | * from directory entry inside gfs2_inode_lookup(). This has caused issues | 78 | * @inode: The inode with correct i_mode filled in |
79 | * with NFS code path since its get_dentry routine doesn't have the relevant | ||
80 | * directory entry when gfs2_inode_lookup() is invoked. Part of the code | ||
81 | * segment inside gfs2_inode_lookup code needs to get moved around. | ||
82 | * | 79 | * |
83 | * Clears I_NEW as well. | 80 | * GFS2 lookup code fills in vfs inode contents based on info obtained |
84 | **/ | 81 | * from directory entry inside gfs2_inode_lookup(). |
82 | */ | ||
85 | 83 | ||
86 | void gfs2_set_iop(struct inode *inode) | 84 | static void gfs2_set_iop(struct inode *inode) |
87 | { | 85 | { |
88 | struct gfs2_sbd *sdp = GFS2_SB(inode); | 86 | struct gfs2_sbd *sdp = GFS2_SB(inode); |
89 | umode_t mode = inode->i_mode; | 87 | umode_t mode = inode->i_mode; |
@@ -106,8 +104,6 @@ void gfs2_set_iop(struct inode *inode) | |||
106 | inode->i_op = &gfs2_file_iops; | 104 | inode->i_op = &gfs2_file_iops; |
107 | init_special_inode(inode, inode->i_mode, inode->i_rdev); | 105 | init_special_inode(inode, inode->i_mode, inode->i_rdev); |
108 | } | 106 | } |
109 | |||
110 | unlock_new_inode(inode); | ||
111 | } | 107 | } |
112 | 108 | ||
113 | /** | 109 | /** |
@@ -119,10 +115,8 @@ void gfs2_set_iop(struct inode *inode) | |||
119 | * Returns: A VFS inode, or an error | 115 | * Returns: A VFS inode, or an error |
120 | */ | 116 | */ |
121 | 117 | ||
122 | struct inode *gfs2_inode_lookup(struct super_block *sb, | 118 | struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, |
123 | unsigned int type, | 119 | u64 no_addr, u64 no_formal_ino) |
124 | u64 no_addr, | ||
125 | u64 no_formal_ino) | ||
126 | { | 120 | { |
127 | struct inode *inode; | 121 | struct inode *inode; |
128 | struct gfs2_inode *ip; | 122 | struct gfs2_inode *ip; |
@@ -152,51 +146,37 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, | |||
152 | error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); | 146 | error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); |
153 | if (unlikely(error)) | 147 | if (unlikely(error)) |
154 | goto fail_iopen; | 148 | goto fail_iopen; |
155 | ip->i_iopen_gh.gh_gl->gl_object = ip; | ||
156 | 149 | ||
150 | ip->i_iopen_gh.gh_gl->gl_object = ip; | ||
157 | gfs2_glock_put(io_gl); | 151 | gfs2_glock_put(io_gl); |
158 | io_gl = NULL; | 152 | io_gl = NULL; |
159 | 153 | ||
160 | if ((type == DT_UNKNOWN) && (no_formal_ino == 0)) | ||
161 | goto gfs2_nfsbypass; | ||
162 | |||
163 | inode->i_mode = DT2IF(type); | ||
164 | |||
165 | /* | ||
166 | * We must read the inode in order to work out its type in | ||
167 | * this case. Note that this doesn't happen often as we normally | ||
168 | * know the type beforehand. This code path only occurs during | ||
169 | * unlinked inode recovery (where it is safe to do this glock, | ||
170 | * which is not true in the general case). | ||
171 | */ | ||
172 | if (type == DT_UNKNOWN) { | 154 | if (type == DT_UNKNOWN) { |
173 | struct gfs2_holder gh; | 155 | /* Inode glock must be locked already */ |
174 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); | 156 | error = gfs2_inode_refresh(GFS2_I(inode)); |
175 | if (unlikely(error)) | 157 | if (error) |
176 | goto fail_glock; | 158 | goto fail_refresh; |
177 | /* Inode is now uptodate */ | 159 | } else { |
178 | gfs2_glock_dq_uninit(&gh); | 160 | inode->i_mode = DT2IF(type); |
179 | } | 161 | } |
180 | 162 | ||
181 | gfs2_set_iop(inode); | 163 | gfs2_set_iop(inode); |
164 | unlock_new_inode(inode); | ||
182 | } | 165 | } |
183 | 166 | ||
184 | gfs2_nfsbypass: | ||
185 | return inode; | 167 | return inode; |
186 | fail_glock: | 168 | |
187 | gfs2_glock_dq(&ip->i_iopen_gh); | 169 | fail_refresh: |
170 | ip->i_iopen_gh.gh_gl->gl_object = NULL; | ||
171 | gfs2_glock_dq_uninit(&ip->i_iopen_gh); | ||
188 | fail_iopen: | 172 | fail_iopen: |
189 | if (io_gl) | 173 | if (io_gl) |
190 | gfs2_glock_put(io_gl); | 174 | gfs2_glock_put(io_gl); |
191 | fail_put: | 175 | fail_put: |
192 | if (inode->i_state & I_NEW) | 176 | ip->i_gl->gl_object = NULL; |
193 | ip->i_gl->gl_object = NULL; | ||
194 | gfs2_glock_put(ip->i_gl); | 177 | gfs2_glock_put(ip->i_gl); |
195 | fail: | 178 | fail: |
196 | if (inode->i_state & I_NEW) | 179 | iget_failed(inode); |
197 | iget_failed(inode); | ||
198 | else | ||
199 | iput(inode); | ||
200 | return ERR_PTR(error); | 180 | return ERR_PTR(error); |
201 | } | 181 | } |
202 | 182 | ||
@@ -221,14 +201,6 @@ struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, | |||
221 | if (IS_ERR(inode)) | 201 | if (IS_ERR(inode)) |
222 | goto fail; | 202 | goto fail; |
223 | 203 | ||
224 | error = gfs2_inode_refresh(GFS2_I(inode)); | ||
225 | if (error) | ||
226 | goto fail_iput; | ||
227 | |||
228 | /* Pick up the works we bypass in gfs2_inode_lookup */ | ||
229 | if (inode->i_state & I_NEW) | ||
230 | gfs2_set_iop(inode); | ||
231 | |||
232 | /* Two extra checks for NFS only */ | 204 | /* Two extra checks for NFS only */ |
233 | if (no_formal_ino) { | 205 | if (no_formal_ino) { |
234 | error = -ESTALE; | 206 | error = -ESTALE; |
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index 732a183efdb3..3e00a66e7cbd 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h | |||
@@ -96,7 +96,6 @@ err: | |||
96 | return -EIO; | 96 | return -EIO; |
97 | } | 97 | } |
98 | 98 | ||
99 | extern void gfs2_set_iop(struct inode *inode); | ||
100 | extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, | 99 | extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, |
101 | u64 no_addr, u64 no_formal_ino); | 100 | u64 no_addr, u64 no_formal_ino); |
102 | extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, | 101 | extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, |
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 040b5a2e6556..d8b26ac2e20b 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c | |||
@@ -18,8 +18,6 @@ | |||
18 | #include <linux/gfs2_ondisk.h> | 18 | #include <linux/gfs2_ondisk.h> |
19 | #include <linux/crc32.h> | 19 | #include <linux/crc32.h> |
20 | #include <linux/fiemap.h> | 20 | #include <linux/fiemap.h> |
21 | #include <linux/swap.h> | ||
22 | #include <linux/falloc.h> | ||
23 | #include <asm/uaccess.h> | 21 | #include <asm/uaccess.h> |
24 | 22 | ||
25 | #include "gfs2.h" | 23 | #include "gfs2.h" |
@@ -1257,261 +1255,6 @@ static int gfs2_removexattr(struct dentry *dentry, const char *name) | |||
1257 | return ret; | 1255 | return ret; |
1258 | } | 1256 | } |
1259 | 1257 | ||
1260 | static void empty_write_end(struct page *page, unsigned from, | ||
1261 | unsigned to) | ||
1262 | { | ||
1263 | struct gfs2_inode *ip = GFS2_I(page->mapping->host); | ||
1264 | |||
1265 | page_zero_new_buffers(page, from, to); | ||
1266 | flush_dcache_page(page); | ||
1267 | mark_page_accessed(page); | ||
1268 | |||
1269 | if (!gfs2_is_writeback(ip)) | ||
1270 | gfs2_page_add_databufs(ip, page, from, to); | ||
1271 | |||
1272 | block_commit_write(page, from, to); | ||
1273 | } | ||
1274 | |||
1275 | |||
1276 | static int write_empty_blocks(struct page *page, unsigned from, unsigned to) | ||
1277 | { | ||
1278 | unsigned start, end, next; | ||
1279 | struct buffer_head *bh, *head; | ||
1280 | int error; | ||
1281 | |||
1282 | if (!page_has_buffers(page)) { | ||
1283 | error = __block_write_begin(page, from, to - from, gfs2_block_map); | ||
1284 | if (unlikely(error)) | ||
1285 | return error; | ||
1286 | |||
1287 | empty_write_end(page, from, to); | ||
1288 | return 0; | ||
1289 | } | ||
1290 | |||
1291 | bh = head = page_buffers(page); | ||
1292 | next = end = 0; | ||
1293 | while (next < from) { | ||
1294 | next += bh->b_size; | ||
1295 | bh = bh->b_this_page; | ||
1296 | } | ||
1297 | start = next; | ||
1298 | do { | ||
1299 | next += bh->b_size; | ||
1300 | if (buffer_mapped(bh)) { | ||
1301 | if (end) { | ||
1302 | error = __block_write_begin(page, start, end - start, | ||
1303 | gfs2_block_map); | ||
1304 | if (unlikely(error)) | ||
1305 | return error; | ||
1306 | empty_write_end(page, start, end); | ||
1307 | end = 0; | ||
1308 | } | ||
1309 | start = next; | ||
1310 | } | ||
1311 | else | ||
1312 | end = next; | ||
1313 | bh = bh->b_this_page; | ||
1314 | } while (next < to); | ||
1315 | |||
1316 | if (end) { | ||
1317 | error = __block_write_begin(page, start, end - start, gfs2_block_map); | ||
1318 | if (unlikely(error)) | ||
1319 | return error; | ||
1320 | empty_write_end(page, start, end); | ||
1321 | } | ||
1322 | |||
1323 | return 0; | ||
1324 | } | ||
1325 | |||
1326 | static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, | ||
1327 | int mode) | ||
1328 | { | ||
1329 | struct gfs2_inode *ip = GFS2_I(inode); | ||
1330 | struct buffer_head *dibh; | ||
1331 | int error; | ||
1332 | u64 start = offset >> PAGE_CACHE_SHIFT; | ||
1333 | unsigned int start_offset = offset & ~PAGE_CACHE_MASK; | ||
1334 | u64 end = (offset + len - 1) >> PAGE_CACHE_SHIFT; | ||
1335 | pgoff_t curr; | ||
1336 | struct page *page; | ||
1337 | unsigned int end_offset = (offset + len) & ~PAGE_CACHE_MASK; | ||
1338 | unsigned int from, to; | ||
1339 | |||
1340 | if (!end_offset) | ||
1341 | end_offset = PAGE_CACHE_SIZE; | ||
1342 | |||
1343 | error = gfs2_meta_inode_buffer(ip, &dibh); | ||
1344 | if (unlikely(error)) | ||
1345 | goto out; | ||
1346 | |||
1347 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
1348 | |||
1349 | if (gfs2_is_stuffed(ip)) { | ||
1350 | error = gfs2_unstuff_dinode(ip, NULL); | ||
1351 | if (unlikely(error)) | ||
1352 | goto out; | ||
1353 | } | ||
1354 | |||
1355 | curr = start; | ||
1356 | offset = start << PAGE_CACHE_SHIFT; | ||
1357 | from = start_offset; | ||
1358 | to = PAGE_CACHE_SIZE; | ||
1359 | while (curr <= end) { | ||
1360 | page = grab_cache_page_write_begin(inode->i_mapping, curr, | ||
1361 | AOP_FLAG_NOFS); | ||
1362 | if (unlikely(!page)) { | ||
1363 | error = -ENOMEM; | ||
1364 | goto out; | ||
1365 | } | ||
1366 | |||
1367 | if (curr == end) | ||
1368 | to = end_offset; | ||
1369 | error = write_empty_blocks(page, from, to); | ||
1370 | if (!error && offset + to > inode->i_size && | ||
1371 | !(mode & FALLOC_FL_KEEP_SIZE)) { | ||
1372 | i_size_write(inode, offset + to); | ||
1373 | } | ||
1374 | unlock_page(page); | ||
1375 | page_cache_release(page); | ||
1376 | if (error) | ||
1377 | goto out; | ||
1378 | curr++; | ||
1379 | offset += PAGE_CACHE_SIZE; | ||
1380 | from = 0; | ||
1381 | } | ||
1382 | |||
1383 | gfs2_dinode_out(ip, dibh->b_data); | ||
1384 | mark_inode_dirty(inode); | ||
1385 | |||
1386 | brelse(dibh); | ||
1387 | |||
1388 | out: | ||
1389 | return error; | ||
1390 | } | ||
1391 | |||
1392 | static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len, | ||
1393 | unsigned int *data_blocks, unsigned int *ind_blocks) | ||
1394 | { | ||
1395 | const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
1396 | unsigned int max_blocks = ip->i_alloc->al_rgd->rd_free_clone; | ||
1397 | unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1); | ||
1398 | |||
1399 | for (tmp = max_data; tmp > sdp->sd_diptrs;) { | ||
1400 | tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs); | ||
1401 | max_data -= tmp; | ||
1402 | } | ||
1403 | /* This calculation isn't the exact reverse of gfs2_write_calc_reserve, | ||
1404 | so it might end up with fewer data blocks */ | ||
1405 | if (max_data <= *data_blocks) | ||
1406 | return; | ||
1407 | *data_blocks = max_data; | ||
1408 | *ind_blocks = max_blocks - max_data; | ||
1409 | *len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift; | ||
1410 | if (*len > max) { | ||
1411 | *len = max; | ||
1412 | gfs2_write_calc_reserv(ip, max, data_blocks, ind_blocks); | ||
1413 | } | ||
1414 | } | ||
1415 | |||
1416 | static long gfs2_fallocate(struct inode *inode, int mode, loff_t offset, | ||
1417 | loff_t len) | ||
1418 | { | ||
1419 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
1420 | struct gfs2_inode *ip = GFS2_I(inode); | ||
1421 | unsigned int data_blocks = 0, ind_blocks = 0, rblocks; | ||
1422 | loff_t bytes, max_bytes; | ||
1423 | struct gfs2_alloc *al; | ||
1424 | int error; | ||
1425 | loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift; | ||
1426 | next = (next + 1) << sdp->sd_sb.sb_bsize_shift; | ||
1427 | |||
1428 | /* We only support the FALLOC_FL_KEEP_SIZE mode */ | ||
1429 | if (mode && (mode != FALLOC_FL_KEEP_SIZE)) | ||
1430 | return -EOPNOTSUPP; | ||
1431 | |||
1432 | offset = (offset >> sdp->sd_sb.sb_bsize_shift) << | ||
1433 | sdp->sd_sb.sb_bsize_shift; | ||
1434 | |||
1435 | len = next - offset; | ||
1436 | bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2; | ||
1437 | if (!bytes) | ||
1438 | bytes = UINT_MAX; | ||
1439 | |||
1440 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); | ||
1441 | error = gfs2_glock_nq(&ip->i_gh); | ||
1442 | if (unlikely(error)) | ||
1443 | goto out_uninit; | ||
1444 | |||
1445 | if (!gfs2_write_alloc_required(ip, offset, len)) | ||
1446 | goto out_unlock; | ||
1447 | |||
1448 | while (len > 0) { | ||
1449 | if (len < bytes) | ||
1450 | bytes = len; | ||
1451 | al = gfs2_alloc_get(ip); | ||
1452 | if (!al) { | ||
1453 | error = -ENOMEM; | ||
1454 | goto out_unlock; | ||
1455 | } | ||
1456 | |||
1457 | error = gfs2_quota_lock_check(ip); | ||
1458 | if (error) | ||
1459 | goto out_alloc_put; | ||
1460 | |||
1461 | retry: | ||
1462 | gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); | ||
1463 | |||
1464 | al->al_requested = data_blocks + ind_blocks; | ||
1465 | error = gfs2_inplace_reserve(ip); | ||
1466 | if (error) { | ||
1467 | if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) { | ||
1468 | bytes >>= 1; | ||
1469 | goto retry; | ||
1470 | } | ||
1471 | goto out_qunlock; | ||
1472 | } | ||
1473 | max_bytes = bytes; | ||
1474 | calc_max_reserv(ip, len, &max_bytes, &data_blocks, &ind_blocks); | ||
1475 | al->al_requested = data_blocks + ind_blocks; | ||
1476 | |||
1477 | rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA + | ||
1478 | RES_RG_HDR + gfs2_rg_blocks(al); | ||
1479 | if (gfs2_is_jdata(ip)) | ||
1480 | rblocks += data_blocks ? data_blocks : 1; | ||
1481 | |||
1482 | error = gfs2_trans_begin(sdp, rblocks, | ||
1483 | PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize); | ||
1484 | if (error) | ||
1485 | goto out_trans_fail; | ||
1486 | |||
1487 | error = fallocate_chunk(inode, offset, max_bytes, mode); | ||
1488 | gfs2_trans_end(sdp); | ||
1489 | |||
1490 | if (error) | ||
1491 | goto out_trans_fail; | ||
1492 | |||
1493 | len -= max_bytes; | ||
1494 | offset += max_bytes; | ||
1495 | gfs2_inplace_release(ip); | ||
1496 | gfs2_quota_unlock(ip); | ||
1497 | gfs2_alloc_put(ip); | ||
1498 | } | ||
1499 | goto out_unlock; | ||
1500 | |||
1501 | out_trans_fail: | ||
1502 | gfs2_inplace_release(ip); | ||
1503 | out_qunlock: | ||
1504 | gfs2_quota_unlock(ip); | ||
1505 | out_alloc_put: | ||
1506 | gfs2_alloc_put(ip); | ||
1507 | out_unlock: | ||
1508 | gfs2_glock_dq(&ip->i_gh); | ||
1509 | out_uninit: | ||
1510 | gfs2_holder_uninit(&ip->i_gh); | ||
1511 | return error; | ||
1512 | } | ||
1513 | |||
1514 | |||
1515 | static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 1258 | static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
1516 | u64 start, u64 len) | 1259 | u64 start, u64 len) |
1517 | { | 1260 | { |
@@ -1562,7 +1305,6 @@ const struct inode_operations gfs2_file_iops = { | |||
1562 | .getxattr = gfs2_getxattr, | 1305 | .getxattr = gfs2_getxattr, |
1563 | .listxattr = gfs2_listxattr, | 1306 | .listxattr = gfs2_listxattr, |
1564 | .removexattr = gfs2_removexattr, | 1307 | .removexattr = gfs2_removexattr, |
1565 | .fallocate = gfs2_fallocate, | ||
1566 | .fiemap = gfs2_fiemap, | 1308 | .fiemap = gfs2_fiemap, |
1567 | }; | 1309 | }; |
1568 | 1310 | ||
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 16c2ecac7eb7..ec73ed70bae1 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c | |||
@@ -1336,6 +1336,7 @@ static void gfs2_evict_inode(struct inode *inode) | |||
1336 | if (error) | 1336 | if (error) |
1337 | goto out_truncate; | 1337 | goto out_truncate; |
1338 | 1338 | ||
1339 | ip->i_iopen_gh.gh_flags |= GL_NOCACHE; | ||
1339 | gfs2_glock_dq_wait(&ip->i_iopen_gh); | 1340 | gfs2_glock_dq_wait(&ip->i_iopen_gh); |
1340 | gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh); | 1341 | gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh); |
1341 | error = gfs2_glock_nq(&ip->i_iopen_gh); | 1342 | error = gfs2_glock_nq(&ip->i_iopen_gh); |
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c index 52a0bcaa7b6d..b1991a2a08e0 100644 --- a/fs/hfsplus/extents.c +++ b/fs/hfsplus/extents.c | |||
@@ -397,8 +397,8 @@ int hfsplus_file_extend(struct inode *inode) | |||
397 | u32 start, len, goal; | 397 | u32 start, len, goal; |
398 | int res; | 398 | int res; |
399 | 399 | ||
400 | if (sbi->total_blocks - sbi->free_blocks + 8 > | 400 | if (sbi->alloc_file->i_size * 8 < |
401 | sbi->alloc_file->i_size * 8) { | 401 | sbi->total_blocks - sbi->free_blocks + 8) { |
402 | /* extend alloc file */ | 402 | /* extend alloc file */ |
403 | printk(KERN_ERR "hfs: extend alloc file! " | 403 | printk(KERN_ERR "hfs: extend alloc file! " |
404 | "(%llu,%u,%u)\n", | 404 | "(%llu,%u,%u)\n", |
diff --git a/fs/hfsplus/part_tbl.c b/fs/hfsplus/part_tbl.c index d66ad113b1cc..40ad88c12c64 100644 --- a/fs/hfsplus/part_tbl.c +++ b/fs/hfsplus/part_tbl.c | |||
@@ -134,7 +134,7 @@ int hfs_part_find(struct super_block *sb, | |||
134 | res = hfsplus_submit_bio(sb->s_bdev, *part_start + HFS_PMAP_BLK, | 134 | res = hfsplus_submit_bio(sb->s_bdev, *part_start + HFS_PMAP_BLK, |
135 | data, READ); | 135 | data, READ); |
136 | if (res) | 136 | if (res) |
137 | return res; | 137 | goto out; |
138 | 138 | ||
139 | switch (be16_to_cpu(*((__be16 *)data))) { | 139 | switch (be16_to_cpu(*((__be16 *)data))) { |
140 | case HFS_OLD_PMAP_MAGIC: | 140 | case HFS_OLD_PMAP_MAGIC: |
@@ -147,7 +147,7 @@ int hfs_part_find(struct super_block *sb, | |||
147 | res = -ENOENT; | 147 | res = -ENOENT; |
148 | break; | 148 | break; |
149 | } | 149 | } |
150 | 150 | out: | |
151 | kfree(data); | 151 | kfree(data); |
152 | return res; | 152 | return res; |
153 | } | 153 | } |
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 9a3b4795f43c..b49b55584c84 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c | |||
@@ -338,20 +338,22 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
338 | struct inode *root, *inode; | 338 | struct inode *root, *inode; |
339 | struct qstr str; | 339 | struct qstr str; |
340 | struct nls_table *nls = NULL; | 340 | struct nls_table *nls = NULL; |
341 | int err = -EINVAL; | 341 | int err; |
342 | 342 | ||
343 | err = -EINVAL; | ||
343 | sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); | 344 | sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); |
344 | if (!sbi) | 345 | if (!sbi) |
345 | return -ENOMEM; | 346 | goto out; |
346 | 347 | ||
347 | sb->s_fs_info = sbi; | 348 | sb->s_fs_info = sbi; |
348 | mutex_init(&sbi->alloc_mutex); | 349 | mutex_init(&sbi->alloc_mutex); |
349 | mutex_init(&sbi->vh_mutex); | 350 | mutex_init(&sbi->vh_mutex); |
350 | hfsplus_fill_defaults(sbi); | 351 | hfsplus_fill_defaults(sbi); |
352 | |||
353 | err = -EINVAL; | ||
351 | if (!hfsplus_parse_options(data, sbi)) { | 354 | if (!hfsplus_parse_options(data, sbi)) { |
352 | printk(KERN_ERR "hfs: unable to parse mount options\n"); | 355 | printk(KERN_ERR "hfs: unable to parse mount options\n"); |
353 | err = -EINVAL; | 356 | goto out_unload_nls; |
354 | goto cleanup; | ||
355 | } | 357 | } |
356 | 358 | ||
357 | /* temporarily use utf8 to correctly find the hidden dir below */ | 359 | /* temporarily use utf8 to correctly find the hidden dir below */ |
@@ -359,16 +361,14 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
359 | sbi->nls = load_nls("utf8"); | 361 | sbi->nls = load_nls("utf8"); |
360 | if (!sbi->nls) { | 362 | if (!sbi->nls) { |
361 | printk(KERN_ERR "hfs: unable to load nls for utf8\n"); | 363 | printk(KERN_ERR "hfs: unable to load nls for utf8\n"); |
362 | err = -EINVAL; | 364 | goto out_unload_nls; |
363 | goto cleanup; | ||
364 | } | 365 | } |
365 | 366 | ||
366 | /* Grab the volume header */ | 367 | /* Grab the volume header */ |
367 | if (hfsplus_read_wrapper(sb)) { | 368 | if (hfsplus_read_wrapper(sb)) { |
368 | if (!silent) | 369 | if (!silent) |
369 | printk(KERN_WARNING "hfs: unable to find HFS+ superblock\n"); | 370 | printk(KERN_WARNING "hfs: unable to find HFS+ superblock\n"); |
370 | err = -EINVAL; | 371 | goto out_unload_nls; |
371 | goto cleanup; | ||
372 | } | 372 | } |
373 | vhdr = sbi->s_vhdr; | 373 | vhdr = sbi->s_vhdr; |
374 | 374 | ||
@@ -377,7 +377,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
377 | if (be16_to_cpu(vhdr->version) < HFSPLUS_MIN_VERSION || | 377 | if (be16_to_cpu(vhdr->version) < HFSPLUS_MIN_VERSION || |
378 | be16_to_cpu(vhdr->version) > HFSPLUS_CURRENT_VERSION) { | 378 | be16_to_cpu(vhdr->version) > HFSPLUS_CURRENT_VERSION) { |
379 | printk(KERN_ERR "hfs: wrong filesystem version\n"); | 379 | printk(KERN_ERR "hfs: wrong filesystem version\n"); |
380 | goto cleanup; | 380 | goto out_free_vhdr; |
381 | } | 381 | } |
382 | sbi->total_blocks = be32_to_cpu(vhdr->total_blocks); | 382 | sbi->total_blocks = be32_to_cpu(vhdr->total_blocks); |
383 | sbi->free_blocks = be32_to_cpu(vhdr->free_blocks); | 383 | sbi->free_blocks = be32_to_cpu(vhdr->free_blocks); |
@@ -421,19 +421,19 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
421 | sbi->ext_tree = hfs_btree_open(sb, HFSPLUS_EXT_CNID); | 421 | sbi->ext_tree = hfs_btree_open(sb, HFSPLUS_EXT_CNID); |
422 | if (!sbi->ext_tree) { | 422 | if (!sbi->ext_tree) { |
423 | printk(KERN_ERR "hfs: failed to load extents file\n"); | 423 | printk(KERN_ERR "hfs: failed to load extents file\n"); |
424 | goto cleanup; | 424 | goto out_free_vhdr; |
425 | } | 425 | } |
426 | sbi->cat_tree = hfs_btree_open(sb, HFSPLUS_CAT_CNID); | 426 | sbi->cat_tree = hfs_btree_open(sb, HFSPLUS_CAT_CNID); |
427 | if (!sbi->cat_tree) { | 427 | if (!sbi->cat_tree) { |
428 | printk(KERN_ERR "hfs: failed to load catalog file\n"); | 428 | printk(KERN_ERR "hfs: failed to load catalog file\n"); |
429 | goto cleanup; | 429 | goto out_close_ext_tree; |
430 | } | 430 | } |
431 | 431 | ||
432 | inode = hfsplus_iget(sb, HFSPLUS_ALLOC_CNID); | 432 | inode = hfsplus_iget(sb, HFSPLUS_ALLOC_CNID); |
433 | if (IS_ERR(inode)) { | 433 | if (IS_ERR(inode)) { |
434 | printk(KERN_ERR "hfs: failed to load allocation file\n"); | 434 | printk(KERN_ERR "hfs: failed to load allocation file\n"); |
435 | err = PTR_ERR(inode); | 435 | err = PTR_ERR(inode); |
436 | goto cleanup; | 436 | goto out_close_cat_tree; |
437 | } | 437 | } |
438 | sbi->alloc_file = inode; | 438 | sbi->alloc_file = inode; |
439 | 439 | ||
@@ -442,14 +442,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
442 | if (IS_ERR(root)) { | 442 | if (IS_ERR(root)) { |
443 | printk(KERN_ERR "hfs: failed to load root directory\n"); | 443 | printk(KERN_ERR "hfs: failed to load root directory\n"); |
444 | err = PTR_ERR(root); | 444 | err = PTR_ERR(root); |
445 | goto cleanup; | 445 | goto out_put_alloc_file; |
446 | } | ||
447 | sb->s_d_op = &hfsplus_dentry_operations; | ||
448 | sb->s_root = d_alloc_root(root); | ||
449 | if (!sb->s_root) { | ||
450 | iput(root); | ||
451 | err = -ENOMEM; | ||
452 | goto cleanup; | ||
453 | } | 446 | } |
454 | 447 | ||
455 | str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1; | 448 | str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1; |
@@ -459,46 +452,69 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
459 | if (!hfs_brec_read(&fd, &entry, sizeof(entry))) { | 452 | if (!hfs_brec_read(&fd, &entry, sizeof(entry))) { |
460 | hfs_find_exit(&fd); | 453 | hfs_find_exit(&fd); |
461 | if (entry.type != cpu_to_be16(HFSPLUS_FOLDER)) | 454 | if (entry.type != cpu_to_be16(HFSPLUS_FOLDER)) |
462 | goto cleanup; | 455 | goto out_put_root; |
463 | inode = hfsplus_iget(sb, be32_to_cpu(entry.folder.id)); | 456 | inode = hfsplus_iget(sb, be32_to_cpu(entry.folder.id)); |
464 | if (IS_ERR(inode)) { | 457 | if (IS_ERR(inode)) { |
465 | err = PTR_ERR(inode); | 458 | err = PTR_ERR(inode); |
466 | goto cleanup; | 459 | goto out_put_root; |
467 | } | 460 | } |
468 | sbi->hidden_dir = inode; | 461 | sbi->hidden_dir = inode; |
469 | } else | 462 | } else |
470 | hfs_find_exit(&fd); | 463 | hfs_find_exit(&fd); |
471 | 464 | ||
472 | if (sb->s_flags & MS_RDONLY) | 465 | if (!(sb->s_flags & MS_RDONLY)) { |
473 | goto out; | 466 | /* |
467 | * H+LX == hfsplusutils, H+Lx == this driver, H+lx is unused | ||
468 | * all three are registered with Apple for our use | ||
469 | */ | ||
470 | vhdr->last_mount_vers = cpu_to_be32(HFSP_MOUNT_VERSION); | ||
471 | vhdr->modify_date = hfsp_now2mt(); | ||
472 | be32_add_cpu(&vhdr->write_count, 1); | ||
473 | vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT); | ||
474 | vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT); | ||
475 | hfsplus_sync_fs(sb, 1); | ||
474 | 476 | ||
475 | /* H+LX == hfsplusutils, H+Lx == this driver, H+lx is unused | 477 | if (!sbi->hidden_dir) { |
476 | * all three are registered with Apple for our use | 478 | mutex_lock(&sbi->vh_mutex); |
477 | */ | 479 | sbi->hidden_dir = hfsplus_new_inode(sb, S_IFDIR); |
478 | vhdr->last_mount_vers = cpu_to_be32(HFSP_MOUNT_VERSION); | 480 | hfsplus_create_cat(sbi->hidden_dir->i_ino, root, &str, |
479 | vhdr->modify_date = hfsp_now2mt(); | 481 | sbi->hidden_dir); |
480 | be32_add_cpu(&vhdr->write_count, 1); | 482 | mutex_unlock(&sbi->vh_mutex); |
481 | vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT); | 483 | |
482 | vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT); | 484 | hfsplus_mark_inode_dirty(sbi->hidden_dir, |
483 | hfsplus_sync_fs(sb, 1); | 485 | HFSPLUS_I_CAT_DIRTY); |
484 | 486 | } | |
485 | if (!sbi->hidden_dir) { | ||
486 | mutex_lock(&sbi->vh_mutex); | ||
487 | sbi->hidden_dir = hfsplus_new_inode(sb, S_IFDIR); | ||
488 | hfsplus_create_cat(sbi->hidden_dir->i_ino, sb->s_root->d_inode, | ||
489 | &str, sbi->hidden_dir); | ||
490 | mutex_unlock(&sbi->vh_mutex); | ||
491 | |||
492 | hfsplus_mark_inode_dirty(sbi->hidden_dir, HFSPLUS_I_CAT_DIRTY); | ||
493 | } | 487 | } |
494 | out: | 488 | |
489 | sb->s_d_op = &hfsplus_dentry_operations; | ||
490 | sb->s_root = d_alloc_root(root); | ||
491 | if (!sb->s_root) { | ||
492 | err = -ENOMEM; | ||
493 | goto out_put_hidden_dir; | ||
494 | } | ||
495 | |||
495 | unload_nls(sbi->nls); | 496 | unload_nls(sbi->nls); |
496 | sbi->nls = nls; | 497 | sbi->nls = nls; |
497 | return 0; | 498 | return 0; |
498 | 499 | ||
499 | cleanup: | 500 | out_put_hidden_dir: |
500 | hfsplus_put_super(sb); | 501 | iput(sbi->hidden_dir); |
502 | out_put_root: | ||
503 | iput(sbi->alloc_file); | ||
504 | out_put_alloc_file: | ||
505 | iput(sbi->alloc_file); | ||
506 | out_close_cat_tree: | ||
507 | hfs_btree_close(sbi->cat_tree); | ||
508 | out_close_ext_tree: | ||
509 | hfs_btree_close(sbi->ext_tree); | ||
510 | out_free_vhdr: | ||
511 | kfree(sbi->s_vhdr); | ||
512 | kfree(sbi->s_backup_vhdr); | ||
513 | out_unload_nls: | ||
514 | unload_nls(sbi->nls); | ||
501 | unload_nls(nls); | 515 | unload_nls(nls); |
516 | kfree(sbi); | ||
517 | out: | ||
502 | return err; | 518 | return err; |
503 | } | 519 | } |
504 | 520 | ||
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c index 196231794f64..3031d81f5f0f 100644 --- a/fs/hfsplus/wrapper.c +++ b/fs/hfsplus/wrapper.c | |||
@@ -167,7 +167,7 @@ reread: | |||
167 | break; | 167 | break; |
168 | case cpu_to_be16(HFSP_WRAP_MAGIC): | 168 | case cpu_to_be16(HFSP_WRAP_MAGIC): |
169 | if (!hfsplus_read_mdb(sbi->s_vhdr, &wd)) | 169 | if (!hfsplus_read_mdb(sbi->s_vhdr, &wd)) |
170 | goto out; | 170 | goto out_free_backup_vhdr; |
171 | wd.ablk_size >>= HFSPLUS_SECTOR_SHIFT; | 171 | wd.ablk_size >>= HFSPLUS_SECTOR_SHIFT; |
172 | part_start += wd.ablk_start + wd.embed_start * wd.ablk_size; | 172 | part_start += wd.ablk_start + wd.embed_start * wd.ablk_size; |
173 | part_size = wd.embed_count * wd.ablk_size; | 173 | part_size = wd.embed_count * wd.ablk_size; |
@@ -179,7 +179,7 @@ reread: | |||
179 | * (should do this only for cdrom/loop though) | 179 | * (should do this only for cdrom/loop though) |
180 | */ | 180 | */ |
181 | if (hfs_part_find(sb, &part_start, &part_size)) | 181 | if (hfs_part_find(sb, &part_start, &part_size)) |
182 | goto out; | 182 | goto out_free_backup_vhdr; |
183 | goto reread; | 183 | goto reread; |
184 | } | 184 | } |
185 | 185 | ||
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c index 56f0da1cfd10..1ae35baa539e 100644 --- a/fs/hpfs/inode.c +++ b/fs/hpfs/inode.c | |||
@@ -281,7 +281,7 @@ int hpfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
281 | attr->ia_size != i_size_read(inode)) { | 281 | attr->ia_size != i_size_read(inode)) { |
282 | error = vmtruncate(inode, attr->ia_size); | 282 | error = vmtruncate(inode, attr->ia_size); |
283 | if (error) | 283 | if (error) |
284 | return error; | 284 | goto out_unlock; |
285 | } | 285 | } |
286 | 286 | ||
287 | setattr_copy(inode, attr); | 287 | setattr_copy(inode, attr); |
diff --git a/fs/internal.h b/fs/internal.h index 9687c2ee2735..0663568b1247 100644 --- a/fs/internal.h +++ b/fs/internal.h | |||
@@ -70,6 +70,10 @@ extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *, | |||
70 | extern void release_mounts(struct list_head *); | 70 | extern void release_mounts(struct list_head *); |
71 | extern void umount_tree(struct vfsmount *, int, struct list_head *); | 71 | extern void umount_tree(struct vfsmount *, int, struct list_head *); |
72 | extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int); | 72 | extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int); |
73 | extern int finish_automount(struct vfsmount *, struct path *); | ||
74 | |||
75 | extern void mnt_make_longterm(struct vfsmount *); | ||
76 | extern void mnt_make_shortterm(struct vfsmount *); | ||
73 | 77 | ||
74 | extern void __init mnt_init(void); | 78 | extern void __init mnt_init(void); |
75 | 79 | ||
diff --git a/fs/ioctl.c b/fs/ioctl.c index d6cc16476620..1eebeb72b202 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c | |||
@@ -86,7 +86,7 @@ int fiemap_fill_next_extent(struct fiemap_extent_info *fieinfo, u64 logical, | |||
86 | u64 phys, u64 len, u32 flags) | 86 | u64 phys, u64 len, u32 flags) |
87 | { | 87 | { |
88 | struct fiemap_extent extent; | 88 | struct fiemap_extent extent; |
89 | struct fiemap_extent *dest = fieinfo->fi_extents_start; | 89 | struct fiemap_extent __user *dest = fieinfo->fi_extents_start; |
90 | 90 | ||
91 | /* only count the extents */ | 91 | /* only count the extents */ |
92 | if (fieinfo->fi_extents_max == 0) { | 92 | if (fieinfo->fi_extents_max == 0) { |
@@ -173,6 +173,7 @@ static int fiemap_check_ranges(struct super_block *sb, | |||
173 | static int ioctl_fiemap(struct file *filp, unsigned long arg) | 173 | static int ioctl_fiemap(struct file *filp, unsigned long arg) |
174 | { | 174 | { |
175 | struct fiemap fiemap; | 175 | struct fiemap fiemap; |
176 | struct fiemap __user *ufiemap = (struct fiemap __user *) arg; | ||
176 | struct fiemap_extent_info fieinfo = { 0, }; | 177 | struct fiemap_extent_info fieinfo = { 0, }; |
177 | struct inode *inode = filp->f_path.dentry->d_inode; | 178 | struct inode *inode = filp->f_path.dentry->d_inode; |
178 | struct super_block *sb = inode->i_sb; | 179 | struct super_block *sb = inode->i_sb; |
@@ -182,8 +183,7 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg) | |||
182 | if (!inode->i_op->fiemap) | 183 | if (!inode->i_op->fiemap) |
183 | return -EOPNOTSUPP; | 184 | return -EOPNOTSUPP; |
184 | 185 | ||
185 | if (copy_from_user(&fiemap, (struct fiemap __user *)arg, | 186 | if (copy_from_user(&fiemap, ufiemap, sizeof(fiemap))) |
186 | sizeof(struct fiemap))) | ||
187 | return -EFAULT; | 187 | return -EFAULT; |
188 | 188 | ||
189 | if (fiemap.fm_extent_count > FIEMAP_MAX_EXTENTS) | 189 | if (fiemap.fm_extent_count > FIEMAP_MAX_EXTENTS) |
@@ -196,7 +196,7 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg) | |||
196 | 196 | ||
197 | fieinfo.fi_flags = fiemap.fm_flags; | 197 | fieinfo.fi_flags = fiemap.fm_flags; |
198 | fieinfo.fi_extents_max = fiemap.fm_extent_count; | 198 | fieinfo.fi_extents_max = fiemap.fm_extent_count; |
199 | fieinfo.fi_extents_start = (struct fiemap_extent *)(arg + sizeof(fiemap)); | 199 | fieinfo.fi_extents_start = ufiemap->fm_extents; |
200 | 200 | ||
201 | if (fiemap.fm_extent_count != 0 && | 201 | if (fiemap.fm_extent_count != 0 && |
202 | !access_ok(VERIFY_WRITE, fieinfo.fi_extents_start, | 202 | !access_ok(VERIFY_WRITE, fieinfo.fi_extents_start, |
@@ -209,7 +209,7 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg) | |||
209 | error = inode->i_op->fiemap(inode, &fieinfo, fiemap.fm_start, len); | 209 | error = inode->i_op->fiemap(inode, &fieinfo, fiemap.fm_start, len); |
210 | fiemap.fm_flags = fieinfo.fi_flags; | 210 | fiemap.fm_flags = fieinfo.fi_flags; |
211 | fiemap.fm_mapped_extents = fieinfo.fi_extents_mapped; | 211 | fiemap.fm_mapped_extents = fieinfo.fi_extents_mapped; |
212 | if (copy_to_user((char *)arg, &fiemap, sizeof(fiemap))) | 212 | if (copy_to_user(ufiemap, &fiemap, sizeof(fiemap))) |
213 | error = -EFAULT; | 213 | error = -EFAULT; |
214 | 214 | ||
215 | return error; | 215 | return error; |
@@ -273,6 +273,13 @@ int __generic_block_fiemap(struct inode *inode, | |||
273 | len = isize; | 273 | len = isize; |
274 | } | 274 | } |
275 | 275 | ||
276 | /* | ||
277 | * Some filesystems can't deal with being asked to map less than | ||
278 | * blocksize, so make sure our len is at least block length. | ||
279 | */ | ||
280 | if (logical_to_blk(inode, len) == 0) | ||
281 | len = blk_to_logical(inode, 1); | ||
282 | |||
276 | start_blk = logical_to_blk(inode, start); | 283 | start_blk = logical_to_blk(inode, start); |
277 | last_blk = logical_to_blk(inode, start + len - 1); | 284 | last_blk = logical_to_blk(inode, start + len - 1); |
278 | 285 | ||
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 9e4686900f18..97e73469b2c4 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -473,7 +473,8 @@ int __jbd2_log_space_left(journal_t *journal) | |||
473 | } | 473 | } |
474 | 474 | ||
475 | /* | 475 | /* |
476 | * Called under j_state_lock. Returns true if a transaction commit was started. | 476 | * Called with j_state_lock locked for writing. |
477 | * Returns true if a transaction commit was started. | ||
477 | */ | 478 | */ |
478 | int __jbd2_log_start_commit(journal_t *journal, tid_t target) | 479 | int __jbd2_log_start_commit(journal_t *journal, tid_t target) |
479 | { | 480 | { |
@@ -520,11 +521,13 @@ int jbd2_journal_force_commit_nested(journal_t *journal) | |||
520 | { | 521 | { |
521 | transaction_t *transaction = NULL; | 522 | transaction_t *transaction = NULL; |
522 | tid_t tid; | 523 | tid_t tid; |
524 | int need_to_start = 0; | ||
523 | 525 | ||
524 | read_lock(&journal->j_state_lock); | 526 | read_lock(&journal->j_state_lock); |
525 | if (journal->j_running_transaction && !current->journal_info) { | 527 | if (journal->j_running_transaction && !current->journal_info) { |
526 | transaction = journal->j_running_transaction; | 528 | transaction = journal->j_running_transaction; |
527 | __jbd2_log_start_commit(journal, transaction->t_tid); | 529 | if (!tid_geq(journal->j_commit_request, transaction->t_tid)) |
530 | need_to_start = 1; | ||
528 | } else if (journal->j_committing_transaction) | 531 | } else if (journal->j_committing_transaction) |
529 | transaction = journal->j_committing_transaction; | 532 | transaction = journal->j_committing_transaction; |
530 | 533 | ||
@@ -535,6 +538,8 @@ int jbd2_journal_force_commit_nested(journal_t *journal) | |||
535 | 538 | ||
536 | tid = transaction->t_tid; | 539 | tid = transaction->t_tid; |
537 | read_unlock(&journal->j_state_lock); | 540 | read_unlock(&journal->j_state_lock); |
541 | if (need_to_start) | ||
542 | jbd2_log_start_commit(journal, tid); | ||
538 | jbd2_log_wait_commit(journal, tid); | 543 | jbd2_log_wait_commit(journal, tid); |
539 | return 1; | 544 | return 1; |
540 | } | 545 | } |
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index faad2bd787c7..1d1191050f99 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -117,10 +117,10 @@ static inline void update_t_max_wait(transaction_t *transaction) | |||
117 | static int start_this_handle(journal_t *journal, handle_t *handle, | 117 | static int start_this_handle(journal_t *journal, handle_t *handle, |
118 | int gfp_mask) | 118 | int gfp_mask) |
119 | { | 119 | { |
120 | transaction_t *transaction; | 120 | transaction_t *transaction, *new_transaction = NULL; |
121 | int needed; | 121 | tid_t tid; |
122 | int nblocks = handle->h_buffer_credits; | 122 | int needed, need_to_start; |
123 | transaction_t *new_transaction = NULL; | 123 | int nblocks = handle->h_buffer_credits; |
124 | 124 | ||
125 | if (nblocks > journal->j_max_transaction_buffers) { | 125 | if (nblocks > journal->j_max_transaction_buffers) { |
126 | printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n", | 126 | printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n", |
@@ -222,8 +222,11 @@ repeat: | |||
222 | atomic_sub(nblocks, &transaction->t_outstanding_credits); | 222 | atomic_sub(nblocks, &transaction->t_outstanding_credits); |
223 | prepare_to_wait(&journal->j_wait_transaction_locked, &wait, | 223 | prepare_to_wait(&journal->j_wait_transaction_locked, &wait, |
224 | TASK_UNINTERRUPTIBLE); | 224 | TASK_UNINTERRUPTIBLE); |
225 | __jbd2_log_start_commit(journal, transaction->t_tid); | 225 | tid = transaction->t_tid; |
226 | need_to_start = !tid_geq(journal->j_commit_request, tid); | ||
226 | read_unlock(&journal->j_state_lock); | 227 | read_unlock(&journal->j_state_lock); |
228 | if (need_to_start) | ||
229 | jbd2_log_start_commit(journal, tid); | ||
227 | schedule(); | 230 | schedule(); |
228 | finish_wait(&journal->j_wait_transaction_locked, &wait); | 231 | finish_wait(&journal->j_wait_transaction_locked, &wait); |
229 | goto repeat; | 232 | goto repeat; |
@@ -442,7 +445,8 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask) | |||
442 | { | 445 | { |
443 | transaction_t *transaction = handle->h_transaction; | 446 | transaction_t *transaction = handle->h_transaction; |
444 | journal_t *journal = transaction->t_journal; | 447 | journal_t *journal = transaction->t_journal; |
445 | int ret; | 448 | tid_t tid; |
449 | int need_to_start, ret; | ||
446 | 450 | ||
447 | /* If we've had an abort of any type, don't even think about | 451 | /* If we've had an abort of any type, don't even think about |
448 | * actually doing the restart! */ | 452 | * actually doing the restart! */ |
@@ -465,8 +469,11 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask) | |||
465 | spin_unlock(&transaction->t_handle_lock); | 469 | spin_unlock(&transaction->t_handle_lock); |
466 | 470 | ||
467 | jbd_debug(2, "restarting handle %p\n", handle); | 471 | jbd_debug(2, "restarting handle %p\n", handle); |
468 | __jbd2_log_start_commit(journal, transaction->t_tid); | 472 | tid = transaction->t_tid; |
473 | need_to_start = !tid_geq(journal->j_commit_request, tid); | ||
469 | read_unlock(&journal->j_state_lock); | 474 | read_unlock(&journal->j_state_lock); |
475 | if (need_to_start) | ||
476 | jbd2_log_start_commit(journal, tid); | ||
470 | 477 | ||
471 | lock_map_release(&handle->h_lockdep_map); | 478 | lock_map_release(&handle->h_lockdep_map); |
472 | handle->h_buffer_credits = nblocks; | 479 | handle->h_buffer_credits = nblocks; |
diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c index 85c6be2db02f..3005ec4520ad 100644 --- a/fs/jffs2/build.c +++ b/fs/jffs2/build.c | |||
@@ -336,14 +336,13 @@ int jffs2_do_mount_fs(struct jffs2_sb_info *c) | |||
336 | size = sizeof(struct jffs2_eraseblock) * c->nr_blocks; | 336 | size = sizeof(struct jffs2_eraseblock) * c->nr_blocks; |
337 | #ifndef __ECOS | 337 | #ifndef __ECOS |
338 | if (jffs2_blocks_use_vmalloc(c)) | 338 | if (jffs2_blocks_use_vmalloc(c)) |
339 | c->blocks = vmalloc(size); | 339 | c->blocks = vzalloc(size); |
340 | else | 340 | else |
341 | #endif | 341 | #endif |
342 | c->blocks = kmalloc(size, GFP_KERNEL); | 342 | c->blocks = kzalloc(size, GFP_KERNEL); |
343 | if (!c->blocks) | 343 | if (!c->blocks) |
344 | return -ENOMEM; | 344 | return -ENOMEM; |
345 | 345 | ||
346 | memset(c->blocks, 0, size); | ||
347 | for (i=0; i<c->nr_blocks; i++) { | 346 | for (i=0; i<c->nr_blocks; i++) { |
348 | INIT_LIST_HEAD(&c->blocks[i].list); | 347 | INIT_LIST_HEAD(&c->blocks[i].list); |
349 | c->blocks[i].offset = i * c->sector_size; | 348 | c->blocks[i].offset = i * c->sector_size; |
diff --git a/fs/jffs2/jffs2_fs_sb.h b/fs/jffs2/jffs2_fs_sb.h index f864005de64c..0bc6a6c80a56 100644 --- a/fs/jffs2/jffs2_fs_sb.h +++ b/fs/jffs2/jffs2_fs_sb.h | |||
@@ -144,4 +144,4 @@ struct jffs2_sb_info { | |||
144 | void *os_priv; | 144 | void *os_priv; |
145 | }; | 145 | }; |
146 | 146 | ||
147 | #endif /* _JFFS2_FB_SB */ | 147 | #endif /* _JFFS2_FS_SB */ |
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c index 9b572ca40a49..4f9cc0482949 100644 --- a/fs/jffs2/xattr.c +++ b/fs/jffs2/xattr.c | |||
@@ -151,7 +151,7 @@ static int do_verify_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_dat | |||
151 | JFFS2_ERROR("node CRC failed at %#08x, read=%#08x, calc=%#08x\n", | 151 | JFFS2_ERROR("node CRC failed at %#08x, read=%#08x, calc=%#08x\n", |
152 | offset, je32_to_cpu(rx.hdr_crc), crc); | 152 | offset, je32_to_cpu(rx.hdr_crc), crc); |
153 | xd->flags |= JFFS2_XFLAGS_INVALID; | 153 | xd->flags |= JFFS2_XFLAGS_INVALID; |
154 | return EIO; | 154 | return -EIO; |
155 | } | 155 | } |
156 | totlen = PAD(sizeof(rx) + rx.name_len + 1 + je16_to_cpu(rx.value_len)); | 156 | totlen = PAD(sizeof(rx) + rx.name_len + 1 + je16_to_cpu(rx.value_len)); |
157 | if (je16_to_cpu(rx.magic) != JFFS2_MAGIC_BITMASK | 157 | if (je16_to_cpu(rx.magic) != JFFS2_MAGIC_BITMASK |
@@ -167,7 +167,7 @@ static int do_verify_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_dat | |||
167 | je32_to_cpu(rx.xid), xd->xid, | 167 | je32_to_cpu(rx.xid), xd->xid, |
168 | je32_to_cpu(rx.version), xd->version); | 168 | je32_to_cpu(rx.version), xd->version); |
169 | xd->flags |= JFFS2_XFLAGS_INVALID; | 169 | xd->flags |= JFFS2_XFLAGS_INVALID; |
170 | return EIO; | 170 | return -EIO; |
171 | } | 171 | } |
172 | xd->xprefix = rx.xprefix; | 172 | xd->xprefix = rx.xprefix; |
173 | xd->name_len = rx.name_len; | 173 | xd->name_len = rx.name_len; |
@@ -230,7 +230,7 @@ static int do_load_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum | |||
230 | ref_offset(xd->node), xd->data_crc, crc); | 230 | ref_offset(xd->node), xd->data_crc, crc); |
231 | kfree(data); | 231 | kfree(data); |
232 | xd->flags |= JFFS2_XFLAGS_INVALID; | 232 | xd->flags |= JFFS2_XFLAGS_INVALID; |
233 | return EIO; | 233 | return -EIO; |
234 | } | 234 | } |
235 | 235 | ||
236 | xd->flags |= JFFS2_XFLAGS_HOT; | 236 | xd->flags |= JFFS2_XFLAGS_HOT; |
@@ -268,7 +268,7 @@ static int load_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *x | |||
268 | if (xd->xname) | 268 | if (xd->xname) |
269 | return 0; | 269 | return 0; |
270 | if (xd->flags & JFFS2_XFLAGS_INVALID) | 270 | if (xd->flags & JFFS2_XFLAGS_INVALID) |
271 | return EIO; | 271 | return -EIO; |
272 | if (unlikely(is_xattr_datum_unchecked(c, xd))) | 272 | if (unlikely(is_xattr_datum_unchecked(c, xd))) |
273 | rc = do_verify_xattr_datum(c, xd); | 273 | rc = do_verify_xattr_datum(c, xd); |
274 | if (!rc) | 274 | if (!rc) |
@@ -460,7 +460,7 @@ static int verify_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref | |||
460 | if (crc != je32_to_cpu(rr.node_crc)) { | 460 | if (crc != je32_to_cpu(rr.node_crc)) { |
461 | JFFS2_ERROR("node CRC failed at %#08x, read=%#08x, calc=%#08x\n", | 461 | JFFS2_ERROR("node CRC failed at %#08x, read=%#08x, calc=%#08x\n", |
462 | offset, je32_to_cpu(rr.node_crc), crc); | 462 | offset, je32_to_cpu(rr.node_crc), crc); |
463 | return EIO; | 463 | return -EIO; |
464 | } | 464 | } |
465 | if (je16_to_cpu(rr.magic) != JFFS2_MAGIC_BITMASK | 465 | if (je16_to_cpu(rr.magic) != JFFS2_MAGIC_BITMASK |
466 | || je16_to_cpu(rr.nodetype) != JFFS2_NODETYPE_XREF | 466 | || je16_to_cpu(rr.nodetype) != JFFS2_NODETYPE_XREF |
@@ -470,7 +470,7 @@ static int verify_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref | |||
470 | offset, je16_to_cpu(rr.magic), JFFS2_MAGIC_BITMASK, | 470 | offset, je16_to_cpu(rr.magic), JFFS2_MAGIC_BITMASK, |
471 | je16_to_cpu(rr.nodetype), JFFS2_NODETYPE_XREF, | 471 | je16_to_cpu(rr.nodetype), JFFS2_NODETYPE_XREF, |
472 | je32_to_cpu(rr.totlen), PAD(sizeof(rr))); | 472 | je32_to_cpu(rr.totlen), PAD(sizeof(rr))); |
473 | return EIO; | 473 | return -EIO; |
474 | } | 474 | } |
475 | ref->ino = je32_to_cpu(rr.ino); | 475 | ref->ino = je32_to_cpu(rr.ino); |
476 | ref->xid = je32_to_cpu(rr.xid); | 476 | ref->xid = je32_to_cpu(rr.xid); |
diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 5f1bcb2f06f3..b7c99bfb3da6 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c | |||
@@ -520,7 +520,7 @@ static struct nlm_host *next_host_state(struct hlist_head *cache, | |||
520 | struct nsm_handle *nsm, | 520 | struct nsm_handle *nsm, |
521 | const struct nlm_reboot *info) | 521 | const struct nlm_reboot *info) |
522 | { | 522 | { |
523 | struct nlm_host *host = NULL; | 523 | struct nlm_host *host; |
524 | struct hlist_head *chain; | 524 | struct hlist_head *chain; |
525 | struct hlist_node *pos; | 525 | struct hlist_node *pos; |
526 | 526 | ||
@@ -532,12 +532,13 @@ static struct nlm_host *next_host_state(struct hlist_head *cache, | |||
532 | host->h_state++; | 532 | host->h_state++; |
533 | 533 | ||
534 | nlm_get_host(host); | 534 | nlm_get_host(host); |
535 | goto out; | 535 | mutex_unlock(&nlm_host_mutex); |
536 | return host; | ||
536 | } | 537 | } |
537 | } | 538 | } |
538 | out: | 539 | |
539 | mutex_unlock(&nlm_host_mutex); | 540 | mutex_unlock(&nlm_host_mutex); |
540 | return host; | 541 | return NULL; |
541 | } | 542 | } |
542 | 543 | ||
543 | /** | 544 | /** |
diff --git a/fs/locks.c b/fs/locks.c index 08415b2a6d36..0f3998291f78 100644 --- a/fs/locks.c +++ b/fs/locks.c | |||
@@ -444,15 +444,9 @@ static void lease_release_private_callback(struct file_lock *fl) | |||
444 | fl->fl_file->f_owner.signum = 0; | 444 | fl->fl_file->f_owner.signum = 0; |
445 | } | 445 | } |
446 | 446 | ||
447 | static int lease_mylease_callback(struct file_lock *fl, struct file_lock *try) | ||
448 | { | ||
449 | return fl->fl_file == try->fl_file; | ||
450 | } | ||
451 | |||
452 | static const struct lock_manager_operations lease_manager_ops = { | 447 | static const struct lock_manager_operations lease_manager_ops = { |
453 | .fl_break = lease_break_callback, | 448 | .fl_break = lease_break_callback, |
454 | .fl_release_private = lease_release_private_callback, | 449 | .fl_release_private = lease_release_private_callback, |
455 | .fl_mylease = lease_mylease_callback, | ||
456 | .fl_change = lease_modify, | 450 | .fl_change = lease_modify, |
457 | }; | 451 | }; |
458 | 452 | ||
@@ -1405,7 +1399,7 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp) | |||
1405 | for (before = &inode->i_flock; | 1399 | for (before = &inode->i_flock; |
1406 | ((fl = *before) != NULL) && IS_LEASE(fl); | 1400 | ((fl = *before) != NULL) && IS_LEASE(fl); |
1407 | before = &fl->fl_next) { | 1401 | before = &fl->fl_next) { |
1408 | if (lease->fl_lmops->fl_mylease(fl, lease)) | 1402 | if (fl->fl_file == filp) |
1409 | my_before = before; | 1403 | my_before = before; |
1410 | else if (fl->fl_type == (F_INPROGRESS | F_UNLCK)) | 1404 | else if (fl->fl_type == (F_INPROGRESS | F_UNLCK)) |
1411 | /* | 1405 | /* |
diff --git a/fs/mpage.c b/fs/mpage.c index fd56ca2ea556..d78455a81ec9 100644 --- a/fs/mpage.c +++ b/fs/mpage.c | |||
@@ -40,7 +40,7 @@ | |||
40 | * status of that page is hard. See end_buffer_async_read() for the details. | 40 | * status of that page is hard. See end_buffer_async_read() for the details. |
41 | * There is no point in duplicating all that complexity. | 41 | * There is no point in duplicating all that complexity. |
42 | */ | 42 | */ |
43 | static void mpage_end_io_read(struct bio *bio, int err) | 43 | static void mpage_end_io(struct bio *bio, int err) |
44 | { | 44 | { |
45 | const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); | 45 | const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); |
46 | struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; | 46 | struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; |
@@ -50,44 +50,29 @@ static void mpage_end_io_read(struct bio *bio, int err) | |||
50 | 50 | ||
51 | if (--bvec >= bio->bi_io_vec) | 51 | if (--bvec >= bio->bi_io_vec) |
52 | prefetchw(&bvec->bv_page->flags); | 52 | prefetchw(&bvec->bv_page->flags); |
53 | 53 | if (bio_data_dir(bio) == READ) { | |
54 | if (uptodate) { | 54 | if (uptodate) { |
55 | SetPageUptodate(page); | 55 | SetPageUptodate(page); |
56 | } else { | 56 | } else { |
57 | ClearPageUptodate(page); | 57 | ClearPageUptodate(page); |
58 | SetPageError(page); | 58 | SetPageError(page); |
59 | } | 59 | } |
60 | unlock_page(page); | 60 | unlock_page(page); |
61 | } while (bvec >= bio->bi_io_vec); | 61 | } else { /* bio_data_dir(bio) == WRITE */ |
62 | bio_put(bio); | 62 | if (!uptodate) { |
63 | } | 63 | SetPageError(page); |
64 | 64 | if (page->mapping) | |
65 | static void mpage_end_io_write(struct bio *bio, int err) | 65 | set_bit(AS_EIO, &page->mapping->flags); |
66 | { | 66 | } |
67 | const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); | 67 | end_page_writeback(page); |
68 | struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; | ||
69 | |||
70 | do { | ||
71 | struct page *page = bvec->bv_page; | ||
72 | |||
73 | if (--bvec >= bio->bi_io_vec) | ||
74 | prefetchw(&bvec->bv_page->flags); | ||
75 | |||
76 | if (!uptodate){ | ||
77 | SetPageError(page); | ||
78 | if (page->mapping) | ||
79 | set_bit(AS_EIO, &page->mapping->flags); | ||
80 | } | 68 | } |
81 | end_page_writeback(page); | ||
82 | } while (bvec >= bio->bi_io_vec); | 69 | } while (bvec >= bio->bi_io_vec); |
83 | bio_put(bio); | 70 | bio_put(bio); |
84 | } | 71 | } |
85 | 72 | ||
86 | static struct bio *mpage_bio_submit(int rw, struct bio *bio) | 73 | static struct bio *mpage_bio_submit(int rw, struct bio *bio) |
87 | { | 74 | { |
88 | bio->bi_end_io = mpage_end_io_read; | 75 | bio->bi_end_io = mpage_end_io; |
89 | if (rw == WRITE) | ||
90 | bio->bi_end_io = mpage_end_io_write; | ||
91 | submit_bio(rw, bio); | 76 | submit_bio(rw, bio); |
92 | return NULL; | 77 | return NULL; |
93 | } | 78 | } |
diff --git a/fs/namei.c b/fs/namei.c index 0b14f6910fc6..ec4b2d0190a8 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -368,18 +368,6 @@ void path_get(struct path *path) | |||
368 | EXPORT_SYMBOL(path_get); | 368 | EXPORT_SYMBOL(path_get); |
369 | 369 | ||
370 | /** | 370 | /** |
371 | * path_get_long - get a long reference to a path | ||
372 | * @path: path to get the reference to | ||
373 | * | ||
374 | * Given a path increment the reference count to the dentry and the vfsmount. | ||
375 | */ | ||
376 | void path_get_long(struct path *path) | ||
377 | { | ||
378 | mntget_long(path->mnt); | ||
379 | dget(path->dentry); | ||
380 | } | ||
381 | |||
382 | /** | ||
383 | * path_put - put a reference to a path | 371 | * path_put - put a reference to a path |
384 | * @path: path to put the reference to | 372 | * @path: path to put the reference to |
385 | * | 373 | * |
@@ -393,18 +381,6 @@ void path_put(struct path *path) | |||
393 | EXPORT_SYMBOL(path_put); | 381 | EXPORT_SYMBOL(path_put); |
394 | 382 | ||
395 | /** | 383 | /** |
396 | * path_put_long - put a long reference to a path | ||
397 | * @path: path to put the reference to | ||
398 | * | ||
399 | * Given a path decrement the reference count to the dentry and the vfsmount. | ||
400 | */ | ||
401 | void path_put_long(struct path *path) | ||
402 | { | ||
403 | dput(path->dentry); | ||
404 | mntput_long(path->mnt); | ||
405 | } | ||
406 | |||
407 | /** | ||
408 | * nameidata_drop_rcu - drop this nameidata out of rcu-walk | 384 | * nameidata_drop_rcu - drop this nameidata out of rcu-walk |
409 | * @nd: nameidata pathwalk data to drop | 385 | * @nd: nameidata pathwalk data to drop |
410 | * Returns: 0 on success, -ECHILD on failure | 386 | * Returns: 0 on success, -ECHILD on failure |
@@ -479,6 +455,14 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry | |||
479 | struct fs_struct *fs = current->fs; | 455 | struct fs_struct *fs = current->fs; |
480 | struct dentry *parent = nd->path.dentry; | 456 | struct dentry *parent = nd->path.dentry; |
481 | 457 | ||
458 | /* | ||
459 | * It can be possible to revalidate the dentry that we started | ||
460 | * the path walk with. force_reval_path may also revalidate the | ||
461 | * dentry already committed to the nameidata. | ||
462 | */ | ||
463 | if (unlikely(parent == dentry)) | ||
464 | return nameidata_drop_rcu(nd); | ||
465 | |||
482 | BUG_ON(!(nd->flags & LOOKUP_RCU)); | 466 | BUG_ON(!(nd->flags & LOOKUP_RCU)); |
483 | if (nd->root.mnt) { | 467 | if (nd->root.mnt) { |
484 | spin_lock(&fs->lock); | 468 | spin_lock(&fs->lock); |
@@ -577,12 +561,23 @@ static inline int nameidata_drop_rcu_last_maybe(struct nameidata *nd) | |||
577 | */ | 561 | */ |
578 | void release_open_intent(struct nameidata *nd) | 562 | void release_open_intent(struct nameidata *nd) |
579 | { | 563 | { |
580 | if (nd->intent.open.file->f_path.dentry == NULL) | 564 | struct file *file = nd->intent.open.file; |
581 | put_filp(nd->intent.open.file); | 565 | |
582 | else | 566 | if (file && !IS_ERR(file)) { |
583 | fput(nd->intent.open.file); | 567 | if (file->f_path.dentry == NULL) |
568 | put_filp(file); | ||
569 | else | ||
570 | fput(file); | ||
571 | } | ||
584 | } | 572 | } |
585 | 573 | ||
574 | /* | ||
575 | * Call d_revalidate and handle filesystems that request rcu-walk | ||
576 | * to be dropped. This may be called and return in rcu-walk mode, | ||
577 | * regardless of success or error. If -ECHILD is returned, the caller | ||
578 | * must return -ECHILD back up the path walk stack so path walk may | ||
579 | * be restarted in ref-walk mode. | ||
580 | */ | ||
586 | static int d_revalidate(struct dentry *dentry, struct nameidata *nd) | 581 | static int d_revalidate(struct dentry *dentry, struct nameidata *nd) |
587 | { | 582 | { |
588 | int status; | 583 | int status; |
@@ -673,6 +668,9 @@ force_reval_path(struct path *path, struct nameidata *nd) | |||
673 | return 0; | 668 | return 0; |
674 | 669 | ||
675 | if (!status) { | 670 | if (!status) { |
671 | /* Don't d_invalidate in rcu-walk mode */ | ||
672 | if (nameidata_drop_rcu(nd)) | ||
673 | return -ECHILD; | ||
676 | d_invalidate(dentry); | 674 | d_invalidate(dentry); |
677 | status = -ESTALE; | 675 | status = -ESTALE; |
678 | } | 676 | } |
@@ -761,7 +759,8 @@ static void path_put_conditional(struct path *path, struct nameidata *nd) | |||
761 | mntput(path->mnt); | 759 | mntput(path->mnt); |
762 | } | 760 | } |
763 | 761 | ||
764 | static inline void path_to_nameidata(struct path *path, struct nameidata *nd) | 762 | static inline void path_to_nameidata(const struct path *path, |
763 | struct nameidata *nd) | ||
765 | { | 764 | { |
766 | if (!(nd->flags & LOOKUP_RCU)) { | 765 | if (!(nd->flags & LOOKUP_RCU)) { |
767 | dput(nd->path.dentry); | 766 | dput(nd->path.dentry); |
@@ -773,20 +772,16 @@ static inline void path_to_nameidata(struct path *path, struct nameidata *nd) | |||
773 | } | 772 | } |
774 | 773 | ||
775 | static __always_inline int | 774 | static __always_inline int |
776 | __do_follow_link(struct path *path, struct nameidata *nd, void **p) | 775 | __do_follow_link(const struct path *link, struct nameidata *nd, void **p) |
777 | { | 776 | { |
778 | int error; | 777 | int error; |
779 | struct dentry *dentry = path->dentry; | 778 | struct dentry *dentry = link->dentry; |
780 | 779 | ||
781 | touch_atime(path->mnt, dentry); | 780 | touch_atime(link->mnt, dentry); |
782 | nd_set_link(nd, NULL); | 781 | nd_set_link(nd, NULL); |
783 | 782 | ||
784 | if (path->mnt != nd->path.mnt) { | 783 | if (link->mnt == nd->path.mnt) |
785 | path_to_nameidata(path, nd); | 784 | mntget(link->mnt); |
786 | nd->inode = nd->path.dentry->d_inode; | ||
787 | dget(dentry); | ||
788 | } | ||
789 | mntget(path->mnt); | ||
790 | 785 | ||
791 | nd->last_type = LAST_BIND; | 786 | nd->last_type = LAST_BIND; |
792 | *p = dentry->d_inode->i_op->follow_link(dentry, nd); | 787 | *p = dentry->d_inode->i_op->follow_link(dentry, nd); |
@@ -877,54 +872,148 @@ int follow_up(struct path *path) | |||
877 | } | 872 | } |
878 | 873 | ||
879 | /* | 874 | /* |
880 | * serialization is taken care of in namespace.c | 875 | * Perform an automount |
876 | * - return -EISDIR to tell follow_managed() to stop and return the path we | ||
877 | * were called with. | ||
881 | */ | 878 | */ |
882 | static void __follow_mount_rcu(struct nameidata *nd, struct path *path, | 879 | static int follow_automount(struct path *path, unsigned flags, |
883 | struct inode **inode) | 880 | bool *need_mntput) |
884 | { | 881 | { |
885 | while (d_mountpoint(path->dentry)) { | 882 | struct vfsmount *mnt; |
886 | struct vfsmount *mounted; | 883 | int err; |
887 | mounted = __lookup_mnt(path->mnt, path->dentry, 1); | 884 | |
888 | if (!mounted) | 885 | if (!path->dentry->d_op || !path->dentry->d_op->d_automount) |
889 | return; | 886 | return -EREMOTE; |
890 | path->mnt = mounted; | 887 | |
891 | path->dentry = mounted->mnt_root; | 888 | /* We don't want to mount if someone supplied AT_NO_AUTOMOUNT |
892 | nd->seq = read_seqcount_begin(&path->dentry->d_seq); | 889 | * and this is the terminal part of the path. |
893 | *inode = path->dentry->d_inode; | 890 | */ |
891 | if ((flags & LOOKUP_NO_AUTOMOUNT) && !(flags & LOOKUP_CONTINUE)) | ||
892 | return -EISDIR; /* we actually want to stop here */ | ||
893 | |||
894 | /* We want to mount if someone is trying to open/create a file of any | ||
895 | * type under the mountpoint, wants to traverse through the mountpoint | ||
896 | * or wants to open the mounted directory. | ||
897 | * | ||
898 | * We don't want to mount if someone's just doing a stat and they've | ||
899 | * set AT_SYMLINK_NOFOLLOW - unless they're stat'ing a directory and | ||
900 | * appended a '/' to the name. | ||
901 | */ | ||
902 | if (!(flags & LOOKUP_FOLLOW) && | ||
903 | !(flags & (LOOKUP_CONTINUE | LOOKUP_DIRECTORY | | ||
904 | LOOKUP_OPEN | LOOKUP_CREATE))) | ||
905 | return -EISDIR; | ||
906 | |||
907 | current->total_link_count++; | ||
908 | if (current->total_link_count >= 40) | ||
909 | return -ELOOP; | ||
910 | |||
911 | mnt = path->dentry->d_op->d_automount(path); | ||
912 | if (IS_ERR(mnt)) { | ||
913 | /* | ||
914 | * The filesystem is allowed to return -EISDIR here to indicate | ||
915 | * it doesn't want to automount. For instance, autofs would do | ||
916 | * this so that its userspace daemon can mount on this dentry. | ||
917 | * | ||
918 | * However, we can only permit this if it's a terminal point in | ||
919 | * the path being looked up; if it wasn't then the remainder of | ||
920 | * the path is inaccessible and we should say so. | ||
921 | */ | ||
922 | if (PTR_ERR(mnt) == -EISDIR && (flags & LOOKUP_CONTINUE)) | ||
923 | return -EREMOTE; | ||
924 | return PTR_ERR(mnt); | ||
894 | } | 925 | } |
895 | } | ||
896 | 926 | ||
897 | static int __follow_mount(struct path *path) | 927 | if (!mnt) /* mount collision */ |
898 | { | 928 | return 0; |
899 | int res = 0; | 929 | |
900 | while (d_mountpoint(path->dentry)) { | 930 | err = finish_automount(mnt, path); |
901 | struct vfsmount *mounted = lookup_mnt(path); | 931 | |
902 | if (!mounted) | 932 | switch (err) { |
903 | break; | 933 | case -EBUSY: |
934 | /* Someone else made a mount here whilst we were busy */ | ||
935 | return 0; | ||
936 | case 0: | ||
904 | dput(path->dentry); | 937 | dput(path->dentry); |
905 | if (res) | 938 | if (*need_mntput) |
906 | mntput(path->mnt); | 939 | mntput(path->mnt); |
907 | path->mnt = mounted; | 940 | path->mnt = mnt; |
908 | path->dentry = dget(mounted->mnt_root); | 941 | path->dentry = dget(mnt->mnt_root); |
909 | res = 1; | 942 | *need_mntput = true; |
943 | return 0; | ||
944 | default: | ||
945 | return err; | ||
910 | } | 946 | } |
911 | return res; | 947 | |
912 | } | 948 | } |
913 | 949 | ||
914 | static void follow_mount(struct path *path) | 950 | /* |
951 | * Handle a dentry that is managed in some way. | ||
952 | * - Flagged for transit management (autofs) | ||
953 | * - Flagged as mountpoint | ||
954 | * - Flagged as automount point | ||
955 | * | ||
956 | * This may only be called in refwalk mode. | ||
957 | * | ||
958 | * Serialization is taken care of in namespace.c | ||
959 | */ | ||
960 | static int follow_managed(struct path *path, unsigned flags) | ||
915 | { | 961 | { |
916 | while (d_mountpoint(path->dentry)) { | 962 | unsigned managed; |
917 | struct vfsmount *mounted = lookup_mnt(path); | 963 | bool need_mntput = false; |
918 | if (!mounted) | 964 | int ret; |
919 | break; | 965 | |
920 | dput(path->dentry); | 966 | /* Given that we're not holding a lock here, we retain the value in a |
921 | mntput(path->mnt); | 967 | * local variable for each dentry as we look at it so that we don't see |
922 | path->mnt = mounted; | 968 | * the components of that value change under us */ |
923 | path->dentry = dget(mounted->mnt_root); | 969 | while (managed = ACCESS_ONCE(path->dentry->d_flags), |
970 | managed &= DCACHE_MANAGED_DENTRY, | ||
971 | unlikely(managed != 0)) { | ||
972 | /* Allow the filesystem to manage the transit without i_mutex | ||
973 | * being held. */ | ||
974 | if (managed & DCACHE_MANAGE_TRANSIT) { | ||
975 | BUG_ON(!path->dentry->d_op); | ||
976 | BUG_ON(!path->dentry->d_op->d_manage); | ||
977 | ret = path->dentry->d_op->d_manage(path->dentry, | ||
978 | false, false); | ||
979 | if (ret < 0) | ||
980 | return ret == -EISDIR ? 0 : ret; | ||
981 | } | ||
982 | |||
983 | /* Transit to a mounted filesystem. */ | ||
984 | if (managed & DCACHE_MOUNTED) { | ||
985 | struct vfsmount *mounted = lookup_mnt(path); | ||
986 | if (mounted) { | ||
987 | dput(path->dentry); | ||
988 | if (need_mntput) | ||
989 | mntput(path->mnt); | ||
990 | path->mnt = mounted; | ||
991 | path->dentry = dget(mounted->mnt_root); | ||
992 | need_mntput = true; | ||
993 | continue; | ||
994 | } | ||
995 | |||
996 | /* Something is mounted on this dentry in another | ||
997 | * namespace and/or whatever was mounted there in this | ||
998 | * namespace got unmounted before we managed to get the | ||
999 | * vfsmount_lock */ | ||
1000 | } | ||
1001 | |||
1002 | /* Handle an automount point */ | ||
1003 | if (managed & DCACHE_NEED_AUTOMOUNT) { | ||
1004 | ret = follow_automount(path, flags, &need_mntput); | ||
1005 | if (ret < 0) | ||
1006 | return ret == -EISDIR ? 0 : ret; | ||
1007 | continue; | ||
1008 | } | ||
1009 | |||
1010 | /* We didn't change the current path point */ | ||
1011 | break; | ||
924 | } | 1012 | } |
1013 | return 0; | ||
925 | } | 1014 | } |
926 | 1015 | ||
927 | int follow_down(struct path *path) | 1016 | int follow_down_one(struct path *path) |
928 | { | 1017 | { |
929 | struct vfsmount *mounted; | 1018 | struct vfsmount *mounted; |
930 | 1019 | ||
@@ -939,13 +1028,41 @@ int follow_down(struct path *path) | |||
939 | return 0; | 1028 | return 0; |
940 | } | 1029 | } |
941 | 1030 | ||
1031 | /* | ||
1032 | * Skip to top of mountpoint pile in rcuwalk mode. We abort the rcu-walk if we | ||
1033 | * meet a managed dentry and we're not walking to "..". True is returned to | ||
1034 | * continue, false to abort. | ||
1035 | */ | ||
1036 | static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, | ||
1037 | struct inode **inode, bool reverse_transit) | ||
1038 | { | ||
1039 | while (d_mountpoint(path->dentry)) { | ||
1040 | struct vfsmount *mounted; | ||
1041 | if (unlikely(path->dentry->d_flags & DCACHE_MANAGE_TRANSIT) && | ||
1042 | !reverse_transit && | ||
1043 | path->dentry->d_op->d_manage(path->dentry, false, true) < 0) | ||
1044 | return false; | ||
1045 | mounted = __lookup_mnt(path->mnt, path->dentry, 1); | ||
1046 | if (!mounted) | ||
1047 | break; | ||
1048 | path->mnt = mounted; | ||
1049 | path->dentry = mounted->mnt_root; | ||
1050 | nd->seq = read_seqcount_begin(&path->dentry->d_seq); | ||
1051 | *inode = path->dentry->d_inode; | ||
1052 | } | ||
1053 | |||
1054 | if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT)) | ||
1055 | return reverse_transit; | ||
1056 | return true; | ||
1057 | } | ||
1058 | |||
942 | static int follow_dotdot_rcu(struct nameidata *nd) | 1059 | static int follow_dotdot_rcu(struct nameidata *nd) |
943 | { | 1060 | { |
944 | struct inode *inode = nd->inode; | 1061 | struct inode *inode = nd->inode; |
945 | 1062 | ||
946 | set_root_rcu(nd); | 1063 | set_root_rcu(nd); |
947 | 1064 | ||
948 | while(1) { | 1065 | while (1) { |
949 | if (nd->path.dentry == nd->root.dentry && | 1066 | if (nd->path.dentry == nd->root.dentry && |
950 | nd->path.mnt == nd->root.mnt) { | 1067 | nd->path.mnt == nd->root.mnt) { |
951 | break; | 1068 | break; |
@@ -968,12 +1085,80 @@ static int follow_dotdot_rcu(struct nameidata *nd) | |||
968 | nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); | 1085 | nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); |
969 | inode = nd->path.dentry->d_inode; | 1086 | inode = nd->path.dentry->d_inode; |
970 | } | 1087 | } |
971 | __follow_mount_rcu(nd, &nd->path, &inode); | 1088 | __follow_mount_rcu(nd, &nd->path, &inode, true); |
972 | nd->inode = inode; | 1089 | nd->inode = inode; |
973 | 1090 | ||
974 | return 0; | 1091 | return 0; |
975 | } | 1092 | } |
976 | 1093 | ||
1094 | /* | ||
1095 | * Follow down to the covering mount currently visible to userspace. At each | ||
1096 | * point, the filesystem owning that dentry may be queried as to whether the | ||
1097 | * caller is permitted to proceed or not. | ||
1098 | * | ||
1099 | * Care must be taken as namespace_sem may be held (indicated by mounting_here | ||
1100 | * being true). | ||
1101 | */ | ||
1102 | int follow_down(struct path *path, bool mounting_here) | ||
1103 | { | ||
1104 | unsigned managed; | ||
1105 | int ret; | ||
1106 | |||
1107 | while (managed = ACCESS_ONCE(path->dentry->d_flags), | ||
1108 | unlikely(managed & DCACHE_MANAGED_DENTRY)) { | ||
1109 | /* Allow the filesystem to manage the transit without i_mutex | ||
1110 | * being held. | ||
1111 | * | ||
1112 | * We indicate to the filesystem if someone is trying to mount | ||
1113 | * something here. This gives autofs the chance to deny anyone | ||
1114 | * other than its daemon the right to mount on its | ||
1115 | * superstructure. | ||
1116 | * | ||
1117 | * The filesystem may sleep at this point. | ||
1118 | */ | ||
1119 | if (managed & DCACHE_MANAGE_TRANSIT) { | ||
1120 | BUG_ON(!path->dentry->d_op); | ||
1121 | BUG_ON(!path->dentry->d_op->d_manage); | ||
1122 | ret = path->dentry->d_op->d_manage( | ||
1123 | path->dentry, mounting_here, false); | ||
1124 | if (ret < 0) | ||
1125 | return ret == -EISDIR ? 0 : ret; | ||
1126 | } | ||
1127 | |||
1128 | /* Transit to a mounted filesystem. */ | ||
1129 | if (managed & DCACHE_MOUNTED) { | ||
1130 | struct vfsmount *mounted = lookup_mnt(path); | ||
1131 | if (!mounted) | ||
1132 | break; | ||
1133 | dput(path->dentry); | ||
1134 | mntput(path->mnt); | ||
1135 | path->mnt = mounted; | ||
1136 | path->dentry = dget(mounted->mnt_root); | ||
1137 | continue; | ||
1138 | } | ||
1139 | |||
1140 | /* Don't handle automount points here */ | ||
1141 | break; | ||
1142 | } | ||
1143 | return 0; | ||
1144 | } | ||
1145 | |||
1146 | /* | ||
1147 | * Skip to top of mountpoint pile in refwalk mode for follow_dotdot() | ||
1148 | */ | ||
1149 | static void follow_mount(struct path *path) | ||
1150 | { | ||
1151 | while (d_mountpoint(path->dentry)) { | ||
1152 | struct vfsmount *mounted = lookup_mnt(path); | ||
1153 | if (!mounted) | ||
1154 | break; | ||
1155 | dput(path->dentry); | ||
1156 | mntput(path->mnt); | ||
1157 | path->mnt = mounted; | ||
1158 | path->dentry = dget(mounted->mnt_root); | ||
1159 | } | ||
1160 | } | ||
1161 | |||
977 | static void follow_dotdot(struct nameidata *nd) | 1162 | static void follow_dotdot(struct nameidata *nd) |
978 | { | 1163 | { |
979 | set_root(nd); | 1164 | set_root(nd); |
@@ -1038,12 +1223,14 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, | |||
1038 | struct vfsmount *mnt = nd->path.mnt; | 1223 | struct vfsmount *mnt = nd->path.mnt; |
1039 | struct dentry *dentry, *parent = nd->path.dentry; | 1224 | struct dentry *dentry, *parent = nd->path.dentry; |
1040 | struct inode *dir; | 1225 | struct inode *dir; |
1226 | int err; | ||
1227 | |||
1041 | /* | 1228 | /* |
1042 | * See if the low-level filesystem might want | 1229 | * See if the low-level filesystem might want |
1043 | * to use its own hash.. | 1230 | * to use its own hash.. |
1044 | */ | 1231 | */ |
1045 | if (unlikely(parent->d_flags & DCACHE_OP_HASH)) { | 1232 | if (unlikely(parent->d_flags & DCACHE_OP_HASH)) { |
1046 | int err = parent->d_op->d_hash(parent, nd->inode, name); | 1233 | err = parent->d_op->d_hash(parent, nd->inode, name); |
1047 | if (err < 0) | 1234 | if (err < 0) |
1048 | return err; | 1235 | return err; |
1049 | } | 1236 | } |
@@ -1070,22 +1257,30 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, | |||
1070 | nd->seq = seq; | 1257 | nd->seq = seq; |
1071 | if (dentry->d_flags & DCACHE_OP_REVALIDATE) | 1258 | if (dentry->d_flags & DCACHE_OP_REVALIDATE) |
1072 | goto need_revalidate; | 1259 | goto need_revalidate; |
1260 | done2: | ||
1073 | path->mnt = mnt; | 1261 | path->mnt = mnt; |
1074 | path->dentry = dentry; | 1262 | path->dentry = dentry; |
1075 | __follow_mount_rcu(nd, path, inode); | 1263 | if (likely(__follow_mount_rcu(nd, path, inode, false))) |
1076 | } else { | 1264 | return 0; |
1077 | dentry = __d_lookup(parent, name); | 1265 | if (nameidata_drop_rcu(nd)) |
1078 | if (!dentry) | 1266 | return -ECHILD; |
1079 | goto need_lookup; | 1267 | /* fallthru */ |
1268 | } | ||
1269 | dentry = __d_lookup(parent, name); | ||
1270 | if (!dentry) | ||
1271 | goto need_lookup; | ||
1080 | found: | 1272 | found: |
1081 | if (dentry->d_flags & DCACHE_OP_REVALIDATE) | 1273 | if (dentry->d_flags & DCACHE_OP_REVALIDATE) |
1082 | goto need_revalidate; | 1274 | goto need_revalidate; |
1083 | done: | 1275 | done: |
1084 | path->mnt = mnt; | 1276 | path->mnt = mnt; |
1085 | path->dentry = dentry; | 1277 | path->dentry = dentry; |
1086 | __follow_mount(path); | 1278 | err = follow_managed(path, nd->flags); |
1087 | *inode = path->dentry->d_inode; | 1279 | if (unlikely(err < 0)) { |
1088 | } | 1280 | path_put_conditional(path, nd); |
1281 | return err; | ||
1282 | } | ||
1283 | *inode = path->dentry->d_inode; | ||
1089 | return 0; | 1284 | return 0; |
1090 | 1285 | ||
1091 | need_lookup: | 1286 | need_lookup: |
@@ -1124,6 +1319,8 @@ need_revalidate: | |||
1124 | goto need_lookup; | 1319 | goto need_lookup; |
1125 | if (IS_ERR(dentry)) | 1320 | if (IS_ERR(dentry)) |
1126 | goto fail; | 1321 | goto fail; |
1322 | if (nd->flags & LOOKUP_RCU) | ||
1323 | goto done2; | ||
1127 | goto done; | 1324 | goto done; |
1128 | 1325 | ||
1129 | fail: | 1326 | fail: |
@@ -1131,17 +1328,6 @@ fail: | |||
1131 | } | 1328 | } |
1132 | 1329 | ||
1133 | /* | 1330 | /* |
1134 | * This is a temporary kludge to deal with "automount" symlinks; proper | ||
1135 | * solution is to trigger them on follow_mount(), so that do_lookup() | ||
1136 | * would DTRT. To be killed before 2.6.34-final. | ||
1137 | */ | ||
1138 | static inline int follow_on_final(struct inode *inode, unsigned lookup_flags) | ||
1139 | { | ||
1140 | return inode && unlikely(inode->i_op->follow_link) && | ||
1141 | ((lookup_flags & LOOKUP_FOLLOW) || S_ISDIR(inode->i_mode)); | ||
1142 | } | ||
1143 | |||
1144 | /* | ||
1145 | * Name resolution. | 1331 | * Name resolution. |
1146 | * This is the basic name resolution function, turning a pathname into | 1332 | * This is the basic name resolution function, turning a pathname into |
1147 | * the final dentry. We expect 'base' to be positive and a directory. | 1333 | * the final dentry. We expect 'base' to be positive and a directory. |
@@ -1279,7 +1465,8 @@ last_component: | |||
1279 | err = do_lookup(nd, &this, &next, &inode); | 1465 | err = do_lookup(nd, &this, &next, &inode); |
1280 | if (err) | 1466 | if (err) |
1281 | break; | 1467 | break; |
1282 | if (follow_on_final(inode, lookup_flags)) { | 1468 | if (inode && unlikely(inode->i_op->follow_link) && |
1469 | (lookup_flags & LOOKUP_FOLLOW)) { | ||
1283 | if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry)) | 1470 | if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry)) |
1284 | return -ECHILD; | 1471 | return -ECHILD; |
1285 | BUG_ON(inode != next.dentry->d_inode); | 1472 | BUG_ON(inode != next.dentry->d_inode); |
@@ -2082,8 +2269,6 @@ static struct file *finish_open(struct nameidata *nd, | |||
2082 | return filp; | 2269 | return filp; |
2083 | 2270 | ||
2084 | exit: | 2271 | exit: |
2085 | if (!IS_ERR(nd->intent.open.file)) | ||
2086 | release_open_intent(nd); | ||
2087 | path_put(&nd->path); | 2272 | path_put(&nd->path); |
2088 | return ERR_PTR(error); | 2273 | return ERR_PTR(error); |
2089 | } | 2274 | } |
@@ -2105,11 +2290,13 @@ static struct file *do_last(struct nameidata *nd, struct path *path, | |||
2105 | dir = nd->path.dentry; | 2290 | dir = nd->path.dentry; |
2106 | case LAST_DOT: | 2291 | case LAST_DOT: |
2107 | if (need_reval_dot(dir)) { | 2292 | if (need_reval_dot(dir)) { |
2108 | error = d_revalidate(nd->path.dentry, nd); | 2293 | int status = d_revalidate(nd->path.dentry, nd); |
2109 | if (!error) | 2294 | if (!status) |
2110 | error = -ESTALE; | 2295 | status = -ESTALE; |
2111 | if (error < 0) | 2296 | if (status < 0) { |
2297 | error = status; | ||
2112 | goto exit; | 2298 | goto exit; |
2299 | } | ||
2113 | } | 2300 | } |
2114 | /* fallthrough */ | 2301 | /* fallthrough */ |
2115 | case LAST_ROOT: | 2302 | case LAST_ROOT: |
@@ -2179,11 +2366,9 @@ static struct file *do_last(struct nameidata *nd, struct path *path, | |||
2179 | if (open_flag & O_EXCL) | 2366 | if (open_flag & O_EXCL) |
2180 | goto exit_dput; | 2367 | goto exit_dput; |
2181 | 2368 | ||
2182 | if (__follow_mount(path)) { | 2369 | error = follow_managed(path, nd->flags); |
2183 | error = -ELOOP; | 2370 | if (error < 0) |
2184 | if (open_flag & O_NOFOLLOW) | 2371 | goto exit_dput; |
2185 | goto exit_dput; | ||
2186 | } | ||
2187 | 2372 | ||
2188 | error = -ENOENT; | 2373 | error = -ENOENT; |
2189 | if (!path->dentry->d_inode) | 2374 | if (!path->dentry->d_inode) |
@@ -2206,8 +2391,6 @@ exit_mutex_unlock: | |||
2206 | exit_dput: | 2391 | exit_dput: |
2207 | path_put_conditional(path, nd); | 2392 | path_put_conditional(path, nd); |
2208 | exit: | 2393 | exit: |
2209 | if (!IS_ERR(nd->intent.open.file)) | ||
2210 | release_open_intent(nd); | ||
2211 | path_put(&nd->path); | 2394 | path_put(&nd->path); |
2212 | return ERR_PTR(error); | 2395 | return ERR_PTR(error); |
2213 | } | 2396 | } |
@@ -2294,6 +2477,7 @@ struct file *do_filp_open(int dfd, const char *pathname, | |||
2294 | } | 2477 | } |
2295 | audit_inode(pathname, nd.path.dentry); | 2478 | audit_inode(pathname, nd.path.dentry); |
2296 | filp = finish_open(&nd, open_flag, acc_mode); | 2479 | filp = finish_open(&nd, open_flag, acc_mode); |
2480 | release_open_intent(&nd); | ||
2297 | return filp; | 2481 | return filp; |
2298 | 2482 | ||
2299 | creat: | 2483 | creat: |
@@ -2328,11 +2512,11 @@ reval: | |||
2328 | nd.flags = flags; | 2512 | nd.flags = flags; |
2329 | filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); | 2513 | filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); |
2330 | while (unlikely(!filp)) { /* trailing symlink */ | 2514 | while (unlikely(!filp)) { /* trailing symlink */ |
2331 | struct path holder; | 2515 | struct path link = path; |
2516 | struct inode *linki = link.dentry->d_inode; | ||
2332 | void *cookie; | 2517 | void *cookie; |
2333 | error = -ELOOP; | 2518 | error = -ELOOP; |
2334 | /* S_ISDIR part is a temporary automount kludge */ | 2519 | if (!(nd.flags & LOOKUP_FOLLOW)) |
2335 | if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(nd.inode->i_mode)) | ||
2336 | goto exit_dput; | 2520 | goto exit_dput; |
2337 | if (count++ == 32) | 2521 | if (count++ == 32) |
2338 | goto exit_dput; | 2522 | goto exit_dput; |
@@ -2348,29 +2532,29 @@ reval: | |||
2348 | * just set LAST_BIND. | 2532 | * just set LAST_BIND. |
2349 | */ | 2533 | */ |
2350 | nd.flags |= LOOKUP_PARENT; | 2534 | nd.flags |= LOOKUP_PARENT; |
2351 | error = security_inode_follow_link(path.dentry, &nd); | 2535 | error = security_inode_follow_link(link.dentry, &nd); |
2352 | if (error) | 2536 | if (error) |
2353 | goto exit_dput; | 2537 | goto exit_dput; |
2354 | error = __do_follow_link(&path, &nd, &cookie); | 2538 | error = __do_follow_link(&link, &nd, &cookie); |
2355 | if (unlikely(error)) { | 2539 | if (unlikely(error)) { |
2356 | if (!IS_ERR(cookie) && nd.inode->i_op->put_link) | 2540 | if (!IS_ERR(cookie) && linki->i_op->put_link) |
2357 | nd.inode->i_op->put_link(path.dentry, &nd, cookie); | 2541 | linki->i_op->put_link(link.dentry, &nd, cookie); |
2358 | /* nd.path had been dropped */ | 2542 | /* nd.path had been dropped */ |
2359 | nd.path = path; | 2543 | nd.path = link; |
2360 | goto out_path; | 2544 | goto out_path; |
2361 | } | 2545 | } |
2362 | holder = path; | ||
2363 | nd.flags &= ~LOOKUP_PARENT; | 2546 | nd.flags &= ~LOOKUP_PARENT; |
2364 | filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); | 2547 | filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); |
2365 | if (nd.inode->i_op->put_link) | 2548 | if (linki->i_op->put_link) |
2366 | nd.inode->i_op->put_link(holder.dentry, &nd, cookie); | 2549 | linki->i_op->put_link(link.dentry, &nd, cookie); |
2367 | path_put(&holder); | 2550 | path_put(&link); |
2368 | } | 2551 | } |
2369 | out: | 2552 | out: |
2370 | if (nd.root.mnt) | 2553 | if (nd.root.mnt) |
2371 | path_put(&nd.root); | 2554 | path_put(&nd.root); |
2372 | if (filp == ERR_PTR(-ESTALE) && !(flags & LOOKUP_REVAL)) | 2555 | if (filp == ERR_PTR(-ESTALE) && !(flags & LOOKUP_REVAL)) |
2373 | goto reval; | 2556 | goto reval; |
2557 | release_open_intent(&nd); | ||
2374 | return filp; | 2558 | return filp; |
2375 | 2559 | ||
2376 | exit_dput: | 2560 | exit_dput: |
@@ -2378,8 +2562,6 @@ exit_dput: | |||
2378 | out_path: | 2562 | out_path: |
2379 | path_put(&nd.path); | 2563 | path_put(&nd.path); |
2380 | out_filp: | 2564 | out_filp: |
2381 | if (!IS_ERR(nd.intent.open.file)) | ||
2382 | release_open_intent(&nd); | ||
2383 | filp = ERR_PTR(error); | 2565 | filp = ERR_PTR(error); |
2384 | goto out; | 2566 | goto out; |
2385 | } | 2567 | } |
@@ -3392,6 +3574,7 @@ const struct inode_operations page_symlink_inode_operations = { | |||
3392 | }; | 3574 | }; |
3393 | 3575 | ||
3394 | EXPORT_SYMBOL(user_path_at); | 3576 | EXPORT_SYMBOL(user_path_at); |
3577 | EXPORT_SYMBOL(follow_down_one); | ||
3395 | EXPORT_SYMBOL(follow_down); | 3578 | EXPORT_SYMBOL(follow_down); |
3396 | EXPORT_SYMBOL(follow_up); | 3579 | EXPORT_SYMBOL(follow_up); |
3397 | EXPORT_SYMBOL(get_write_access); /* binfmt_aout */ | 3580 | EXPORT_SYMBOL(get_write_access); /* binfmt_aout */ |
diff --git a/fs/namespace.c b/fs/namespace.c index 3ddfd9046c44..7b0b95371696 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -183,7 +183,7 @@ static inline void mnt_dec_count(struct vfsmount *mnt) | |||
183 | unsigned int mnt_get_count(struct vfsmount *mnt) | 183 | unsigned int mnt_get_count(struct vfsmount *mnt) |
184 | { | 184 | { |
185 | #ifdef CONFIG_SMP | 185 | #ifdef CONFIG_SMP |
186 | unsigned int count = atomic_read(&mnt->mnt_longrefs); | 186 | unsigned int count = 0; |
187 | int cpu; | 187 | int cpu; |
188 | 188 | ||
189 | for_each_possible_cpu(cpu) { | 189 | for_each_possible_cpu(cpu) { |
@@ -217,7 +217,7 @@ struct vfsmount *alloc_vfsmnt(const char *name) | |||
217 | if (!mnt->mnt_pcp) | 217 | if (!mnt->mnt_pcp) |
218 | goto out_free_devname; | 218 | goto out_free_devname; |
219 | 219 | ||
220 | atomic_set(&mnt->mnt_longrefs, 1); | 220 | this_cpu_add(mnt->mnt_pcp->mnt_count, 1); |
221 | #else | 221 | #else |
222 | mnt->mnt_count = 1; | 222 | mnt->mnt_count = 1; |
223 | mnt->mnt_writers = 0; | 223 | mnt->mnt_writers = 0; |
@@ -611,6 +611,21 @@ static void attach_mnt(struct vfsmount *mnt, struct path *path) | |||
611 | list_add_tail(&mnt->mnt_child, &path->mnt->mnt_mounts); | 611 | list_add_tail(&mnt->mnt_child, &path->mnt->mnt_mounts); |
612 | } | 612 | } |
613 | 613 | ||
614 | static inline void __mnt_make_longterm(struct vfsmount *mnt) | ||
615 | { | ||
616 | #ifdef CONFIG_SMP | ||
617 | atomic_inc(&mnt->mnt_longterm); | ||
618 | #endif | ||
619 | } | ||
620 | |||
621 | /* needs vfsmount lock for write */ | ||
622 | static inline void __mnt_make_shortterm(struct vfsmount *mnt) | ||
623 | { | ||
624 | #ifdef CONFIG_SMP | ||
625 | atomic_dec(&mnt->mnt_longterm); | ||
626 | #endif | ||
627 | } | ||
628 | |||
614 | /* | 629 | /* |
615 | * vfsmount lock must be held for write | 630 | * vfsmount lock must be held for write |
616 | */ | 631 | */ |
@@ -624,8 +639,11 @@ static void commit_tree(struct vfsmount *mnt) | |||
624 | BUG_ON(parent == mnt); | 639 | BUG_ON(parent == mnt); |
625 | 640 | ||
626 | list_add_tail(&head, &mnt->mnt_list); | 641 | list_add_tail(&head, &mnt->mnt_list); |
627 | list_for_each_entry(m, &head, mnt_list) | 642 | list_for_each_entry(m, &head, mnt_list) { |
628 | m->mnt_ns = n; | 643 | m->mnt_ns = n; |
644 | __mnt_make_longterm(m); | ||
645 | } | ||
646 | |||
629 | list_splice(&head, n->list.prev); | 647 | list_splice(&head, n->list.prev); |
630 | 648 | ||
631 | list_add_tail(&mnt->mnt_hash, mount_hashtable + | 649 | list_add_tail(&mnt->mnt_hash, mount_hashtable + |
@@ -734,51 +752,30 @@ static inline void mntfree(struct vfsmount *mnt) | |||
734 | deactivate_super(sb); | 752 | deactivate_super(sb); |
735 | } | 753 | } |
736 | 754 | ||
737 | #ifdef CONFIG_SMP | 755 | static void mntput_no_expire(struct vfsmount *mnt) |
738 | static inline void __mntput(struct vfsmount *mnt, int longrefs) | ||
739 | { | 756 | { |
740 | if (!longrefs) { | ||
741 | put_again: | 757 | put_again: |
742 | br_read_lock(vfsmount_lock); | 758 | #ifdef CONFIG_SMP |
743 | if (likely(atomic_read(&mnt->mnt_longrefs))) { | 759 | br_read_lock(vfsmount_lock); |
744 | mnt_dec_count(mnt); | 760 | if (likely(atomic_read(&mnt->mnt_longterm))) { |
745 | br_read_unlock(vfsmount_lock); | 761 | mnt_dec_count(mnt); |
746 | return; | ||
747 | } | ||
748 | br_read_unlock(vfsmount_lock); | 762 | br_read_unlock(vfsmount_lock); |
749 | } else { | 763 | return; |
750 | BUG_ON(!atomic_read(&mnt->mnt_longrefs)); | ||
751 | if (atomic_add_unless(&mnt->mnt_longrefs, -1, 1)) | ||
752 | return; | ||
753 | } | 764 | } |
765 | br_read_unlock(vfsmount_lock); | ||
754 | 766 | ||
755 | br_write_lock(vfsmount_lock); | 767 | br_write_lock(vfsmount_lock); |
756 | if (!longrefs) | 768 | mnt_dec_count(mnt); |
757 | mnt_dec_count(mnt); | ||
758 | else | ||
759 | atomic_dec(&mnt->mnt_longrefs); | ||
760 | if (mnt_get_count(mnt)) { | 769 | if (mnt_get_count(mnt)) { |
761 | br_write_unlock(vfsmount_lock); | 770 | br_write_unlock(vfsmount_lock); |
762 | return; | 771 | return; |
763 | } | 772 | } |
764 | if (unlikely(mnt->mnt_pinned)) { | ||
765 | mnt_add_count(mnt, mnt->mnt_pinned + 1); | ||
766 | mnt->mnt_pinned = 0; | ||
767 | br_write_unlock(vfsmount_lock); | ||
768 | acct_auto_close_mnt(mnt); | ||
769 | goto put_again; | ||
770 | } | ||
771 | br_write_unlock(vfsmount_lock); | ||
772 | mntfree(mnt); | ||
773 | } | ||
774 | #else | 773 | #else |
775 | static inline void __mntput(struct vfsmount *mnt, int longrefs) | ||
776 | { | ||
777 | put_again: | ||
778 | mnt_dec_count(mnt); | 774 | mnt_dec_count(mnt); |
779 | if (likely(mnt_get_count(mnt))) | 775 | if (likely(mnt_get_count(mnt))) |
780 | return; | 776 | return; |
781 | br_write_lock(vfsmount_lock); | 777 | br_write_lock(vfsmount_lock); |
778 | #endif | ||
782 | if (unlikely(mnt->mnt_pinned)) { | 779 | if (unlikely(mnt->mnt_pinned)) { |
783 | mnt_add_count(mnt, mnt->mnt_pinned + 1); | 780 | mnt_add_count(mnt, mnt->mnt_pinned + 1); |
784 | mnt->mnt_pinned = 0; | 781 | mnt->mnt_pinned = 0; |
@@ -789,12 +786,6 @@ put_again: | |||
789 | br_write_unlock(vfsmount_lock); | 786 | br_write_unlock(vfsmount_lock); |
790 | mntfree(mnt); | 787 | mntfree(mnt); |
791 | } | 788 | } |
792 | #endif | ||
793 | |||
794 | static void mntput_no_expire(struct vfsmount *mnt) | ||
795 | { | ||
796 | __mntput(mnt, 0); | ||
797 | } | ||
798 | 789 | ||
799 | void mntput(struct vfsmount *mnt) | 790 | void mntput(struct vfsmount *mnt) |
800 | { | 791 | { |
@@ -802,7 +793,7 @@ void mntput(struct vfsmount *mnt) | |||
802 | /* avoid cacheline pingpong, hope gcc doesn't get "smart" */ | 793 | /* avoid cacheline pingpong, hope gcc doesn't get "smart" */ |
803 | if (unlikely(mnt->mnt_expiry_mark)) | 794 | if (unlikely(mnt->mnt_expiry_mark)) |
804 | mnt->mnt_expiry_mark = 0; | 795 | mnt->mnt_expiry_mark = 0; |
805 | __mntput(mnt, 0); | 796 | mntput_no_expire(mnt); |
806 | } | 797 | } |
807 | } | 798 | } |
808 | EXPORT_SYMBOL(mntput); | 799 | EXPORT_SYMBOL(mntput); |
@@ -815,33 +806,6 @@ struct vfsmount *mntget(struct vfsmount *mnt) | |||
815 | } | 806 | } |
816 | EXPORT_SYMBOL(mntget); | 807 | EXPORT_SYMBOL(mntget); |
817 | 808 | ||
818 | void mntput_long(struct vfsmount *mnt) | ||
819 | { | ||
820 | #ifdef CONFIG_SMP | ||
821 | if (mnt) { | ||
822 | /* avoid cacheline pingpong, hope gcc doesn't get "smart" */ | ||
823 | if (unlikely(mnt->mnt_expiry_mark)) | ||
824 | mnt->mnt_expiry_mark = 0; | ||
825 | __mntput(mnt, 1); | ||
826 | } | ||
827 | #else | ||
828 | mntput(mnt); | ||
829 | #endif | ||
830 | } | ||
831 | EXPORT_SYMBOL(mntput_long); | ||
832 | |||
833 | struct vfsmount *mntget_long(struct vfsmount *mnt) | ||
834 | { | ||
835 | #ifdef CONFIG_SMP | ||
836 | if (mnt) | ||
837 | atomic_inc(&mnt->mnt_longrefs); | ||
838 | return mnt; | ||
839 | #else | ||
840 | return mntget(mnt); | ||
841 | #endif | ||
842 | } | ||
843 | EXPORT_SYMBOL(mntget_long); | ||
844 | |||
845 | void mnt_pin(struct vfsmount *mnt) | 809 | void mnt_pin(struct vfsmount *mnt) |
846 | { | 810 | { |
847 | br_write_lock(vfsmount_lock); | 811 | br_write_lock(vfsmount_lock); |
@@ -1216,7 +1180,7 @@ void release_mounts(struct list_head *head) | |||
1216 | dput(dentry); | 1180 | dput(dentry); |
1217 | mntput(m); | 1181 | mntput(m); |
1218 | } | 1182 | } |
1219 | mntput_long(mnt); | 1183 | mntput(mnt); |
1220 | } | 1184 | } |
1221 | } | 1185 | } |
1222 | 1186 | ||
@@ -1226,19 +1190,21 @@ void release_mounts(struct list_head *head) | |||
1226 | */ | 1190 | */ |
1227 | void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill) | 1191 | void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill) |
1228 | { | 1192 | { |
1193 | LIST_HEAD(tmp_list); | ||
1229 | struct vfsmount *p; | 1194 | struct vfsmount *p; |
1230 | 1195 | ||
1231 | for (p = mnt; p; p = next_mnt(p, mnt)) | 1196 | for (p = mnt; p; p = next_mnt(p, mnt)) |
1232 | list_move(&p->mnt_hash, kill); | 1197 | list_move(&p->mnt_hash, &tmp_list); |
1233 | 1198 | ||
1234 | if (propagate) | 1199 | if (propagate) |
1235 | propagate_umount(kill); | 1200 | propagate_umount(&tmp_list); |
1236 | 1201 | ||
1237 | list_for_each_entry(p, kill, mnt_hash) { | 1202 | list_for_each_entry(p, &tmp_list, mnt_hash) { |
1238 | list_del_init(&p->mnt_expire); | 1203 | list_del_init(&p->mnt_expire); |
1239 | list_del_init(&p->mnt_list); | 1204 | list_del_init(&p->mnt_list); |
1240 | __touch_mnt_namespace(p->mnt_ns); | 1205 | __touch_mnt_namespace(p->mnt_ns); |
1241 | p->mnt_ns = NULL; | 1206 | p->mnt_ns = NULL; |
1207 | __mnt_make_shortterm(p); | ||
1242 | list_del_init(&p->mnt_child); | 1208 | list_del_init(&p->mnt_child); |
1243 | if (p->mnt_parent != p) { | 1209 | if (p->mnt_parent != p) { |
1244 | p->mnt_parent->mnt_ghosts++; | 1210 | p->mnt_parent->mnt_ghosts++; |
@@ -1246,6 +1212,7 @@ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill) | |||
1246 | } | 1212 | } |
1247 | change_mnt_propagation(p, MS_PRIVATE); | 1213 | change_mnt_propagation(p, MS_PRIVATE); |
1248 | } | 1214 | } |
1215 | list_splice(&tmp_list, kill); | ||
1249 | } | 1216 | } |
1250 | 1217 | ||
1251 | static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts); | 1218 | static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts); |
@@ -1844,9 +1811,10 @@ static int do_move_mount(struct path *path, char *old_name) | |||
1844 | return err; | 1811 | return err; |
1845 | 1812 | ||
1846 | down_write(&namespace_sem); | 1813 | down_write(&namespace_sem); |
1847 | while (d_mountpoint(path->dentry) && | 1814 | err = follow_down(path, true); |
1848 | follow_down(path)) | 1815 | if (err < 0) |
1849 | ; | 1816 | goto out; |
1817 | |||
1850 | err = -EINVAL; | 1818 | err = -EINVAL; |
1851 | if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt)) | 1819 | if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt)) |
1852 | goto out; | 1820 | goto out; |
@@ -1904,6 +1872,8 @@ out: | |||
1904 | return err; | 1872 | return err; |
1905 | } | 1873 | } |
1906 | 1874 | ||
1875 | static int do_add_mount(struct vfsmount *, struct path *, int); | ||
1876 | |||
1907 | /* | 1877 | /* |
1908 | * create a new mount for userspace and request it to be added into the | 1878 | * create a new mount for userspace and request it to be added into the |
1909 | * namespace's tree | 1879 | * namespace's tree |
@@ -1912,6 +1882,7 @@ static int do_new_mount(struct path *path, char *type, int flags, | |||
1912 | int mnt_flags, char *name, void *data) | 1882 | int mnt_flags, char *name, void *data) |
1913 | { | 1883 | { |
1914 | struct vfsmount *mnt; | 1884 | struct vfsmount *mnt; |
1885 | int err; | ||
1915 | 1886 | ||
1916 | if (!type) | 1887 | if (!type) |
1917 | return -EINVAL; | 1888 | return -EINVAL; |
@@ -1924,15 +1895,47 @@ static int do_new_mount(struct path *path, char *type, int flags, | |||
1924 | if (IS_ERR(mnt)) | 1895 | if (IS_ERR(mnt)) |
1925 | return PTR_ERR(mnt); | 1896 | return PTR_ERR(mnt); |
1926 | 1897 | ||
1927 | return do_add_mount(mnt, path, mnt_flags, NULL); | 1898 | err = do_add_mount(mnt, path, mnt_flags); |
1899 | if (err) | ||
1900 | mntput(mnt); | ||
1901 | return err; | ||
1902 | } | ||
1903 | |||
1904 | int finish_automount(struct vfsmount *m, struct path *path) | ||
1905 | { | ||
1906 | int err; | ||
1907 | /* The new mount record should have at least 2 refs to prevent it being | ||
1908 | * expired before we get a chance to add it | ||
1909 | */ | ||
1910 | BUG_ON(mnt_get_count(m) < 2); | ||
1911 | |||
1912 | if (m->mnt_sb == path->mnt->mnt_sb && | ||
1913 | m->mnt_root == path->dentry) { | ||
1914 | err = -ELOOP; | ||
1915 | goto fail; | ||
1916 | } | ||
1917 | |||
1918 | err = do_add_mount(m, path, path->mnt->mnt_flags | MNT_SHRINKABLE); | ||
1919 | if (!err) | ||
1920 | return 0; | ||
1921 | fail: | ||
1922 | /* remove m from any expiration list it may be on */ | ||
1923 | if (!list_empty(&m->mnt_expire)) { | ||
1924 | down_write(&namespace_sem); | ||
1925 | br_write_lock(vfsmount_lock); | ||
1926 | list_del_init(&m->mnt_expire); | ||
1927 | br_write_unlock(vfsmount_lock); | ||
1928 | up_write(&namespace_sem); | ||
1929 | } | ||
1930 | mntput(m); | ||
1931 | mntput(m); | ||
1932 | return err; | ||
1928 | } | 1933 | } |
1929 | 1934 | ||
1930 | /* | 1935 | /* |
1931 | * add a mount into a namespace's mount tree | 1936 | * add a mount into a namespace's mount tree |
1932 | * - provide the option of adding the new mount to an expiration list | ||
1933 | */ | 1937 | */ |
1934 | int do_add_mount(struct vfsmount *newmnt, struct path *path, | 1938 | static int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flags) |
1935 | int mnt_flags, struct list_head *fslist) | ||
1936 | { | 1939 | { |
1937 | int err; | 1940 | int err; |
1938 | 1941 | ||
@@ -1940,9 +1943,10 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path, | |||
1940 | 1943 | ||
1941 | down_write(&namespace_sem); | 1944 | down_write(&namespace_sem); |
1942 | /* Something was mounted here while we slept */ | 1945 | /* Something was mounted here while we slept */ |
1943 | while (d_mountpoint(path->dentry) && | 1946 | err = follow_down(path, true); |
1944 | follow_down(path)) | 1947 | if (err < 0) |
1945 | ; | 1948 | goto unlock; |
1949 | |||
1946 | err = -EINVAL; | 1950 | err = -EINVAL; |
1947 | if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(path->mnt)) | 1951 | if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(path->mnt)) |
1948 | goto unlock; | 1952 | goto unlock; |
@@ -1958,22 +1962,29 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path, | |||
1958 | goto unlock; | 1962 | goto unlock; |
1959 | 1963 | ||
1960 | newmnt->mnt_flags = mnt_flags; | 1964 | newmnt->mnt_flags = mnt_flags; |
1961 | if ((err = graft_tree(newmnt, path))) | 1965 | err = graft_tree(newmnt, path); |
1962 | goto unlock; | ||
1963 | |||
1964 | if (fslist) /* add to the specified expiration list */ | ||
1965 | list_add_tail(&newmnt->mnt_expire, fslist); | ||
1966 | |||
1967 | up_write(&namespace_sem); | ||
1968 | return 0; | ||
1969 | 1966 | ||
1970 | unlock: | 1967 | unlock: |
1971 | up_write(&namespace_sem); | 1968 | up_write(&namespace_sem); |
1972 | mntput_long(newmnt); | ||
1973 | return err; | 1969 | return err; |
1974 | } | 1970 | } |
1975 | 1971 | ||
1976 | EXPORT_SYMBOL_GPL(do_add_mount); | 1972 | /** |
1973 | * mnt_set_expiry - Put a mount on an expiration list | ||
1974 | * @mnt: The mount to list. | ||
1975 | * @expiry_list: The list to add the mount to. | ||
1976 | */ | ||
1977 | void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list) | ||
1978 | { | ||
1979 | down_write(&namespace_sem); | ||
1980 | br_write_lock(vfsmount_lock); | ||
1981 | |||
1982 | list_add_tail(&mnt->mnt_expire, expiry_list); | ||
1983 | |||
1984 | br_write_unlock(vfsmount_lock); | ||
1985 | up_write(&namespace_sem); | ||
1986 | } | ||
1987 | EXPORT_SYMBOL(mnt_set_expiry); | ||
1977 | 1988 | ||
1978 | /* | 1989 | /* |
1979 | * process a list of expirable mountpoints with the intent of discarding any | 1990 | * process a list of expirable mountpoints with the intent of discarding any |
@@ -2262,6 +2273,22 @@ static struct mnt_namespace *alloc_mnt_ns(void) | |||
2262 | return new_ns; | 2273 | return new_ns; |
2263 | } | 2274 | } |
2264 | 2275 | ||
2276 | void mnt_make_longterm(struct vfsmount *mnt) | ||
2277 | { | ||
2278 | __mnt_make_longterm(mnt); | ||
2279 | } | ||
2280 | |||
2281 | void mnt_make_shortterm(struct vfsmount *mnt) | ||
2282 | { | ||
2283 | #ifdef CONFIG_SMP | ||
2284 | if (atomic_add_unless(&mnt->mnt_longterm, -1, 1)) | ||
2285 | return; | ||
2286 | br_write_lock(vfsmount_lock); | ||
2287 | atomic_dec(&mnt->mnt_longterm); | ||
2288 | br_write_unlock(vfsmount_lock); | ||
2289 | #endif | ||
2290 | } | ||
2291 | |||
2265 | /* | 2292 | /* |
2266 | * Allocate a new namespace structure and populate it with contents | 2293 | * Allocate a new namespace structure and populate it with contents |
2267 | * copied from the namespace of the passed in task structure. | 2294 | * copied from the namespace of the passed in task structure. |
@@ -2299,14 +2326,19 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, | |||
2299 | q = new_ns->root; | 2326 | q = new_ns->root; |
2300 | while (p) { | 2327 | while (p) { |
2301 | q->mnt_ns = new_ns; | 2328 | q->mnt_ns = new_ns; |
2329 | __mnt_make_longterm(q); | ||
2302 | if (fs) { | 2330 | if (fs) { |
2303 | if (p == fs->root.mnt) { | 2331 | if (p == fs->root.mnt) { |
2332 | fs->root.mnt = mntget(q); | ||
2333 | __mnt_make_longterm(q); | ||
2334 | mnt_make_shortterm(p); | ||
2304 | rootmnt = p; | 2335 | rootmnt = p; |
2305 | fs->root.mnt = mntget_long(q); | ||
2306 | } | 2336 | } |
2307 | if (p == fs->pwd.mnt) { | 2337 | if (p == fs->pwd.mnt) { |
2338 | fs->pwd.mnt = mntget(q); | ||
2339 | __mnt_make_longterm(q); | ||
2340 | mnt_make_shortterm(p); | ||
2308 | pwdmnt = p; | 2341 | pwdmnt = p; |
2309 | fs->pwd.mnt = mntget_long(q); | ||
2310 | } | 2342 | } |
2311 | } | 2343 | } |
2312 | p = next_mnt(p, mnt_ns->root); | 2344 | p = next_mnt(p, mnt_ns->root); |
@@ -2315,9 +2347,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, | |||
2315 | up_write(&namespace_sem); | 2347 | up_write(&namespace_sem); |
2316 | 2348 | ||
2317 | if (rootmnt) | 2349 | if (rootmnt) |
2318 | mntput_long(rootmnt); | 2350 | mntput(rootmnt); |
2319 | if (pwdmnt) | 2351 | if (pwdmnt) |
2320 | mntput_long(pwdmnt); | 2352 | mntput(pwdmnt); |
2321 | 2353 | ||
2322 | return new_ns; | 2354 | return new_ns; |
2323 | } | 2355 | } |
@@ -2350,6 +2382,7 @@ struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt) | |||
2350 | new_ns = alloc_mnt_ns(); | 2382 | new_ns = alloc_mnt_ns(); |
2351 | if (!IS_ERR(new_ns)) { | 2383 | if (!IS_ERR(new_ns)) { |
2352 | mnt->mnt_ns = new_ns; | 2384 | mnt->mnt_ns = new_ns; |
2385 | __mnt_make_longterm(mnt); | ||
2353 | new_ns->root = mnt; | 2386 | new_ns->root = mnt; |
2354 | list_add(&new_ns->list, &new_ns->root->mnt_list); | 2387 | list_add(&new_ns->list, &new_ns->root->mnt_list); |
2355 | } | 2388 | } |
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 199016528fcb..e3d294269058 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c | |||
@@ -135,33 +135,6 @@ out_err: | |||
135 | 135 | ||
136 | #if defined(CONFIG_NFS_V4_1) | 136 | #if defined(CONFIG_NFS_V4_1) |
137 | /* | 137 | /* |
138 | * * CB_SEQUENCE operations will fail until the callback sessionid is set. | ||
139 | * */ | ||
140 | int nfs4_set_callback_sessionid(struct nfs_client *clp) | ||
141 | { | ||
142 | struct svc_serv *serv = clp->cl_rpcclient->cl_xprt->bc_serv; | ||
143 | struct nfs4_sessionid *bc_sid; | ||
144 | |||
145 | if (!serv->sv_bc_xprt) | ||
146 | return -EINVAL; | ||
147 | |||
148 | /* on success freed in xprt_free */ | ||
149 | bc_sid = kmalloc(sizeof(struct nfs4_sessionid), GFP_KERNEL); | ||
150 | if (!bc_sid) | ||
151 | return -ENOMEM; | ||
152 | memcpy(bc_sid->data, &clp->cl_session->sess_id.data, | ||
153 | NFS4_MAX_SESSIONID_LEN); | ||
154 | spin_lock_bh(&serv->sv_cb_lock); | ||
155 | serv->sv_bc_xprt->xpt_bc_sid = bc_sid; | ||
156 | spin_unlock_bh(&serv->sv_cb_lock); | ||
157 | dprintk("%s set xpt_bc_sid=%u:%u:%u:%u for sv_bc_xprt %p\n", __func__, | ||
158 | ((u32 *)bc_sid->data)[0], ((u32 *)bc_sid->data)[1], | ||
159 | ((u32 *)bc_sid->data)[2], ((u32 *)bc_sid->data)[3], | ||
160 | serv->sv_bc_xprt); | ||
161 | return 0; | ||
162 | } | ||
163 | |||
164 | /* | ||
165 | * The callback service for NFSv4.1 callbacks | 138 | * The callback service for NFSv4.1 callbacks |
166 | */ | 139 | */ |
167 | static int | 140 | static int |
@@ -266,10 +239,6 @@ static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt, | |||
266 | struct nfs_callback_data *cb_info) | 239 | struct nfs_callback_data *cb_info) |
267 | { | 240 | { |
268 | } | 241 | } |
269 | int nfs4_set_callback_sessionid(struct nfs_client *clp) | ||
270 | { | ||
271 | return 0; | ||
272 | } | ||
273 | #endif /* CONFIG_NFS_V4_1 */ | 242 | #endif /* CONFIG_NFS_V4_1 */ |
274 | 243 | ||
275 | /* | 244 | /* |
@@ -359,78 +328,58 @@ void nfs_callback_down(int minorversion) | |||
359 | mutex_unlock(&nfs_callback_mutex); | 328 | mutex_unlock(&nfs_callback_mutex); |
360 | } | 329 | } |
361 | 330 | ||
362 | static int check_gss_callback_principal(struct nfs_client *clp, | 331 | /* Boolean check of RPC_AUTH_GSS principal */ |
363 | struct svc_rqst *rqstp) | 332 | int |
333 | check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp) | ||
364 | { | 334 | { |
365 | struct rpc_clnt *r = clp->cl_rpcclient; | 335 | struct rpc_clnt *r = clp->cl_rpcclient; |
366 | char *p = svc_gss_principal(rqstp); | 336 | char *p = svc_gss_principal(rqstp); |
367 | 337 | ||
338 | if (rqstp->rq_authop->flavour != RPC_AUTH_GSS) | ||
339 | return 1; | ||
340 | |||
368 | /* No RPC_AUTH_GSS on NFSv4.1 back channel yet */ | 341 | /* No RPC_AUTH_GSS on NFSv4.1 back channel yet */ |
369 | if (clp->cl_minorversion != 0) | 342 | if (clp->cl_minorversion != 0) |
370 | return SVC_DROP; | 343 | return 0; |
371 | /* | 344 | /* |
372 | * It might just be a normal user principal, in which case | 345 | * It might just be a normal user principal, in which case |
373 | * userspace won't bother to tell us the name at all. | 346 | * userspace won't bother to tell us the name at all. |
374 | */ | 347 | */ |
375 | if (p == NULL) | 348 | if (p == NULL) |
376 | return SVC_DENIED; | 349 | return 0; |
377 | 350 | ||
378 | /* Expect a GSS_C_NT_HOSTBASED_NAME like "nfs@serverhostname" */ | 351 | /* Expect a GSS_C_NT_HOSTBASED_NAME like "nfs@serverhostname" */ |
379 | 352 | ||
380 | if (memcmp(p, "nfs@", 4) != 0) | 353 | if (memcmp(p, "nfs@", 4) != 0) |
381 | return SVC_DENIED; | 354 | return 0; |
382 | p += 4; | 355 | p += 4; |
383 | if (strcmp(p, r->cl_server) != 0) | 356 | if (strcmp(p, r->cl_server) != 0) |
384 | return SVC_DENIED; | 357 | return 0; |
385 | return SVC_OK; | 358 | return 1; |
386 | } | 359 | } |
387 | 360 | ||
388 | /* pg_authenticate method helper */ | 361 | /* |
389 | static struct nfs_client *nfs_cb_find_client(struct svc_rqst *rqstp) | 362 | * pg_authenticate method for nfsv4 callback threads. |
390 | { | 363 | * |
391 | struct nfs4_sessionid *sessionid = bc_xprt_sid(rqstp); | 364 | * The authflavor has been negotiated, so an incorrect flavor is a server |
392 | int is_cb_compound = rqstp->rq_proc == CB_COMPOUND ? 1 : 0; | 365 | * bug. Drop packets with incorrect authflavor. |
393 | 366 | * | |
394 | dprintk("--> %s rq_proc %d\n", __func__, rqstp->rq_proc); | 367 | * All other checking done after NFS decoding where the nfs_client can be |
395 | if (svc_is_backchannel(rqstp)) | 368 | * found in nfs4_callback_compound |
396 | /* Sessionid (usually) set after CB_NULL ping */ | 369 | */ |
397 | return nfs4_find_client_sessionid(svc_addr(rqstp), sessionid, | ||
398 | is_cb_compound); | ||
399 | else | ||
400 | /* No callback identifier in pg_authenticate */ | ||
401 | return nfs4_find_client_no_ident(svc_addr(rqstp)); | ||
402 | } | ||
403 | |||
404 | /* pg_authenticate method for nfsv4 callback threads. */ | ||
405 | static int nfs_callback_authenticate(struct svc_rqst *rqstp) | 370 | static int nfs_callback_authenticate(struct svc_rqst *rqstp) |
406 | { | 371 | { |
407 | struct nfs_client *clp; | ||
408 | RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); | ||
409 | int ret = SVC_OK; | ||
410 | |||
411 | /* Don't talk to strangers */ | ||
412 | clp = nfs_cb_find_client(rqstp); | ||
413 | if (clp == NULL) | ||
414 | return SVC_DROP; | ||
415 | |||
416 | dprintk("%s: %s NFSv4 callback!\n", __func__, | ||
417 | svc_print_addr(rqstp, buf, sizeof(buf))); | ||
418 | |||
419 | switch (rqstp->rq_authop->flavour) { | 372 | switch (rqstp->rq_authop->flavour) { |
420 | case RPC_AUTH_NULL: | 373 | case RPC_AUTH_NULL: |
421 | if (rqstp->rq_proc != CB_NULL) | 374 | if (rqstp->rq_proc != CB_NULL) |
422 | ret = SVC_DENIED; | 375 | return SVC_DROP; |
423 | break; | 376 | break; |
424 | case RPC_AUTH_UNIX: | 377 | case RPC_AUTH_GSS: |
425 | break; | 378 | /* No RPC_AUTH_GSS support yet in NFSv4.1 */ |
426 | case RPC_AUTH_GSS: | 379 | if (svc_is_backchannel(rqstp)) |
427 | ret = check_gss_callback_principal(clp, rqstp); | 380 | return SVC_DROP; |
428 | break; | ||
429 | default: | ||
430 | ret = SVC_DENIED; | ||
431 | } | 381 | } |
432 | nfs_put_client(clp); | 382 | return SVC_OK; |
433 | return ret; | ||
434 | } | 383 | } |
435 | 384 | ||
436 | /* | 385 | /* |
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index d3b44f9bd747..46d93ce7311b 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h | |||
@@ -7,6 +7,7 @@ | |||
7 | */ | 7 | */ |
8 | #ifndef __LINUX_FS_NFS_CALLBACK_H | 8 | #ifndef __LINUX_FS_NFS_CALLBACK_H |
9 | #define __LINUX_FS_NFS_CALLBACK_H | 9 | #define __LINUX_FS_NFS_CALLBACK_H |
10 | #include <linux/sunrpc/svc.h> | ||
10 | 11 | ||
11 | #define NFS4_CALLBACK 0x40000000 | 12 | #define NFS4_CALLBACK 0x40000000 |
12 | #define NFS4_CALLBACK_XDRSIZE 2048 | 13 | #define NFS4_CALLBACK_XDRSIZE 2048 |
@@ -37,7 +38,6 @@ enum nfs4_callback_opnum { | |||
37 | struct cb_process_state { | 38 | struct cb_process_state { |
38 | __be32 drc_status; | 39 | __be32 drc_status; |
39 | struct nfs_client *clp; | 40 | struct nfs_client *clp; |
40 | struct nfs4_sessionid *svc_sid; /* v4.1 callback service sessionid */ | ||
41 | }; | 41 | }; |
42 | 42 | ||
43 | struct cb_compound_hdr_arg { | 43 | struct cb_compound_hdr_arg { |
@@ -168,7 +168,7 @@ extern unsigned nfs4_callback_layoutrecall( | |||
168 | extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses); | 168 | extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses); |
169 | extern void nfs4_cb_take_slot(struct nfs_client *clp); | 169 | extern void nfs4_cb_take_slot(struct nfs_client *clp); |
170 | #endif /* CONFIG_NFS_V4_1 */ | 170 | #endif /* CONFIG_NFS_V4_1 */ |
171 | 171 | extern int check_gss_callback_principal(struct nfs_client *, struct svc_rqst *); | |
172 | extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, | 172 | extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, |
173 | struct cb_getattrres *res, | 173 | struct cb_getattrres *res, |
174 | struct cb_process_state *cps); | 174 | struct cb_process_state *cps); |
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 4bb91cb2620d..89587573fe50 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c | |||
@@ -373,17 +373,11 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, | |||
373 | { | 373 | { |
374 | struct nfs_client *clp; | 374 | struct nfs_client *clp; |
375 | int i; | 375 | int i; |
376 | __be32 status; | 376 | __be32 status = htonl(NFS4ERR_BADSESSION); |
377 | 377 | ||
378 | cps->clp = NULL; | 378 | cps->clp = NULL; |
379 | 379 | ||
380 | status = htonl(NFS4ERR_BADSESSION); | 380 | clp = nfs4_find_client_sessionid(args->csa_addr, &args->csa_sessionid); |
381 | /* Incoming session must match the callback session */ | ||
382 | if (memcmp(&args->csa_sessionid, cps->svc_sid, NFS4_MAX_SESSIONID_LEN)) | ||
383 | goto out; | ||
384 | |||
385 | clp = nfs4_find_client_sessionid(args->csa_addr, | ||
386 | &args->csa_sessionid, 1); | ||
387 | if (clp == NULL) | 381 | if (clp == NULL) |
388 | goto out; | 382 | goto out; |
389 | 383 | ||
@@ -414,9 +408,9 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, | |||
414 | res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; | 408 | res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; |
415 | res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; | 409 | res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; |
416 | nfs4_cb_take_slot(clp); | 410 | nfs4_cb_take_slot(clp); |
417 | cps->clp = clp; /* put in nfs4_callback_compound */ | ||
418 | 411 | ||
419 | out: | 412 | out: |
413 | cps->clp = clp; /* put in nfs4_callback_compound */ | ||
420 | for (i = 0; i < args->csa_nrclists; i++) | 414 | for (i = 0; i < args->csa_nrclists; i++) |
421 | kfree(args->csa_rclists[i].rcl_refcalls); | 415 | kfree(args->csa_rclists[i].rcl_refcalls); |
422 | kfree(args->csa_rclists); | 416 | kfree(args->csa_rclists); |
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 23112c263f81..14e0f9371d14 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c | |||
@@ -794,10 +794,9 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r | |||
794 | 794 | ||
795 | if (hdr_arg.minorversion == 0) { | 795 | if (hdr_arg.minorversion == 0) { |
796 | cps.clp = nfs4_find_client_ident(hdr_arg.cb_ident); | 796 | cps.clp = nfs4_find_client_ident(hdr_arg.cb_ident); |
797 | if (!cps.clp) | 797 | if (!cps.clp || !check_gss_callback_principal(cps.clp, rqstp)) |
798 | return rpc_drop_reply; | 798 | return rpc_drop_reply; |
799 | } else | 799 | } |
800 | cps.svc_sid = bc_xprt_sid(rqstp); | ||
801 | 800 | ||
802 | hdr_res.taglen = hdr_arg.taglen; | 801 | hdr_res.taglen = hdr_arg.taglen; |
803 | hdr_res.tag = hdr_arg.tag; | 802 | hdr_res.tag = hdr_arg.tag; |
diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 192f2f860265..bd3ca32879e7 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c | |||
@@ -1206,16 +1206,11 @@ nfs4_find_client_ident(int cb_ident) | |||
1206 | * For CB_COMPOUND calls, find a client by IP address, protocol version, | 1206 | * For CB_COMPOUND calls, find a client by IP address, protocol version, |
1207 | * minorversion, and sessionID | 1207 | * minorversion, and sessionID |
1208 | * | 1208 | * |
1209 | * CREATE_SESSION triggers a CB_NULL ping from servers. The callback service | ||
1210 | * sessionid can only be set after the CREATE_SESSION return, so a CB_NULL | ||
1211 | * can arrive before the callback sessionid is set. For CB_NULL calls, | ||
1212 | * find a client by IP address protocol version, and minorversion. | ||
1213 | * | ||
1214 | * Returns NULL if no such client | 1209 | * Returns NULL if no such client |
1215 | */ | 1210 | */ |
1216 | struct nfs_client * | 1211 | struct nfs_client * |
1217 | nfs4_find_client_sessionid(const struct sockaddr *addr, | 1212 | nfs4_find_client_sessionid(const struct sockaddr *addr, |
1218 | struct nfs4_sessionid *sid, int is_cb_compound) | 1213 | struct nfs4_sessionid *sid) |
1219 | { | 1214 | { |
1220 | struct nfs_client *clp; | 1215 | struct nfs_client *clp; |
1221 | 1216 | ||
@@ -1227,9 +1222,9 @@ nfs4_find_client_sessionid(const struct sockaddr *addr, | |||
1227 | if (!nfs4_has_session(clp)) | 1222 | if (!nfs4_has_session(clp)) |
1228 | continue; | 1223 | continue; |
1229 | 1224 | ||
1230 | /* Match sessionid unless cb_null call*/ | 1225 | /* Match sessionid*/ |
1231 | if (is_cb_compound && (memcmp(clp->cl_session->sess_id.data, | 1226 | if (memcmp(clp->cl_session->sess_id.data, |
1232 | sid->data, NFS4_MAX_SESSIONID_LEN) != 0)) | 1227 | sid->data, NFS4_MAX_SESSIONID_LEN) != 0) |
1233 | continue; | 1228 | continue; |
1234 | 1229 | ||
1235 | atomic_inc(&clp->cl_count); | 1230 | atomic_inc(&clp->cl_count); |
@@ -1244,7 +1239,7 @@ nfs4_find_client_sessionid(const struct sockaddr *addr, | |||
1244 | 1239 | ||
1245 | struct nfs_client * | 1240 | struct nfs_client * |
1246 | nfs4_find_client_sessionid(const struct sockaddr *addr, | 1241 | nfs4_find_client_sessionid(const struct sockaddr *addr, |
1247 | struct nfs4_sessionid *sid, int is_cb_compound) | 1242 | struct nfs4_sessionid *sid) |
1248 | { | 1243 | { |
1249 | return NULL; | 1244 | return NULL; |
1250 | } | 1245 | } |
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 364e4328f392..bbbc6bf5cb2e 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c | |||
@@ -23,8 +23,6 @@ | |||
23 | 23 | ||
24 | static void nfs_do_free_delegation(struct nfs_delegation *delegation) | 24 | static void nfs_do_free_delegation(struct nfs_delegation *delegation) |
25 | { | 25 | { |
26 | if (delegation->cred) | ||
27 | put_rpccred(delegation->cred); | ||
28 | kfree(delegation); | 26 | kfree(delegation); |
29 | } | 27 | } |
30 | 28 | ||
@@ -37,6 +35,10 @@ static void nfs_free_delegation_callback(struct rcu_head *head) | |||
37 | 35 | ||
38 | static void nfs_free_delegation(struct nfs_delegation *delegation) | 36 | static void nfs_free_delegation(struct nfs_delegation *delegation) |
39 | { | 37 | { |
38 | if (delegation->cred) { | ||
39 | put_rpccred(delegation->cred); | ||
40 | delegation->cred = NULL; | ||
41 | } | ||
40 | call_rcu(&delegation->rcu, nfs_free_delegation_callback); | 42 | call_rcu(&delegation->rcu, nfs_free_delegation_callback); |
41 | } | 43 | } |
42 | 44 | ||
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 95b081bc9e25..2c3eb33b904d 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c | |||
@@ -970,7 +970,7 @@ int nfs_lookup_verify_inode(struct inode *inode, struct nameidata *nd) | |||
970 | { | 970 | { |
971 | struct nfs_server *server = NFS_SERVER(inode); | 971 | struct nfs_server *server = NFS_SERVER(inode); |
972 | 972 | ||
973 | if (test_bit(NFS_INO_MOUNTPOINT, &NFS_I(inode)->flags)) | 973 | if (IS_AUTOMOUNT(inode)) |
974 | return 0; | 974 | return 0; |
975 | if (nd != NULL) { | 975 | if (nd != NULL) { |
976 | /* VFS wants an on-the-wire revalidation */ | 976 | /* VFS wants an on-the-wire revalidation */ |
@@ -1173,6 +1173,7 @@ const struct dentry_operations nfs_dentry_operations = { | |||
1173 | .d_revalidate = nfs_lookup_revalidate, | 1173 | .d_revalidate = nfs_lookup_revalidate, |
1174 | .d_delete = nfs_dentry_delete, | 1174 | .d_delete = nfs_dentry_delete, |
1175 | .d_iput = nfs_dentry_iput, | 1175 | .d_iput = nfs_dentry_iput, |
1176 | .d_automount = nfs_d_automount, | ||
1176 | }; | 1177 | }; |
1177 | 1178 | ||
1178 | static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) | 1179 | static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) |
@@ -1246,6 +1247,7 @@ const struct dentry_operations nfs4_dentry_operations = { | |||
1246 | .d_revalidate = nfs_open_revalidate, | 1247 | .d_revalidate = nfs_open_revalidate, |
1247 | .d_delete = nfs_dentry_delete, | 1248 | .d_delete = nfs_dentry_delete, |
1248 | .d_iput = nfs_dentry_iput, | 1249 | .d_iput = nfs_dentry_iput, |
1250 | .d_automount = nfs_d_automount, | ||
1249 | }; | 1251 | }; |
1250 | 1252 | ||
1251 | /* | 1253 | /* |
@@ -1406,11 +1408,15 @@ no_open: | |||
1406 | static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) | 1408 | static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) |
1407 | { | 1409 | { |
1408 | struct dentry *parent = NULL; | 1410 | struct dentry *parent = NULL; |
1409 | struct inode *inode = dentry->d_inode; | 1411 | struct inode *inode; |
1410 | struct inode *dir; | 1412 | struct inode *dir; |
1411 | struct nfs_open_context *ctx; | 1413 | struct nfs_open_context *ctx; |
1412 | int openflags, ret = 0; | 1414 | int openflags, ret = 0; |
1413 | 1415 | ||
1416 | if (nd->flags & LOOKUP_RCU) | ||
1417 | return -ECHILD; | ||
1418 | |||
1419 | inode = dentry->d_inode; | ||
1414 | if (!is_atomic_open(nd) || d_mountpoint(dentry)) | 1420 | if (!is_atomic_open(nd) || d_mountpoint(dentry)) |
1415 | goto no_open; | 1421 | goto no_open; |
1416 | 1422 | ||
@@ -1579,6 +1585,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode, | |||
1579 | { | 1585 | { |
1580 | struct iattr attr; | 1586 | struct iattr attr; |
1581 | int error; | 1587 | int error; |
1588 | int open_flags = 0; | ||
1582 | 1589 | ||
1583 | dfprintk(VFS, "NFS: create(%s/%ld), %s\n", | 1590 | dfprintk(VFS, "NFS: create(%s/%ld), %s\n", |
1584 | dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); | 1591 | dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); |
@@ -1586,7 +1593,10 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode, | |||
1586 | attr.ia_mode = mode; | 1593 | attr.ia_mode = mode; |
1587 | attr.ia_valid = ATTR_MODE; | 1594 | attr.ia_valid = ATTR_MODE; |
1588 | 1595 | ||
1589 | error = NFS_PROTO(dir)->create(dir, dentry, &attr, 0, NULL); | 1596 | if ((nd->flags & LOOKUP_CREATE) != 0) |
1597 | open_flags = nd->intent.open.flags; | ||
1598 | |||
1599 | error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, NULL); | ||
1590 | if (error != 0) | 1600 | if (error != 0) |
1591 | goto out_err; | 1601 | goto out_err; |
1592 | return 0; | 1602 | return 0; |
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index e6ace0d93c71..9943a75bb6d1 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c | |||
@@ -407,15 +407,18 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, | |||
407 | pos += vec->iov_len; | 407 | pos += vec->iov_len; |
408 | } | 408 | } |
409 | 409 | ||
410 | /* | ||
411 | * If no bytes were started, return the error, and let the | ||
412 | * generic layer handle the completion. | ||
413 | */ | ||
414 | if (requested_bytes == 0) { | ||
415 | nfs_direct_req_release(dreq); | ||
416 | return result < 0 ? result : -EIO; | ||
417 | } | ||
418 | |||
410 | if (put_dreq(dreq)) | 419 | if (put_dreq(dreq)) |
411 | nfs_direct_complete(dreq); | 420 | nfs_direct_complete(dreq); |
412 | 421 | return 0; | |
413 | if (requested_bytes != 0) | ||
414 | return 0; | ||
415 | |||
416 | if (result < 0) | ||
417 | return result; | ||
418 | return -EIO; | ||
419 | } | 422 | } |
420 | 423 | ||
421 | static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov, | 424 | static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov, |
@@ -841,15 +844,18 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, | |||
841 | pos += vec->iov_len; | 844 | pos += vec->iov_len; |
842 | } | 845 | } |
843 | 846 | ||
847 | /* | ||
848 | * If no bytes were started, return the error, and let the | ||
849 | * generic layer handle the completion. | ||
850 | */ | ||
851 | if (requested_bytes == 0) { | ||
852 | nfs_direct_req_release(dreq); | ||
853 | return result < 0 ? result : -EIO; | ||
854 | } | ||
855 | |||
844 | if (put_dreq(dreq)) | 856 | if (put_dreq(dreq)) |
845 | nfs_direct_write_complete(dreq, dreq->inode); | 857 | nfs_direct_write_complete(dreq, dreq->inode); |
846 | 858 | return 0; | |
847 | if (requested_bytes != 0) | ||
848 | return 0; | ||
849 | |||
850 | if (result < 0) | ||
851 | return result; | ||
852 | return -EIO; | ||
853 | } | 859 | } |
854 | 860 | ||
855 | static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov, | 861 | static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov, |
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index ce00b704452c..1cc600e77bb4 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
@@ -300,7 +300,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) | |||
300 | else | 300 | else |
301 | inode->i_op = &nfs_mountpoint_inode_operations; | 301 | inode->i_op = &nfs_mountpoint_inode_operations; |
302 | inode->i_fop = NULL; | 302 | inode->i_fop = NULL; |
303 | set_bit(NFS_INO_MOUNTPOINT, &nfsi->flags); | 303 | inode->i_flags |= S_AUTOMOUNT; |
304 | } | 304 | } |
305 | } else if (S_ISLNK(inode->i_mode)) | 305 | } else if (S_ISLNK(inode->i_mode)) |
306 | inode->i_op = &nfs_symlink_inode_operations; | 306 | inode->i_op = &nfs_symlink_inode_operations; |
@@ -881,9 +881,10 @@ out: | |||
881 | return ret; | 881 | return ret; |
882 | } | 882 | } |
883 | 883 | ||
884 | static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) | 884 | static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) |
885 | { | 885 | { |
886 | struct nfs_inode *nfsi = NFS_I(inode); | 886 | struct nfs_inode *nfsi = NFS_I(inode); |
887 | unsigned long ret = 0; | ||
887 | 888 | ||
888 | if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE) | 889 | if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE) |
889 | && (fattr->valid & NFS_ATTR_FATTR_CHANGE) | 890 | && (fattr->valid & NFS_ATTR_FATTR_CHANGE) |
@@ -891,25 +892,32 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
891 | nfsi->change_attr = fattr->change_attr; | 892 | nfsi->change_attr = fattr->change_attr; |
892 | if (S_ISDIR(inode->i_mode)) | 893 | if (S_ISDIR(inode->i_mode)) |
893 | nfsi->cache_validity |= NFS_INO_INVALID_DATA; | 894 | nfsi->cache_validity |= NFS_INO_INVALID_DATA; |
895 | ret |= NFS_INO_INVALID_ATTR; | ||
894 | } | 896 | } |
895 | /* If we have atomic WCC data, we may update some attributes */ | 897 | /* If we have atomic WCC data, we may update some attributes */ |
896 | if ((fattr->valid & NFS_ATTR_FATTR_PRECTIME) | 898 | if ((fattr->valid & NFS_ATTR_FATTR_PRECTIME) |
897 | && (fattr->valid & NFS_ATTR_FATTR_CTIME) | 899 | && (fattr->valid & NFS_ATTR_FATTR_CTIME) |
898 | && timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) | 900 | && timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) { |
899 | memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); | 901 | memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); |
902 | ret |= NFS_INO_INVALID_ATTR; | ||
903 | } | ||
900 | 904 | ||
901 | if ((fattr->valid & NFS_ATTR_FATTR_PREMTIME) | 905 | if ((fattr->valid & NFS_ATTR_FATTR_PREMTIME) |
902 | && (fattr->valid & NFS_ATTR_FATTR_MTIME) | 906 | && (fattr->valid & NFS_ATTR_FATTR_MTIME) |
903 | && timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) { | 907 | && timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) { |
904 | memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); | 908 | memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); |
905 | if (S_ISDIR(inode->i_mode)) | 909 | if (S_ISDIR(inode->i_mode)) |
906 | nfsi->cache_validity |= NFS_INO_INVALID_DATA; | 910 | nfsi->cache_validity |= NFS_INO_INVALID_DATA; |
911 | ret |= NFS_INO_INVALID_ATTR; | ||
907 | } | 912 | } |
908 | if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE) | 913 | if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE) |
909 | && (fattr->valid & NFS_ATTR_FATTR_SIZE) | 914 | && (fattr->valid & NFS_ATTR_FATTR_SIZE) |
910 | && i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size) | 915 | && i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size) |
911 | && nfsi->npages == 0) | 916 | && nfsi->npages == 0) { |
912 | i_size_write(inode, nfs_size_to_loff_t(fattr->size)); | 917 | i_size_write(inode, nfs_size_to_loff_t(fattr->size)); |
918 | ret |= NFS_INO_INVALID_ATTR; | ||
919 | } | ||
920 | return ret; | ||
913 | } | 921 | } |
914 | 922 | ||
915 | /** | 923 | /** |
@@ -1208,7 +1216,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
1208 | /* Update the fsid? */ | 1216 | /* Update the fsid? */ |
1209 | if (S_ISDIR(inode->i_mode) && (fattr->valid & NFS_ATTR_FATTR_FSID) && | 1217 | if (S_ISDIR(inode->i_mode) && (fattr->valid & NFS_ATTR_FATTR_FSID) && |
1210 | !nfs_fsid_equal(&server->fsid, &fattr->fsid) && | 1218 | !nfs_fsid_equal(&server->fsid, &fattr->fsid) && |
1211 | !test_bit(NFS_INO_MOUNTPOINT, &nfsi->flags)) | 1219 | !IS_AUTOMOUNT(inode)) |
1212 | server->fsid = fattr->fsid; | 1220 | server->fsid = fattr->fsid; |
1213 | 1221 | ||
1214 | /* | 1222 | /* |
@@ -1223,7 +1231,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
1223 | | NFS_INO_REVAL_PAGECACHE); | 1231 | | NFS_INO_REVAL_PAGECACHE); |
1224 | 1232 | ||
1225 | /* Do atomic weak cache consistency updates */ | 1233 | /* Do atomic weak cache consistency updates */ |
1226 | nfs_wcc_update_inode(inode, fattr); | 1234 | invalid |= nfs_wcc_update_inode(inode, fattr); |
1227 | 1235 | ||
1228 | /* More cache consistency checks */ | 1236 | /* More cache consistency checks */ |
1229 | if (fattr->valid & NFS_ATTR_FATTR_CHANGE) { | 1237 | if (fattr->valid & NFS_ATTR_FATTR_CHANGE) { |
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index bfa3a34af801..cf9fdbdabc67 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h | |||
@@ -133,8 +133,7 @@ extern void nfs_put_client(struct nfs_client *); | |||
133 | extern struct nfs_client *nfs4_find_client_no_ident(const struct sockaddr *); | 133 | extern struct nfs_client *nfs4_find_client_no_ident(const struct sockaddr *); |
134 | extern struct nfs_client *nfs4_find_client_ident(int); | 134 | extern struct nfs_client *nfs4_find_client_ident(int); |
135 | extern struct nfs_client * | 135 | extern struct nfs_client * |
136 | nfs4_find_client_sessionid(const struct sockaddr *, struct nfs4_sessionid *, | 136 | nfs4_find_client_sessionid(const struct sockaddr *, struct nfs4_sessionid *); |
137 | int); | ||
138 | extern struct nfs_server *nfs_create_server( | 137 | extern struct nfs_server *nfs_create_server( |
139 | const struct nfs_parsed_mount_data *, | 138 | const struct nfs_parsed_mount_data *, |
140 | struct nfs_fh *); | 139 | struct nfs_fh *); |
@@ -252,6 +251,7 @@ extern char *nfs_path(const char *base, | |||
252 | const struct dentry *droot, | 251 | const struct dentry *droot, |
253 | const struct dentry *dentry, | 252 | const struct dentry *dentry, |
254 | char *buffer, ssize_t buflen); | 253 | char *buffer, ssize_t buflen); |
254 | extern struct vfsmount *nfs_d_automount(struct path *path); | ||
255 | 255 | ||
256 | /* getroot.c */ | 256 | /* getroot.c */ |
257 | extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *); | 257 | extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *); |
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 74aaf3963c10..f32b8603dca8 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c | |||
@@ -97,9 +97,8 @@ Elong: | |||
97 | } | 97 | } |
98 | 98 | ||
99 | /* | 99 | /* |
100 | * nfs_follow_mountpoint - handle crossing a mountpoint on the server | 100 | * nfs_d_automount - Handle crossing a mountpoint on the server |
101 | * @dentry - dentry of mountpoint | 101 | * @path - The mountpoint |
102 | * @nd - nameidata info | ||
103 | * | 102 | * |
104 | * When we encounter a mountpoint on the server, we want to set up | 103 | * When we encounter a mountpoint on the server, we want to set up |
105 | * a mountpoint on the client too, to prevent inode numbers from | 104 | * a mountpoint on the client too, to prevent inode numbers from |
@@ -109,87 +108,65 @@ Elong: | |||
109 | * situation, and that different filesystems may want to use | 108 | * situation, and that different filesystems may want to use |
110 | * different security flavours. | 109 | * different security flavours. |
111 | */ | 110 | */ |
112 | static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd) | 111 | struct vfsmount *nfs_d_automount(struct path *path) |
113 | { | 112 | { |
114 | struct vfsmount *mnt; | 113 | struct vfsmount *mnt; |
115 | struct nfs_server *server = NFS_SERVER(dentry->d_inode); | 114 | struct nfs_server *server = NFS_SERVER(path->dentry->d_inode); |
116 | struct dentry *parent; | 115 | struct dentry *parent; |
117 | struct nfs_fh *fh = NULL; | 116 | struct nfs_fh *fh = NULL; |
118 | struct nfs_fattr *fattr = NULL; | 117 | struct nfs_fattr *fattr = NULL; |
119 | int err; | 118 | int err; |
120 | 119 | ||
121 | dprintk("--> nfs_follow_mountpoint()\n"); | 120 | dprintk("--> nfs_d_automount()\n"); |
122 | 121 | ||
123 | err = -ESTALE; | 122 | mnt = ERR_PTR(-ESTALE); |
124 | if (IS_ROOT(dentry)) | 123 | if (IS_ROOT(path->dentry)) |
125 | goto out_err; | 124 | goto out_nofree; |
126 | 125 | ||
127 | err = -ENOMEM; | 126 | mnt = ERR_PTR(-ENOMEM); |
128 | fh = nfs_alloc_fhandle(); | 127 | fh = nfs_alloc_fhandle(); |
129 | fattr = nfs_alloc_fattr(); | 128 | fattr = nfs_alloc_fattr(); |
130 | if (fh == NULL || fattr == NULL) | 129 | if (fh == NULL || fattr == NULL) |
131 | goto out_err; | 130 | goto out; |
132 | 131 | ||
133 | dprintk("%s: enter\n", __func__); | 132 | dprintk("%s: enter\n", __func__); |
134 | dput(nd->path.dentry); | ||
135 | nd->path.dentry = dget(dentry); | ||
136 | 133 | ||
137 | /* Look it up again */ | 134 | /* Look it up again to get its attributes */ |
138 | parent = dget_parent(nd->path.dentry); | 135 | parent = dget_parent(path->dentry); |
139 | err = server->nfs_client->rpc_ops->lookup(parent->d_inode, | 136 | err = server->nfs_client->rpc_ops->lookup(parent->d_inode, |
140 | &nd->path.dentry->d_name, | 137 | &path->dentry->d_name, |
141 | fh, fattr); | 138 | fh, fattr); |
142 | dput(parent); | 139 | dput(parent); |
143 | if (err != 0) | 140 | if (err != 0) { |
144 | goto out_err; | 141 | mnt = ERR_PTR(err); |
142 | goto out; | ||
143 | } | ||
145 | 144 | ||
146 | if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) | 145 | if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) |
147 | mnt = nfs_do_refmount(nd->path.mnt, nd->path.dentry); | 146 | mnt = nfs_do_refmount(path->mnt, path->dentry); |
148 | else | 147 | else |
149 | mnt = nfs_do_submount(nd->path.mnt, nd->path.dentry, fh, | 148 | mnt = nfs_do_submount(path->mnt, path->dentry, fh, fattr); |
150 | fattr); | ||
151 | err = PTR_ERR(mnt); | ||
152 | if (IS_ERR(mnt)) | 149 | if (IS_ERR(mnt)) |
153 | goto out_err; | 150 | goto out; |
154 | 151 | ||
155 | mntget(mnt); | 152 | dprintk("%s: done, success\n", __func__); |
156 | err = do_add_mount(mnt, &nd->path, nd->path.mnt->mnt_flags|MNT_SHRINKABLE, | 153 | mntget(mnt); /* prevent immediate expiration */ |
157 | &nfs_automount_list); | 154 | mnt_set_expiry(mnt, &nfs_automount_list); |
158 | if (err < 0) { | ||
159 | mntput(mnt); | ||
160 | if (err == -EBUSY) | ||
161 | goto out_follow; | ||
162 | goto out_err; | ||
163 | } | ||
164 | path_put(&nd->path); | ||
165 | nd->path.mnt = mnt; | ||
166 | nd->path.dentry = dget(mnt->mnt_root); | ||
167 | schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout); | 155 | schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout); |
156 | |||
168 | out: | 157 | out: |
169 | nfs_free_fattr(fattr); | 158 | nfs_free_fattr(fattr); |
170 | nfs_free_fhandle(fh); | 159 | nfs_free_fhandle(fh); |
171 | dprintk("%s: done, returned %d\n", __func__, err); | 160 | out_nofree: |
172 | 161 | dprintk("<-- nfs_follow_mountpoint() = %p\n", mnt); | |
173 | dprintk("<-- nfs_follow_mountpoint() = %d\n", err); | 162 | return mnt; |
174 | return ERR_PTR(err); | ||
175 | out_err: | ||
176 | path_put(&nd->path); | ||
177 | goto out; | ||
178 | out_follow: | ||
179 | while (d_mountpoint(nd->path.dentry) && | ||
180 | follow_down(&nd->path)) | ||
181 | ; | ||
182 | err = 0; | ||
183 | goto out; | ||
184 | } | 163 | } |
185 | 164 | ||
186 | const struct inode_operations nfs_mountpoint_inode_operations = { | 165 | const struct inode_operations nfs_mountpoint_inode_operations = { |
187 | .follow_link = nfs_follow_mountpoint, | ||
188 | .getattr = nfs_getattr, | 166 | .getattr = nfs_getattr, |
189 | }; | 167 | }; |
190 | 168 | ||
191 | const struct inode_operations nfs_referral_inode_operations = { | 169 | const struct inode_operations nfs_referral_inode_operations = { |
192 | .follow_link = nfs_follow_mountpoint, | ||
193 | }; | 170 | }; |
194 | 171 | ||
195 | static void nfs_expire_automounts(struct work_struct *work) | 172 | static void nfs_expire_automounts(struct work_struct *work) |
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 9f88c5f4c7e2..274342771655 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c | |||
@@ -311,8 +311,8 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, | |||
311 | if (!nfs_server_capable(inode, NFS_CAP_ACLS)) | 311 | if (!nfs_server_capable(inode, NFS_CAP_ACLS)) |
312 | goto out; | 312 | goto out; |
313 | 313 | ||
314 | /* We are doing this here, because XDR marshalling can only | 314 | /* We are doing this here because XDR marshalling does not |
315 | return -ENOMEM. */ | 315 | * return any results, it BUGs. */ |
316 | status = -ENOSPC; | 316 | status = -ENOSPC; |
317 | if (acl != NULL && acl->a_count > NFS_ACL_MAX_ENTRIES) | 317 | if (acl != NULL && acl->a_count > NFS_ACL_MAX_ENTRIES) |
318 | goto out; | 318 | goto out; |
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 01c5e8b1941d..183c6b123d0f 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c | |||
@@ -1328,10 +1328,13 @@ static void nfs3_xdr_enc_setacl3args(struct rpc_rqst *req, | |||
1328 | 1328 | ||
1329 | encode_nfs_fh3(xdr, NFS_FH(args->inode)); | 1329 | encode_nfs_fh3(xdr, NFS_FH(args->inode)); |
1330 | encode_uint32(xdr, args->mask); | 1330 | encode_uint32(xdr, args->mask); |
1331 | |||
1332 | base = req->rq_slen; | ||
1331 | if (args->npages != 0) | 1333 | if (args->npages != 0) |
1332 | xdr_write_pages(xdr, args->pages, 0, args->len); | 1334 | xdr_write_pages(xdr, args->pages, 0, args->len); |
1335 | else | ||
1336 | xdr_reserve_space(xdr, NFS_ACL_INLINE_BUFSIZE); | ||
1333 | 1337 | ||
1334 | base = req->rq_slen; | ||
1335 | error = nfsacl_encode(xdr->buf, base, args->inode, | 1338 | error = nfsacl_encode(xdr->buf, base, args->inode, |
1336 | (args->mask & NFS_ACL) ? | 1339 | (args->mask & NFS_ACL) ? |
1337 | args->acl_access : NULL, 1, 0); | 1340 | args->acl_access : NULL, 1, 0); |
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index 51fe64ace55a..f5c9b125e8cc 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c | |||
@@ -214,7 +214,7 @@ decode_and_add_ds(__be32 **pp, struct inode *inode) | |||
214 | 214 | ||
215 | /* ipv6 length plus port is legal */ | 215 | /* ipv6 length plus port is legal */ |
216 | if (rlen > INET6_ADDRSTRLEN + 8) { | 216 | if (rlen > INET6_ADDRSTRLEN + 8) { |
217 | dprintk("%s Invalid address, length %d\n", __func__, | 217 | dprintk("%s: Invalid address, length %d\n", __func__, |
218 | rlen); | 218 | rlen); |
219 | goto out_err; | 219 | goto out_err; |
220 | } | 220 | } |
@@ -225,6 +225,11 @@ decode_and_add_ds(__be32 **pp, struct inode *inode) | |||
225 | /* replace the port dots with dashes for the in4_pton() delimiter*/ | 225 | /* replace the port dots with dashes for the in4_pton() delimiter*/ |
226 | for (i = 0; i < 2; i++) { | 226 | for (i = 0; i < 2; i++) { |
227 | char *res = strrchr(buf, '.'); | 227 | char *res = strrchr(buf, '.'); |
228 | if (!res) { | ||
229 | dprintk("%s: Failed finding expected dots in port\n", | ||
230 | __func__); | ||
231 | goto out_free; | ||
232 | } | ||
228 | *res = '-'; | 233 | *res = '-'; |
229 | } | 234 | } |
230 | 235 | ||
@@ -240,7 +245,7 @@ decode_and_add_ds(__be32 **pp, struct inode *inode) | |||
240 | port = htons((tmp[0] << 8) | (tmp[1])); | 245 | port = htons((tmp[0] << 8) | (tmp[1])); |
241 | 246 | ||
242 | ds = nfs4_pnfs_ds_add(inode, ip_addr, port); | 247 | ds = nfs4_pnfs_ds_add(inode, ip_addr, port); |
243 | dprintk("%s Decoded address and port %s\n", __func__, buf); | 248 | dprintk("%s: Decoded address and port %s\n", __func__, buf); |
244 | out_free: | 249 | out_free: |
245 | kfree(buf); | 250 | kfree(buf); |
246 | out_err: | 251 | out_err: |
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 9d992b0346e3..78936a8f40ab 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
@@ -50,6 +50,7 @@ | |||
50 | #include <linux/module.h> | 50 | #include <linux/module.h> |
51 | #include <linux/sunrpc/bc_xprt.h> | 51 | #include <linux/sunrpc/bc_xprt.h> |
52 | #include <linux/xattr.h> | 52 | #include <linux/xattr.h> |
53 | #include <linux/utsname.h> | ||
53 | 54 | ||
54 | #include "nfs4_fs.h" | 55 | #include "nfs4_fs.h" |
55 | #include "delegation.h" | 56 | #include "delegation.h" |
@@ -4572,27 +4573,16 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) | |||
4572 | *p = htonl((u32)clp->cl_boot_time.tv_nsec); | 4573 | *p = htonl((u32)clp->cl_boot_time.tv_nsec); |
4573 | args.verifier = &verifier; | 4574 | args.verifier = &verifier; |
4574 | 4575 | ||
4575 | while (1) { | 4576 | args.id_len = scnprintf(args.id, sizeof(args.id), |
4576 | args.id_len = scnprintf(args.id, sizeof(args.id), | 4577 | "%s/%s.%s/%u", |
4577 | "%s/%s %u", | 4578 | clp->cl_ipaddr, |
4578 | clp->cl_ipaddr, | 4579 | init_utsname()->nodename, |
4579 | rpc_peeraddr2str(clp->cl_rpcclient, | 4580 | init_utsname()->domainname, |
4580 | RPC_DISPLAY_ADDR), | 4581 | clp->cl_rpcclient->cl_auth->au_flavor); |
4581 | clp->cl_id_uniquifier); | ||
4582 | |||
4583 | status = rpc_call_sync(clp->cl_rpcclient, &msg, 0); | ||
4584 | |||
4585 | if (status != -NFS4ERR_CLID_INUSE) | ||
4586 | break; | ||
4587 | |||
4588 | if (signalled()) | ||
4589 | break; | ||
4590 | |||
4591 | if (++clp->cl_id_uniquifier == 0) | ||
4592 | break; | ||
4593 | } | ||
4594 | 4582 | ||
4595 | status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags); | 4583 | status = rpc_call_sync(clp->cl_rpcclient, &msg, 0); |
4584 | if (!status) | ||
4585 | status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags); | ||
4596 | dprintk("<-- %s status= %d\n", __func__, status); | 4586 | dprintk("<-- %s status= %d\n", __func__, status); |
4597 | return status; | 4587 | return status; |
4598 | } | 4588 | } |
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 2336d532cf66..e6742b57a04c 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c | |||
@@ -232,12 +232,6 @@ int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) | |||
232 | status = nfs4_proc_create_session(clp); | 232 | status = nfs4_proc_create_session(clp); |
233 | if (status != 0) | 233 | if (status != 0) |
234 | goto out; | 234 | goto out; |
235 | status = nfs4_set_callback_sessionid(clp); | ||
236 | if (status != 0) { | ||
237 | printk(KERN_WARNING "Sessionid not set. No callback service\n"); | ||
238 | nfs_callback_down(1); | ||
239 | status = 0; | ||
240 | } | ||
241 | nfs41_setup_state_renewal(clp); | 235 | nfs41_setup_state_renewal(clp); |
242 | nfs_mark_client_ready(clp, NFS_CS_READY); | 236 | nfs_mark_client_ready(clp, NFS_CS_READY); |
243 | out: | 237 | out: |
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 2ab8e5cb8f59..4e2c168b6ee9 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c | |||
@@ -6086,11 +6086,11 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, | |||
6086 | __be32 *p = xdr_inline_decode(xdr, 4); | 6086 | __be32 *p = xdr_inline_decode(xdr, 4); |
6087 | if (unlikely(!p)) | 6087 | if (unlikely(!p)) |
6088 | goto out_overflow; | 6088 | goto out_overflow; |
6089 | if (!ntohl(*p++)) { | 6089 | if (*p == xdr_zero) { |
6090 | p = xdr_inline_decode(xdr, 4); | 6090 | p = xdr_inline_decode(xdr, 4); |
6091 | if (unlikely(!p)) | 6091 | if (unlikely(!p)) |
6092 | goto out_overflow; | 6092 | goto out_overflow; |
6093 | if (!ntohl(*p++)) | 6093 | if (*p == xdr_zero) |
6094 | return -EAGAIN; | 6094 | return -EAGAIN; |
6095 | entry->eof = 1; | 6095 | entry->eof = 1; |
6096 | return -EBADCOOKIE; | 6096 | return -EBADCOOKIE; |
@@ -6101,7 +6101,7 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, | |||
6101 | goto out_overflow; | 6101 | goto out_overflow; |
6102 | entry->prev_cookie = entry->cookie; | 6102 | entry->prev_cookie = entry->cookie; |
6103 | p = xdr_decode_hyper(p, &entry->cookie); | 6103 | p = xdr_decode_hyper(p, &entry->cookie); |
6104 | entry->len = ntohl(*p++); | 6104 | entry->len = be32_to_cpup(p); |
6105 | 6105 | ||
6106 | p = xdr_inline_decode(xdr, entry->len); | 6106 | p = xdr_inline_decode(xdr, entry->len); |
6107 | if (unlikely(!p)) | 6107 | if (unlikely(!p)) |
@@ -6132,9 +6132,6 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, | |||
6132 | if (entry->fattr->valid & NFS_ATTR_FATTR_TYPE) | 6132 | if (entry->fattr->valid & NFS_ATTR_FATTR_TYPE) |
6133 | entry->d_type = nfs_umode_to_dtype(entry->fattr->mode); | 6133 | entry->d_type = nfs_umode_to_dtype(entry->fattr->mode); |
6134 | 6134 | ||
6135 | if (verify_attr_len(xdr, p, len) < 0) | ||
6136 | goto out_overflow; | ||
6137 | |||
6138 | return 0; | 6135 | return 0; |
6139 | 6136 | ||
6140 | out_overflow: | 6137 | out_overflow: |
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index bc4089769735..1b1bc1a0fb0a 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c | |||
@@ -951,7 +951,7 @@ pnfs_put_deviceid_cache(struct nfs_client *clp) | |||
951 | { | 951 | { |
952 | struct pnfs_deviceid_cache *local = clp->cl_devid_cache; | 952 | struct pnfs_deviceid_cache *local = clp->cl_devid_cache; |
953 | 953 | ||
954 | dprintk("--> %s cl_devid_cache %p\n", __func__, clp->cl_devid_cache); | 954 | dprintk("--> %s ({%d})\n", __func__, atomic_read(&local->dc_ref)); |
955 | if (atomic_dec_and_lock(&local->dc_ref, &clp->cl_lock)) { | 955 | if (atomic_dec_and_lock(&local->dc_ref, &clp->cl_lock)) { |
956 | int i; | 956 | int i; |
957 | /* Verify cache is empty */ | 957 | /* Verify cache is empty */ |
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 10d648ea128b..c8278f4046cb 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -932,7 +932,7 @@ out_bad: | |||
932 | while (!list_empty(&list)) { | 932 | while (!list_empty(&list)) { |
933 | data = list_entry(list.next, struct nfs_write_data, pages); | 933 | data = list_entry(list.next, struct nfs_write_data, pages); |
934 | list_del(&data->pages); | 934 | list_del(&data->pages); |
935 | nfs_writedata_release(data); | 935 | nfs_writedata_free(data); |
936 | } | 936 | } |
937 | nfs_redirty_request(req); | 937 | nfs_redirty_request(req); |
938 | return -ENOMEM; | 938 | return -ENOMEM; |
diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c index fc1c52571c03..84c27d69d421 100644 --- a/fs/nfs_common/nfsacl.c +++ b/fs/nfs_common/nfsacl.c | |||
@@ -42,6 +42,11 @@ struct nfsacl_encode_desc { | |||
42 | gid_t gid; | 42 | gid_t gid; |
43 | }; | 43 | }; |
44 | 44 | ||
45 | struct nfsacl_simple_acl { | ||
46 | struct posix_acl acl; | ||
47 | struct posix_acl_entry ace[4]; | ||
48 | }; | ||
49 | |||
45 | static int | 50 | static int |
46 | xdr_nfsace_encode(struct xdr_array2_desc *desc, void *elem) | 51 | xdr_nfsace_encode(struct xdr_array2_desc *desc, void *elem) |
47 | { | 52 | { |
@@ -72,9 +77,20 @@ xdr_nfsace_encode(struct xdr_array2_desc *desc, void *elem) | |||
72 | return 0; | 77 | return 0; |
73 | } | 78 | } |
74 | 79 | ||
75 | unsigned int | 80 | /** |
76 | nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode, | 81 | * nfsacl_encode - Encode an NFSv3 ACL |
77 | struct posix_acl *acl, int encode_entries, int typeflag) | 82 | * |
83 | * @buf: destination xdr_buf to contain XDR encoded ACL | ||
84 | * @base: byte offset in xdr_buf where XDR'd ACL begins | ||
85 | * @inode: inode of file whose ACL this is | ||
86 | * @acl: posix_acl to encode | ||
87 | * @encode_entries: whether to encode ACEs as well | ||
88 | * @typeflag: ACL type: NFS_ACL_DEFAULT or zero | ||
89 | * | ||
90 | * Returns size of encoded ACL in bytes or a negative errno value. | ||
91 | */ | ||
92 | int nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode, | ||
93 | struct posix_acl *acl, int encode_entries, int typeflag) | ||
78 | { | 94 | { |
79 | int entries = (acl && acl->a_count) ? max_t(int, acl->a_count, 4) : 0; | 95 | int entries = (acl && acl->a_count) ? max_t(int, acl->a_count, 4) : 0; |
80 | struct nfsacl_encode_desc nfsacl_desc = { | 96 | struct nfsacl_encode_desc nfsacl_desc = { |
@@ -88,17 +104,22 @@ nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode, | |||
88 | .uid = inode->i_uid, | 104 | .uid = inode->i_uid, |
89 | .gid = inode->i_gid, | 105 | .gid = inode->i_gid, |
90 | }; | 106 | }; |
107 | struct nfsacl_simple_acl aclbuf; | ||
91 | int err; | 108 | int err; |
92 | struct posix_acl *acl2 = NULL; | ||
93 | 109 | ||
94 | if (entries > NFS_ACL_MAX_ENTRIES || | 110 | if (entries > NFS_ACL_MAX_ENTRIES || |
95 | xdr_encode_word(buf, base, entries)) | 111 | xdr_encode_word(buf, base, entries)) |
96 | return -EINVAL; | 112 | return -EINVAL; |
97 | if (encode_entries && acl && acl->a_count == 3) { | 113 | if (encode_entries && acl && acl->a_count == 3) { |
98 | /* Fake up an ACL_MASK entry. */ | 114 | struct posix_acl *acl2 = &aclbuf.acl; |
99 | acl2 = posix_acl_alloc(4, GFP_KERNEL); | 115 | |
100 | if (!acl2) | 116 | /* Avoid the use of posix_acl_alloc(). nfsacl_encode() is |
101 | return -ENOMEM; | 117 | * invoked in contexts where a memory allocation failure is |
118 | * fatal. Fortunately this fake ACL is small enough to | ||
119 | * construct on the stack. */ | ||
120 | memset(acl2, 0, sizeof(acl2)); | ||
121 | posix_acl_init(acl2, 4); | ||
122 | |||
102 | /* Insert entries in canonical order: other orders seem | 123 | /* Insert entries in canonical order: other orders seem |
103 | to confuse Solaris VxFS. */ | 124 | to confuse Solaris VxFS. */ |
104 | acl2->a_entries[0] = acl->a_entries[0]; /* ACL_USER_OBJ */ | 125 | acl2->a_entries[0] = acl->a_entries[0]; /* ACL_USER_OBJ */ |
@@ -109,8 +130,6 @@ nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode, | |||
109 | nfsacl_desc.acl = acl2; | 130 | nfsacl_desc.acl = acl2; |
110 | } | 131 | } |
111 | err = xdr_encode_array2(buf, base + 4, &nfsacl_desc.desc); | 132 | err = xdr_encode_array2(buf, base + 4, &nfsacl_desc.desc); |
112 | if (acl2) | ||
113 | posix_acl_release(acl2); | ||
114 | if (!err) | 133 | if (!err) |
115 | err = 8 + nfsacl_desc.desc.elem_size * | 134 | err = 8 + nfsacl_desc.desc.elem_size * |
116 | nfsacl_desc.desc.array_len; | 135 | nfsacl_desc.desc.array_len; |
@@ -224,9 +243,18 @@ posix_acl_from_nfsacl(struct posix_acl *acl) | |||
224 | return 0; | 243 | return 0; |
225 | } | 244 | } |
226 | 245 | ||
227 | unsigned int | 246 | /** |
228 | nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt, | 247 | * nfsacl_decode - Decode an NFSv3 ACL |
229 | struct posix_acl **pacl) | 248 | * |
249 | * @buf: xdr_buf containing XDR'd ACL data to decode | ||
250 | * @base: byte offset in xdr_buf where XDR'd ACL begins | ||
251 | * @aclcnt: count of ACEs in decoded posix_acl | ||
252 | * @pacl: buffer in which to place decoded posix_acl | ||
253 | * | ||
254 | * Returns the length of the decoded ACL in bytes, or a negative errno value. | ||
255 | */ | ||
256 | int nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt, | ||
257 | struct posix_acl **pacl) | ||
230 | { | 258 | { |
231 | struct nfsacl_decode_desc nfsacl_desc = { | 259 | struct nfsacl_decode_desc nfsacl_desc = { |
232 | .desc = { | 260 | .desc = { |
diff --git a/fs/nfsd/acl.h b/fs/nfsd/acl.h new file mode 100644 index 000000000000..34e5c40af5ef --- /dev/null +++ b/fs/nfsd/acl.h | |||
@@ -0,0 +1,59 @@ | |||
1 | /* | ||
2 | * Common NFSv4 ACL handling definitions. | ||
3 | * | ||
4 | * Copyright (c) 2002 The Regents of the University of Michigan. | ||
5 | * All rights reserved. | ||
6 | * | ||
7 | * Marius Aamodt Eriksen <marius@umich.edu> | ||
8 | * | ||
9 | * Redistribution and use in source and binary forms, with or without | ||
10 | * modification, are permitted provided that the following conditions | ||
11 | * are met: | ||
12 | * | ||
13 | * 1. Redistributions of source code must retain the above copyright | ||
14 | * notice, this list of conditions and the following disclaimer. | ||
15 | * 2. Redistributions in binary form must reproduce the above copyright | ||
16 | * notice, this list of conditions and the following disclaimer in the | ||
17 | * documentation and/or other materials provided with the distribution. | ||
18 | * 3. Neither the name of the University nor the names of its | ||
19 | * contributors may be used to endorse or promote products derived | ||
20 | * from this software without specific prior written permission. | ||
21 | * | ||
22 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED | ||
23 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF | ||
24 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
25 | * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | ||
26 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
27 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||
28 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR | ||
29 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
30 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
31 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
32 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
33 | */ | ||
34 | |||
35 | #ifndef LINUX_NFS4_ACL_H | ||
36 | #define LINUX_NFS4_ACL_H | ||
37 | |||
38 | #include <linux/posix_acl.h> | ||
39 | |||
40 | /* Maximum ACL we'll accept from client; chosen (somewhat arbitrarily) to | ||
41 | * fit in a page: */ | ||
42 | #define NFS4_ACL_MAX 170 | ||
43 | |||
44 | struct nfs4_acl *nfs4_acl_new(int); | ||
45 | int nfs4_acl_get_whotype(char *, u32); | ||
46 | int nfs4_acl_write_who(int who, char *p); | ||
47 | int nfs4_acl_permission(struct nfs4_acl *acl, uid_t owner, gid_t group, | ||
48 | uid_t who, u32 mask); | ||
49 | |||
50 | #define NFS4_ACL_TYPE_DEFAULT 0x01 | ||
51 | #define NFS4_ACL_DIR 0x02 | ||
52 | #define NFS4_ACL_OWNER 0x04 | ||
53 | |||
54 | struct nfs4_acl *nfs4_acl_posix_to_nfsv4(struct posix_acl *, | ||
55 | struct posix_acl *, unsigned int flags); | ||
56 | int nfs4_acl_nfsv4_to_posix(struct nfs4_acl *, struct posix_acl **, | ||
57 | struct posix_acl **, unsigned int flags); | ||
58 | |||
59 | #endif /* LINUX_NFS4_ACL_H */ | ||
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index c0fcb7ab7f6d..8b31e5f8795d 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c | |||
@@ -1,4 +1,3 @@ | |||
1 | #define MSNFS /* HACK HACK */ | ||
2 | /* | 1 | /* |
3 | * NFS exporting and validation. | 2 | * NFS exporting and validation. |
4 | * | 3 | * |
@@ -1444,9 +1443,6 @@ static struct flags { | |||
1444 | { NFSEXP_NOSUBTREECHECK, {"no_subtree_check", ""}}, | 1443 | { NFSEXP_NOSUBTREECHECK, {"no_subtree_check", ""}}, |
1445 | { NFSEXP_NOAUTHNLM, {"insecure_locks", ""}}, | 1444 | { NFSEXP_NOAUTHNLM, {"insecure_locks", ""}}, |
1446 | { NFSEXP_V4ROOT, {"v4root", ""}}, | 1445 | { NFSEXP_V4ROOT, {"v4root", ""}}, |
1447 | #ifdef MSNFS | ||
1448 | { NFSEXP_MSNFS, {"msnfs", ""}}, | ||
1449 | #endif | ||
1450 | { 0, {"", ""}} | 1446 | { 0, {"", ""}} |
1451 | }; | 1447 | }; |
1452 | 1448 | ||
diff --git a/fs/nfsd/idmap.h b/fs/nfsd/idmap.h new file mode 100644 index 000000000000..2f3be1321534 --- /dev/null +++ b/fs/nfsd/idmap.h | |||
@@ -0,0 +1,62 @@ | |||
1 | /* | ||
2 | * Mapping of UID to name and vice versa. | ||
3 | * | ||
4 | * Copyright (c) 2002, 2003 The Regents of the University of | ||
5 | * Michigan. All rights reserved. | ||
6 | > * | ||
7 | * Marius Aamodt Eriksen <marius@umich.edu> | ||
8 | * | ||
9 | * Redistribution and use in source and binary forms, with or without | ||
10 | * modification, are permitted provided that the following conditions | ||
11 | * are met: | ||
12 | * | ||
13 | * 1. Redistributions of source code must retain the above copyright | ||
14 | * notice, this list of conditions and the following disclaimer. | ||
15 | * 2. Redistributions in binary form must reproduce the above copyright | ||
16 | * notice, this list of conditions and the following disclaimer in the | ||
17 | * documentation and/or other materials provided with the distribution. | ||
18 | * 3. Neither the name of the University nor the names of its | ||
19 | * contributors may be used to endorse or promote products derived | ||
20 | * from this software without specific prior written permission. | ||
21 | * | ||
22 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED | ||
23 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF | ||
24 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
25 | * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | ||
26 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
27 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||
28 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR | ||
29 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
30 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
31 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
32 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
33 | */ | ||
34 | |||
35 | #ifndef LINUX_NFSD_IDMAP_H | ||
36 | #define LINUX_NFSD_IDMAP_H | ||
37 | |||
38 | #include <linux/in.h> | ||
39 | #include <linux/sunrpc/svc.h> | ||
40 | |||
41 | /* XXX from linux/nfs_idmap.h */ | ||
42 | #define IDMAP_NAMESZ 128 | ||
43 | |||
44 | #ifdef CONFIG_NFSD_V4 | ||
45 | int nfsd_idmap_init(void); | ||
46 | void nfsd_idmap_shutdown(void); | ||
47 | #else | ||
48 | static inline int nfsd_idmap_init(void) | ||
49 | { | ||
50 | return 0; | ||
51 | } | ||
52 | static inline void nfsd_idmap_shutdown(void) | ||
53 | { | ||
54 | } | ||
55 | #endif | ||
56 | |||
57 | __be32 nfsd_map_name_to_uid(struct svc_rqst *, const char *, size_t, __u32 *); | ||
58 | __be32 nfsd_map_name_to_gid(struct svc_rqst *, const char *, size_t, __u32 *); | ||
59 | int nfsd_map_uid_to_name(struct svc_rqst *, __u32, char *); | ||
60 | int nfsd_map_gid_to_name(struct svc_rqst *, __u32, char *); | ||
61 | |||
62 | #endif /* LINUX_NFSD_IDMAP_H */ | ||
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 5b7e3021e06b..2247fc91d5e9 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c | |||
@@ -151,10 +151,10 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp, | |||
151 | __be32 nfserr; | 151 | __be32 nfserr; |
152 | u32 max_blocksize = svc_max_payload(rqstp); | 152 | u32 max_blocksize = svc_max_payload(rqstp); |
153 | 153 | ||
154 | dprintk("nfsd: READ(3) %s %lu bytes at %lu\n", | 154 | dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n", |
155 | SVCFH_fmt(&argp->fh), | 155 | SVCFH_fmt(&argp->fh), |
156 | (unsigned long) argp->count, | 156 | (unsigned long) argp->count, |
157 | (unsigned long) argp->offset); | 157 | (unsigned long long) argp->offset); |
158 | 158 | ||
159 | /* Obtain buffer pointer for payload. | 159 | /* Obtain buffer pointer for payload. |
160 | * 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof) | 160 | * 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof) |
@@ -191,10 +191,10 @@ nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp, | |||
191 | __be32 nfserr; | 191 | __be32 nfserr; |
192 | unsigned long cnt = argp->len; | 192 | unsigned long cnt = argp->len; |
193 | 193 | ||
194 | dprintk("nfsd: WRITE(3) %s %d bytes at %ld%s\n", | 194 | dprintk("nfsd: WRITE(3) %s %d bytes at %Lu%s\n", |
195 | SVCFH_fmt(&argp->fh), | 195 | SVCFH_fmt(&argp->fh), |
196 | argp->len, | 196 | argp->len, |
197 | (unsigned long) argp->offset, | 197 | (unsigned long long) argp->offset, |
198 | argp->stable? " stable" : ""); | 198 | argp->stable? " stable" : ""); |
199 | 199 | ||
200 | fh_copy(&resp->fh, &argp->fh); | 200 | fh_copy(&resp->fh, &argp->fh); |
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index e48052615159..ad88f1c0a4c3 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c | |||
@@ -36,7 +36,7 @@ | |||
36 | 36 | ||
37 | #include <linux/slab.h> | 37 | #include <linux/slab.h> |
38 | #include <linux/nfs_fs.h> | 38 | #include <linux/nfs_fs.h> |
39 | #include <linux/nfs4_acl.h> | 39 | #include "acl.h" |
40 | 40 | ||
41 | 41 | ||
42 | /* mode bit translations: */ | 42 | /* mode bit translations: */ |
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 21a63da305ff..3be975e18919 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c | |||
@@ -628,10 +628,8 @@ static int max_cb_time(void) | |||
628 | return max(nfsd4_lease/10, (time_t)1) * HZ; | 628 | return max(nfsd4_lease/10, (time_t)1) * HZ; |
629 | } | 629 | } |
630 | 630 | ||
631 | /* Reference counting, callback cleanup, etc., all look racy as heck. | ||
632 | * And why is cl_cb_set an atomic? */ | ||
633 | 631 | ||
634 | int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn) | 632 | static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn, struct nfsd4_session *ses) |
635 | { | 633 | { |
636 | struct rpc_timeout timeparms = { | 634 | struct rpc_timeout timeparms = { |
637 | .to_initval = max_cb_time(), | 635 | .to_initval = max_cb_time(), |
@@ -641,6 +639,7 @@ int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn) | |||
641 | .net = &init_net, | 639 | .net = &init_net, |
642 | .address = (struct sockaddr *) &conn->cb_addr, | 640 | .address = (struct sockaddr *) &conn->cb_addr, |
643 | .addrsize = conn->cb_addrlen, | 641 | .addrsize = conn->cb_addrlen, |
642 | .saddress = (struct sockaddr *) &conn->cb_saddr, | ||
644 | .timeout = &timeparms, | 643 | .timeout = &timeparms, |
645 | .program = &cb_program, | 644 | .program = &cb_program, |
646 | .version = 0, | 645 | .version = 0, |
@@ -657,6 +656,10 @@ int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn) | |||
657 | args.protocol = XPRT_TRANSPORT_TCP; | 656 | args.protocol = XPRT_TRANSPORT_TCP; |
658 | clp->cl_cb_ident = conn->cb_ident; | 657 | clp->cl_cb_ident = conn->cb_ident; |
659 | } else { | 658 | } else { |
659 | if (!conn->cb_xprt) | ||
660 | return -EINVAL; | ||
661 | clp->cl_cb_conn.cb_xprt = conn->cb_xprt; | ||
662 | clp->cl_cb_session = ses; | ||
660 | args.bc_xprt = conn->cb_xprt; | 663 | args.bc_xprt = conn->cb_xprt; |
661 | args.prognumber = clp->cl_cb_session->se_cb_prog; | 664 | args.prognumber = clp->cl_cb_session->se_cb_prog; |
662 | args.protocol = XPRT_TRANSPORT_BC_TCP; | 665 | args.protocol = XPRT_TRANSPORT_BC_TCP; |
@@ -679,14 +682,20 @@ static void warn_no_callback_path(struct nfs4_client *clp, int reason) | |||
679 | (int)clp->cl_name.len, clp->cl_name.data, reason); | 682 | (int)clp->cl_name.len, clp->cl_name.data, reason); |
680 | } | 683 | } |
681 | 684 | ||
685 | static void nfsd4_mark_cb_down(struct nfs4_client *clp, int reason) | ||
686 | { | ||
687 | clp->cl_cb_state = NFSD4_CB_DOWN; | ||
688 | warn_no_callback_path(clp, reason); | ||
689 | } | ||
690 | |||
682 | static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata) | 691 | static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata) |
683 | { | 692 | { |
684 | struct nfs4_client *clp = container_of(calldata, struct nfs4_client, cl_cb_null); | 693 | struct nfs4_client *clp = container_of(calldata, struct nfs4_client, cl_cb_null); |
685 | 694 | ||
686 | if (task->tk_status) | 695 | if (task->tk_status) |
687 | warn_no_callback_path(clp, task->tk_status); | 696 | nfsd4_mark_cb_down(clp, task->tk_status); |
688 | else | 697 | else |
689 | atomic_set(&clp->cl_cb_set, 1); | 698 | clp->cl_cb_state = NFSD4_CB_UP; |
690 | } | 699 | } |
691 | 700 | ||
692 | static const struct rpc_call_ops nfsd4_cb_probe_ops = { | 701 | static const struct rpc_call_ops nfsd4_cb_probe_ops = { |
@@ -709,6 +718,11 @@ int set_callback_cred(void) | |||
709 | 718 | ||
710 | static struct workqueue_struct *callback_wq; | 719 | static struct workqueue_struct *callback_wq; |
711 | 720 | ||
721 | static void run_nfsd4_cb(struct nfsd4_callback *cb) | ||
722 | { | ||
723 | queue_work(callback_wq, &cb->cb_work); | ||
724 | } | ||
725 | |||
712 | static void do_probe_callback(struct nfs4_client *clp) | 726 | static void do_probe_callback(struct nfs4_client *clp) |
713 | { | 727 | { |
714 | struct nfsd4_callback *cb = &clp->cl_cb_null; | 728 | struct nfsd4_callback *cb = &clp->cl_cb_null; |
@@ -723,7 +737,7 @@ static void do_probe_callback(struct nfs4_client *clp) | |||
723 | 737 | ||
724 | cb->cb_ops = &nfsd4_cb_probe_ops; | 738 | cb->cb_ops = &nfsd4_cb_probe_ops; |
725 | 739 | ||
726 | queue_work(callback_wq, &cb->cb_work); | 740 | run_nfsd4_cb(cb); |
727 | } | 741 | } |
728 | 742 | ||
729 | /* | 743 | /* |
@@ -732,14 +746,21 @@ static void do_probe_callback(struct nfs4_client *clp) | |||
732 | */ | 746 | */ |
733 | void nfsd4_probe_callback(struct nfs4_client *clp) | 747 | void nfsd4_probe_callback(struct nfs4_client *clp) |
734 | { | 748 | { |
749 | /* XXX: atomicity? Also, should we be using cl_cb_flags? */ | ||
750 | clp->cl_cb_state = NFSD4_CB_UNKNOWN; | ||
735 | set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_cb_flags); | 751 | set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_cb_flags); |
736 | do_probe_callback(clp); | 752 | do_probe_callback(clp); |
737 | } | 753 | } |
738 | 754 | ||
739 | void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn) | 755 | void nfsd4_probe_callback_sync(struct nfs4_client *clp) |
740 | { | 756 | { |
741 | BUG_ON(atomic_read(&clp->cl_cb_set)); | 757 | nfsd4_probe_callback(clp); |
758 | flush_workqueue(callback_wq); | ||
759 | } | ||
742 | 760 | ||
761 | void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn) | ||
762 | { | ||
763 | clp->cl_cb_state = NFSD4_CB_UNKNOWN; | ||
743 | spin_lock(&clp->cl_lock); | 764 | spin_lock(&clp->cl_lock); |
744 | memcpy(&clp->cl_cb_conn, conn, sizeof(struct nfs4_cb_conn)); | 765 | memcpy(&clp->cl_cb_conn, conn, sizeof(struct nfs4_cb_conn)); |
745 | spin_unlock(&clp->cl_lock); | 766 | spin_unlock(&clp->cl_lock); |
@@ -750,24 +771,14 @@ void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn) | |||
750 | * If the slot is available, then mark it busy. Otherwise, set the | 771 | * If the slot is available, then mark it busy. Otherwise, set the |
751 | * thread for sleeping on the callback RPC wait queue. | 772 | * thread for sleeping on the callback RPC wait queue. |
752 | */ | 773 | */ |
753 | static int nfsd41_cb_setup_sequence(struct nfs4_client *clp, | 774 | static bool nfsd41_cb_get_slot(struct nfs4_client *clp, struct rpc_task *task) |
754 | struct rpc_task *task) | ||
755 | { | 775 | { |
756 | u32 *ptr = (u32 *)clp->cl_cb_session->se_sessionid.data; | ||
757 | int status = 0; | ||
758 | |||
759 | dprintk("%s: %u:%u:%u:%u\n", __func__, | ||
760 | ptr[0], ptr[1], ptr[2], ptr[3]); | ||
761 | |||
762 | if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) { | 776 | if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) { |
763 | rpc_sleep_on(&clp->cl_cb_waitq, task, NULL); | 777 | rpc_sleep_on(&clp->cl_cb_waitq, task, NULL); |
764 | dprintk("%s slot is busy\n", __func__); | 778 | dprintk("%s slot is busy\n", __func__); |
765 | status = -EAGAIN; | 779 | return false; |
766 | goto out; | ||
767 | } | 780 | } |
768 | out: | 781 | return true; |
769 | dprintk("%s status=%d\n", __func__, status); | ||
770 | return status; | ||
771 | } | 782 | } |
772 | 783 | ||
773 | /* | 784 | /* |
@@ -780,20 +791,19 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) | |||
780 | struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); | 791 | struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); |
781 | struct nfs4_client *clp = dp->dl_client; | 792 | struct nfs4_client *clp = dp->dl_client; |
782 | u32 minorversion = clp->cl_minorversion; | 793 | u32 minorversion = clp->cl_minorversion; |
783 | int status = 0; | ||
784 | 794 | ||
785 | cb->cb_minorversion = minorversion; | 795 | cb->cb_minorversion = minorversion; |
786 | if (minorversion) { | 796 | if (minorversion) { |
787 | status = nfsd41_cb_setup_sequence(clp, task); | 797 | if (!nfsd41_cb_get_slot(clp, task)) |
788 | if (status) { | ||
789 | if (status != -EAGAIN) { | ||
790 | /* terminate rpc task */ | ||
791 | task->tk_status = status; | ||
792 | task->tk_action = NULL; | ||
793 | } | ||
794 | return; | 798 | return; |
795 | } | ||
796 | } | 799 | } |
800 | spin_lock(&clp->cl_lock); | ||
801 | if (list_empty(&cb->cb_per_client)) { | ||
802 | /* This is the first call, not a restart */ | ||
803 | cb->cb_done = false; | ||
804 | list_add(&cb->cb_per_client, &clp->cl_callbacks); | ||
805 | } | ||
806 | spin_unlock(&clp->cl_lock); | ||
797 | rpc_call_start(task); | 807 | rpc_call_start(task); |
798 | } | 808 | } |
799 | 809 | ||
@@ -829,15 +839,18 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) | |||
829 | 839 | ||
830 | nfsd4_cb_done(task, calldata); | 840 | nfsd4_cb_done(task, calldata); |
831 | 841 | ||
832 | if (current_rpc_client == NULL) { | 842 | if (current_rpc_client != task->tk_client) { |
833 | /* We're shutting down; give up. */ | 843 | /* We're shutting down or changing cl_cb_client; leave |
834 | /* XXX: err, or is it ok just to fall through | 844 | * it to nfsd4_process_cb_update to restart the call if |
835 | * and rpc_restart_call? */ | 845 | * necessary. */ |
836 | return; | 846 | return; |
837 | } | 847 | } |
838 | 848 | ||
849 | if (cb->cb_done) | ||
850 | return; | ||
839 | switch (task->tk_status) { | 851 | switch (task->tk_status) { |
840 | case 0: | 852 | case 0: |
853 | cb->cb_done = true; | ||
841 | return; | 854 | return; |
842 | case -EBADHANDLE: | 855 | case -EBADHANDLE: |
843 | case -NFS4ERR_BAD_STATEID: | 856 | case -NFS4ERR_BAD_STATEID: |
@@ -846,32 +859,30 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) | |||
846 | break; | 859 | break; |
847 | default: | 860 | default: |
848 | /* Network partition? */ | 861 | /* Network partition? */ |
849 | atomic_set(&clp->cl_cb_set, 0); | 862 | nfsd4_mark_cb_down(clp, task->tk_status); |
850 | warn_no_callback_path(clp, task->tk_status); | ||
851 | if (current_rpc_client != task->tk_client) { | ||
852 | /* queue a callback on the new connection: */ | ||
853 | atomic_inc(&dp->dl_count); | ||
854 | nfsd4_cb_recall(dp); | ||
855 | return; | ||
856 | } | ||
857 | } | 863 | } |
858 | if (dp->dl_retries--) { | 864 | if (dp->dl_retries--) { |
859 | rpc_delay(task, 2*HZ); | 865 | rpc_delay(task, 2*HZ); |
860 | task->tk_status = 0; | 866 | task->tk_status = 0; |
861 | rpc_restart_call_prepare(task); | 867 | rpc_restart_call_prepare(task); |
862 | return; | 868 | return; |
863 | } else { | ||
864 | atomic_set(&clp->cl_cb_set, 0); | ||
865 | warn_no_callback_path(clp, task->tk_status); | ||
866 | } | 869 | } |
870 | nfsd4_mark_cb_down(clp, task->tk_status); | ||
871 | cb->cb_done = true; | ||
867 | } | 872 | } |
868 | 873 | ||
869 | static void nfsd4_cb_recall_release(void *calldata) | 874 | static void nfsd4_cb_recall_release(void *calldata) |
870 | { | 875 | { |
871 | struct nfsd4_callback *cb = calldata; | 876 | struct nfsd4_callback *cb = calldata; |
877 | struct nfs4_client *clp = cb->cb_clp; | ||
872 | struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); | 878 | struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); |
873 | 879 | ||
874 | nfs4_put_delegation(dp); | 880 | if (cb->cb_done) { |
881 | spin_lock(&clp->cl_lock); | ||
882 | list_del(&cb->cb_per_client); | ||
883 | spin_unlock(&clp->cl_lock); | ||
884 | nfs4_put_delegation(dp); | ||
885 | } | ||
875 | } | 886 | } |
876 | 887 | ||
877 | static const struct rpc_call_ops nfsd4_cb_recall_ops = { | 888 | static const struct rpc_call_ops nfsd4_cb_recall_ops = { |
@@ -906,16 +917,33 @@ void nfsd4_shutdown_callback(struct nfs4_client *clp) | |||
906 | flush_workqueue(callback_wq); | 917 | flush_workqueue(callback_wq); |
907 | } | 918 | } |
908 | 919 | ||
909 | void nfsd4_release_cb(struct nfsd4_callback *cb) | 920 | static void nfsd4_release_cb(struct nfsd4_callback *cb) |
910 | { | 921 | { |
911 | if (cb->cb_ops->rpc_release) | 922 | if (cb->cb_ops->rpc_release) |
912 | cb->cb_ops->rpc_release(cb); | 923 | cb->cb_ops->rpc_release(cb); |
913 | } | 924 | } |
914 | 925 | ||
915 | void nfsd4_process_cb_update(struct nfsd4_callback *cb) | 926 | /* requires cl_lock: */ |
927 | static struct nfsd4_conn * __nfsd4_find_backchannel(struct nfs4_client *clp) | ||
928 | { | ||
929 | struct nfsd4_session *s; | ||
930 | struct nfsd4_conn *c; | ||
931 | |||
932 | list_for_each_entry(s, &clp->cl_sessions, se_perclnt) { | ||
933 | list_for_each_entry(c, &s->se_conns, cn_persession) { | ||
934 | if (c->cn_flags & NFS4_CDFC4_BACK) | ||
935 | return c; | ||
936 | } | ||
937 | } | ||
938 | return NULL; | ||
939 | } | ||
940 | |||
941 | static void nfsd4_process_cb_update(struct nfsd4_callback *cb) | ||
916 | { | 942 | { |
917 | struct nfs4_cb_conn conn; | 943 | struct nfs4_cb_conn conn; |
918 | struct nfs4_client *clp = cb->cb_clp; | 944 | struct nfs4_client *clp = cb->cb_clp; |
945 | struct nfsd4_session *ses = NULL; | ||
946 | struct nfsd4_conn *c; | ||
919 | int err; | 947 | int err; |
920 | 948 | ||
921 | /* | 949 | /* |
@@ -926,6 +954,10 @@ void nfsd4_process_cb_update(struct nfsd4_callback *cb) | |||
926 | rpc_shutdown_client(clp->cl_cb_client); | 954 | rpc_shutdown_client(clp->cl_cb_client); |
927 | clp->cl_cb_client = NULL; | 955 | clp->cl_cb_client = NULL; |
928 | } | 956 | } |
957 | if (clp->cl_cb_conn.cb_xprt) { | ||
958 | svc_xprt_put(clp->cl_cb_conn.cb_xprt); | ||
959 | clp->cl_cb_conn.cb_xprt = NULL; | ||
960 | } | ||
929 | if (test_bit(NFSD4_CLIENT_KILL, &clp->cl_cb_flags)) | 961 | if (test_bit(NFSD4_CLIENT_KILL, &clp->cl_cb_flags)) |
930 | return; | 962 | return; |
931 | spin_lock(&clp->cl_lock); | 963 | spin_lock(&clp->cl_lock); |
@@ -936,11 +968,22 @@ void nfsd4_process_cb_update(struct nfsd4_callback *cb) | |||
936 | BUG_ON(!clp->cl_cb_flags); | 968 | BUG_ON(!clp->cl_cb_flags); |
937 | clear_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_cb_flags); | 969 | clear_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_cb_flags); |
938 | memcpy(&conn, &cb->cb_clp->cl_cb_conn, sizeof(struct nfs4_cb_conn)); | 970 | memcpy(&conn, &cb->cb_clp->cl_cb_conn, sizeof(struct nfs4_cb_conn)); |
971 | c = __nfsd4_find_backchannel(clp); | ||
972 | if (c) { | ||
973 | svc_xprt_get(c->cn_xprt); | ||
974 | conn.cb_xprt = c->cn_xprt; | ||
975 | ses = c->cn_session; | ||
976 | } | ||
939 | spin_unlock(&clp->cl_lock); | 977 | spin_unlock(&clp->cl_lock); |
940 | 978 | ||
941 | err = setup_callback_client(clp, &conn); | 979 | err = setup_callback_client(clp, &conn, ses); |
942 | if (err) | 980 | if (err) { |
943 | warn_no_callback_path(clp, err); | 981 | warn_no_callback_path(clp, err); |
982 | return; | ||
983 | } | ||
984 | /* Yay, the callback channel's back! Restart any callbacks: */ | ||
985 | list_for_each_entry(cb, &clp->cl_callbacks, cb_per_client) | ||
986 | run_nfsd4_cb(cb); | ||
944 | } | 987 | } |
945 | 988 | ||
946 | void nfsd4_do_callback_rpc(struct work_struct *w) | 989 | void nfsd4_do_callback_rpc(struct work_struct *w) |
@@ -965,10 +1008,11 @@ void nfsd4_do_callback_rpc(struct work_struct *w) | |||
965 | void nfsd4_cb_recall(struct nfs4_delegation *dp) | 1008 | void nfsd4_cb_recall(struct nfs4_delegation *dp) |
966 | { | 1009 | { |
967 | struct nfsd4_callback *cb = &dp->dl_recall; | 1010 | struct nfsd4_callback *cb = &dp->dl_recall; |
1011 | struct nfs4_client *clp = dp->dl_client; | ||
968 | 1012 | ||
969 | dp->dl_retries = 1; | 1013 | dp->dl_retries = 1; |
970 | cb->cb_op = dp; | 1014 | cb->cb_op = dp; |
971 | cb->cb_clp = dp->dl_client; | 1015 | cb->cb_clp = clp; |
972 | cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL]; | 1016 | cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL]; |
973 | cb->cb_msg.rpc_argp = cb; | 1017 | cb->cb_msg.rpc_argp = cb; |
974 | cb->cb_msg.rpc_resp = cb; | 1018 | cb->cb_msg.rpc_resp = cb; |
@@ -977,5 +1021,8 @@ void nfsd4_cb_recall(struct nfs4_delegation *dp) | |||
977 | cb->cb_ops = &nfsd4_cb_recall_ops; | 1021 | cb->cb_ops = &nfsd4_cb_recall_ops; |
978 | dp->dl_retries = 1; | 1022 | dp->dl_retries = 1; |
979 | 1023 | ||
980 | queue_work(callback_wq, &dp->dl_recall.cb_work); | 1024 | INIT_LIST_HEAD(&cb->cb_per_client); |
1025 | cb->cb_done = true; | ||
1026 | |||
1027 | run_nfsd4_cb(&dp->dl_recall); | ||
981 | } | 1028 | } |
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index f0695e815f0e..6d2c397d458b 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c | |||
@@ -33,10 +33,11 @@ | |||
33 | */ | 33 | */ |
34 | 34 | ||
35 | #include <linux/module.h> | 35 | #include <linux/module.h> |
36 | #include <linux/nfsd_idmap.h> | ||
37 | #include <linux/seq_file.h> | 36 | #include <linux/seq_file.h> |
38 | #include <linux/sched.h> | 37 | #include <linux/sched.h> |
39 | #include <linux/slab.h> | 38 | #include <linux/slab.h> |
39 | #include "idmap.h" | ||
40 | #include "nfsd.h" | ||
40 | 41 | ||
41 | /* | 42 | /* |
42 | * Cache entry | 43 | * Cache entry |
@@ -514,7 +515,7 @@ rqst_authname(struct svc_rqst *rqstp) | |||
514 | return clp->name; | 515 | return clp->name; |
515 | } | 516 | } |
516 | 517 | ||
517 | static int | 518 | static __be32 |
518 | idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, | 519 | idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, |
519 | uid_t *id) | 520 | uid_t *id) |
520 | { | 521 | { |
@@ -524,15 +525,15 @@ idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen | |||
524 | int ret; | 525 | int ret; |
525 | 526 | ||
526 | if (namelen + 1 > sizeof(key.name)) | 527 | if (namelen + 1 > sizeof(key.name)) |
527 | return -EINVAL; | 528 | return nfserr_badowner; |
528 | memcpy(key.name, name, namelen); | 529 | memcpy(key.name, name, namelen); |
529 | key.name[namelen] = '\0'; | 530 | key.name[namelen] = '\0'; |
530 | strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname)); | 531 | strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname)); |
531 | ret = idmap_lookup(rqstp, nametoid_lookup, &key, &nametoid_cache, &item); | 532 | ret = idmap_lookup(rqstp, nametoid_lookup, &key, &nametoid_cache, &item); |
532 | if (ret == -ENOENT) | 533 | if (ret == -ENOENT) |
533 | ret = -ESRCH; /* nfserr_badname */ | 534 | return nfserr_badowner; |
534 | if (ret) | 535 | if (ret) |
535 | return ret; | 536 | return nfserrno(ret); |
536 | *id = item->id; | 537 | *id = item->id; |
537 | cache_put(&item->h, &nametoid_cache); | 538 | cache_put(&item->h, &nametoid_cache); |
538 | return 0; | 539 | return 0; |
@@ -560,14 +561,14 @@ idmap_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name) | |||
560 | return ret; | 561 | return ret; |
561 | } | 562 | } |
562 | 563 | ||
563 | int | 564 | __be32 |
564 | nfsd_map_name_to_uid(struct svc_rqst *rqstp, const char *name, size_t namelen, | 565 | nfsd_map_name_to_uid(struct svc_rqst *rqstp, const char *name, size_t namelen, |
565 | __u32 *id) | 566 | __u32 *id) |
566 | { | 567 | { |
567 | return idmap_name_to_id(rqstp, IDMAP_TYPE_USER, name, namelen, id); | 568 | return idmap_name_to_id(rqstp, IDMAP_TYPE_USER, name, namelen, id); |
568 | } | 569 | } |
569 | 570 | ||
570 | int | 571 | __be32 |
571 | nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen, | 572 | nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen, |
572 | __u32 *id) | 573 | __u32 *id) |
573 | { | 574 | { |
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 0cdfd022bb7b..db52546143d1 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c | |||
@@ -604,9 +604,7 @@ nfsd4_link(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
604 | return status; | 604 | return status; |
605 | } | 605 | } |
606 | 606 | ||
607 | static __be32 | 607 | static __be32 nfsd4_do_lookupp(struct svc_rqst *rqstp, struct svc_fh *fh) |
608 | nfsd4_lookupp(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | ||
609 | void *arg) | ||
610 | { | 608 | { |
611 | struct svc_fh tmp_fh; | 609 | struct svc_fh tmp_fh; |
612 | __be32 ret; | 610 | __be32 ret; |
@@ -615,13 +613,19 @@ nfsd4_lookupp(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
615 | ret = exp_pseudoroot(rqstp, &tmp_fh); | 613 | ret = exp_pseudoroot(rqstp, &tmp_fh); |
616 | if (ret) | 614 | if (ret) |
617 | return ret; | 615 | return ret; |
618 | if (tmp_fh.fh_dentry == cstate->current_fh.fh_dentry) { | 616 | if (tmp_fh.fh_dentry == fh->fh_dentry) { |
619 | fh_put(&tmp_fh); | 617 | fh_put(&tmp_fh); |
620 | return nfserr_noent; | 618 | return nfserr_noent; |
621 | } | 619 | } |
622 | fh_put(&tmp_fh); | 620 | fh_put(&tmp_fh); |
623 | return nfsd_lookup(rqstp, &cstate->current_fh, | 621 | return nfsd_lookup(rqstp, fh, "..", 2, fh); |
624 | "..", 2, &cstate->current_fh); | 622 | } |
623 | |||
624 | static __be32 | ||
625 | nfsd4_lookupp(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | ||
626 | void *arg) | ||
627 | { | ||
628 | return nfsd4_do_lookupp(rqstp, &cstate->current_fh); | ||
625 | } | 629 | } |
626 | 630 | ||
627 | static __be32 | 631 | static __be32 |
@@ -769,10 +773,36 @@ nfsd4_secinfo(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
769 | } else | 773 | } else |
770 | secinfo->si_exp = exp; | 774 | secinfo->si_exp = exp; |
771 | dput(dentry); | 775 | dput(dentry); |
776 | if (cstate->minorversion) | ||
777 | /* See rfc 5661 section 2.6.3.1.1.8 */ | ||
778 | fh_put(&cstate->current_fh); | ||
772 | return err; | 779 | return err; |
773 | } | 780 | } |
774 | 781 | ||
775 | static __be32 | 782 | static __be32 |
783 | nfsd4_secinfo_no_name(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | ||
784 | struct nfsd4_secinfo_no_name *sin) | ||
785 | { | ||
786 | __be32 err; | ||
787 | |||
788 | switch (sin->sin_style) { | ||
789 | case NFS4_SECINFO_STYLE4_CURRENT_FH: | ||
790 | break; | ||
791 | case NFS4_SECINFO_STYLE4_PARENT: | ||
792 | err = nfsd4_do_lookupp(rqstp, &cstate->current_fh); | ||
793 | if (err) | ||
794 | return err; | ||
795 | break; | ||
796 | default: | ||
797 | return nfserr_inval; | ||
798 | } | ||
799 | exp_get(cstate->current_fh.fh_export); | ||
800 | sin->sin_exp = cstate->current_fh.fh_export; | ||
801 | fh_put(&cstate->current_fh); | ||
802 | return nfs_ok; | ||
803 | } | ||
804 | |||
805 | static __be32 | ||
776 | nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | 806 | nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, |
777 | struct nfsd4_setattr *setattr) | 807 | struct nfsd4_setattr *setattr) |
778 | { | 808 | { |
@@ -974,8 +1004,8 @@ static const char *nfsd4_op_name(unsigned opnum); | |||
974 | * Also note, enforced elsewhere: | 1004 | * Also note, enforced elsewhere: |
975 | * - SEQUENCE other than as first op results in | 1005 | * - SEQUENCE other than as first op results in |
976 | * NFS4ERR_SEQUENCE_POS. (Enforced in nfsd4_sequence().) | 1006 | * NFS4ERR_SEQUENCE_POS. (Enforced in nfsd4_sequence().) |
977 | * - BIND_CONN_TO_SESSION must be the only op in its compound | 1007 | * - BIND_CONN_TO_SESSION must be the only op in its compound. |
978 | * (Will be enforced in nfsd4_bind_conn_to_session().) | 1008 | * (Enforced in nfsd4_bind_conn_to_session().) |
979 | * - DESTROY_SESSION must be the final operation in a compound, if | 1009 | * - DESTROY_SESSION must be the final operation in a compound, if |
980 | * sessionid's in SEQUENCE and DESTROY_SESSION are the same. | 1010 | * sessionid's in SEQUENCE and DESTROY_SESSION are the same. |
981 | * (Enforced in nfsd4_destroy_session().) | 1011 | * (Enforced in nfsd4_destroy_session().) |
@@ -1126,10 +1156,6 @@ encode_op: | |||
1126 | 1156 | ||
1127 | nfsd4_increment_op_stats(op->opnum); | 1157 | nfsd4_increment_op_stats(op->opnum); |
1128 | } | 1158 | } |
1129 | if (!rqstp->rq_usedeferral && status == nfserr_dropit) { | ||
1130 | dprintk("%s Dropit - send NFS4ERR_DELAY\n", __func__); | ||
1131 | status = nfserr_jukebox; | ||
1132 | } | ||
1133 | 1159 | ||
1134 | resp->cstate.status = status; | 1160 | resp->cstate.status = status; |
1135 | fh_put(&resp->cstate.current_fh); | 1161 | fh_put(&resp->cstate.current_fh); |
@@ -1300,6 +1326,11 @@ static struct nfsd4_operation nfsd4_ops[] = { | |||
1300 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP, | 1326 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP, |
1301 | .op_name = "OP_EXCHANGE_ID", | 1327 | .op_name = "OP_EXCHANGE_ID", |
1302 | }, | 1328 | }, |
1329 | [OP_BIND_CONN_TO_SESSION] = { | ||
1330 | .op_func = (nfsd4op_func)nfsd4_bind_conn_to_session, | ||
1331 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP, | ||
1332 | .op_name = "OP_BIND_CONN_TO_SESSION", | ||
1333 | }, | ||
1303 | [OP_CREATE_SESSION] = { | 1334 | [OP_CREATE_SESSION] = { |
1304 | .op_func = (nfsd4op_func)nfsd4_create_session, | 1335 | .op_func = (nfsd4op_func)nfsd4_create_session, |
1305 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP, | 1336 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP, |
@@ -1320,6 +1351,10 @@ static struct nfsd4_operation nfsd4_ops[] = { | |||
1320 | .op_flags = ALLOWED_WITHOUT_FH, | 1351 | .op_flags = ALLOWED_WITHOUT_FH, |
1321 | .op_name = "OP_RECLAIM_COMPLETE", | 1352 | .op_name = "OP_RECLAIM_COMPLETE", |
1322 | }, | 1353 | }, |
1354 | [OP_SECINFO_NO_NAME] = { | ||
1355 | .op_func = (nfsd4op_func)nfsd4_secinfo_no_name, | ||
1356 | .op_name = "OP_SECINFO_NO_NAME", | ||
1357 | }, | ||
1323 | }; | 1358 | }; |
1324 | 1359 | ||
1325 | static const char *nfsd4_op_name(unsigned opnum) | 1360 | static const char *nfsd4_op_name(unsigned opnum) |
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 7e26caab2a26..ffb59ef6f82f 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c | |||
@@ -302,7 +302,6 @@ purge_old(struct dentry *parent, struct dentry *child) | |||
302 | { | 302 | { |
303 | int status; | 303 | int status; |
304 | 304 | ||
305 | /* note: we currently use this path only for minorversion 0 */ | ||
306 | if (nfs4_has_reclaimed_state(child->d_name.name, false)) | 305 | if (nfs4_has_reclaimed_state(child->d_name.name, false)) |
307 | return 0; | 306 | return 0; |
308 | 307 | ||
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index fbd18c3074bb..d98d0213285d 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c | |||
@@ -230,7 +230,8 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f | |||
230 | dp->dl_client = clp; | 230 | dp->dl_client = clp; |
231 | get_nfs4_file(fp); | 231 | get_nfs4_file(fp); |
232 | dp->dl_file = fp; | 232 | dp->dl_file = fp; |
233 | nfs4_file_get_access(fp, O_RDONLY); | 233 | dp->dl_vfs_file = find_readable_file(fp); |
234 | get_file(dp->dl_vfs_file); | ||
234 | dp->dl_flock = NULL; | 235 | dp->dl_flock = NULL; |
235 | dp->dl_type = type; | 236 | dp->dl_type = type; |
236 | dp->dl_stateid.si_boot = boot_time; | 237 | dp->dl_stateid.si_boot = boot_time; |
@@ -252,6 +253,7 @@ nfs4_put_delegation(struct nfs4_delegation *dp) | |||
252 | if (atomic_dec_and_test(&dp->dl_count)) { | 253 | if (atomic_dec_and_test(&dp->dl_count)) { |
253 | dprintk("NFSD: freeing dp %p\n",dp); | 254 | dprintk("NFSD: freeing dp %p\n",dp); |
254 | put_nfs4_file(dp->dl_file); | 255 | put_nfs4_file(dp->dl_file); |
256 | fput(dp->dl_vfs_file); | ||
255 | kmem_cache_free(deleg_slab, dp); | 257 | kmem_cache_free(deleg_slab, dp); |
256 | num_delegations--; | 258 | num_delegations--; |
257 | } | 259 | } |
@@ -265,12 +267,10 @@ nfs4_put_delegation(struct nfs4_delegation *dp) | |||
265 | static void | 267 | static void |
266 | nfs4_close_delegation(struct nfs4_delegation *dp) | 268 | nfs4_close_delegation(struct nfs4_delegation *dp) |
267 | { | 269 | { |
268 | struct file *filp = find_readable_file(dp->dl_file); | ||
269 | |||
270 | dprintk("NFSD: close_delegation dp %p\n",dp); | 270 | dprintk("NFSD: close_delegation dp %p\n",dp); |
271 | /* XXX: do we even need this check?: */ | ||
271 | if (dp->dl_flock) | 272 | if (dp->dl_flock) |
272 | vfs_setlease(filp, F_UNLCK, &dp->dl_flock); | 273 | vfs_setlease(dp->dl_vfs_file, F_UNLCK, &dp->dl_flock); |
273 | nfs4_file_put_access(dp->dl_file, O_RDONLY); | ||
274 | } | 274 | } |
275 | 275 | ||
276 | /* Called under the state lock. */ | 276 | /* Called under the state lock. */ |
@@ -642,6 +642,7 @@ static void nfsd4_conn_lost(struct svc_xpt_user *u) | |||
642 | free_conn(c); | 642 | free_conn(c); |
643 | } | 643 | } |
644 | spin_unlock(&clp->cl_lock); | 644 | spin_unlock(&clp->cl_lock); |
645 | nfsd4_probe_callback(clp); | ||
645 | } | 646 | } |
646 | 647 | ||
647 | static struct nfsd4_conn *alloc_conn(struct svc_rqst *rqstp, u32 flags) | 648 | static struct nfsd4_conn *alloc_conn(struct svc_rqst *rqstp, u32 flags) |
@@ -679,15 +680,12 @@ static int nfsd4_register_conn(struct nfsd4_conn *conn) | |||
679 | return register_xpt_user(conn->cn_xprt, &conn->cn_xpt_user); | 680 | return register_xpt_user(conn->cn_xprt, &conn->cn_xpt_user); |
680 | } | 681 | } |
681 | 682 | ||
682 | static __be32 nfsd4_new_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses) | 683 | static __be32 nfsd4_new_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses, u32 dir) |
683 | { | 684 | { |
684 | struct nfsd4_conn *conn; | 685 | struct nfsd4_conn *conn; |
685 | u32 flags = NFS4_CDFC4_FORE; | ||
686 | int ret; | 686 | int ret; |
687 | 687 | ||
688 | if (ses->se_flags & SESSION4_BACK_CHAN) | 688 | conn = alloc_conn(rqstp, dir); |
689 | flags |= NFS4_CDFC4_BACK; | ||
690 | conn = alloc_conn(rqstp, flags); | ||
691 | if (!conn) | 689 | if (!conn) |
692 | return nfserr_jukebox; | 690 | return nfserr_jukebox; |
693 | nfsd4_hash_conn(conn, ses); | 691 | nfsd4_hash_conn(conn, ses); |
@@ -698,6 +696,17 @@ static __be32 nfsd4_new_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses) | |||
698 | return nfs_ok; | 696 | return nfs_ok; |
699 | } | 697 | } |
700 | 698 | ||
699 | static __be32 nfsd4_new_conn_from_crses(struct svc_rqst *rqstp, struct nfsd4_session *ses) | ||
700 | { | ||
701 | u32 dir = NFS4_CDFC4_FORE; | ||
702 | |||
703 | if (ses->se_flags & SESSION4_BACK_CHAN) | ||
704 | dir |= NFS4_CDFC4_BACK; | ||
705 | |||
706 | return nfsd4_new_conn(rqstp, ses, dir); | ||
707 | } | ||
708 | |||
709 | /* must be called under client_lock */ | ||
701 | static void nfsd4_del_conns(struct nfsd4_session *s) | 710 | static void nfsd4_del_conns(struct nfsd4_session *s) |
702 | { | 711 | { |
703 | struct nfs4_client *clp = s->se_client; | 712 | struct nfs4_client *clp = s->se_client; |
@@ -749,6 +758,8 @@ static struct nfsd4_session *alloc_init_session(struct svc_rqst *rqstp, struct n | |||
749 | */ | 758 | */ |
750 | slotsize = nfsd4_sanitize_slot_size(fchan->maxresp_cached); | 759 | slotsize = nfsd4_sanitize_slot_size(fchan->maxresp_cached); |
751 | numslots = nfsd4_get_drc_mem(slotsize, fchan->maxreqs); | 760 | numslots = nfsd4_get_drc_mem(slotsize, fchan->maxreqs); |
761 | if (numslots < 1) | ||
762 | return NULL; | ||
752 | 763 | ||
753 | new = alloc_session(slotsize, numslots); | 764 | new = alloc_session(slotsize, numslots); |
754 | if (!new) { | 765 | if (!new) { |
@@ -769,25 +780,30 @@ static struct nfsd4_session *alloc_init_session(struct svc_rqst *rqstp, struct n | |||
769 | idx = hash_sessionid(&new->se_sessionid); | 780 | idx = hash_sessionid(&new->se_sessionid); |
770 | spin_lock(&client_lock); | 781 | spin_lock(&client_lock); |
771 | list_add(&new->se_hash, &sessionid_hashtbl[idx]); | 782 | list_add(&new->se_hash, &sessionid_hashtbl[idx]); |
783 | spin_lock(&clp->cl_lock); | ||
772 | list_add(&new->se_perclnt, &clp->cl_sessions); | 784 | list_add(&new->se_perclnt, &clp->cl_sessions); |
785 | spin_unlock(&clp->cl_lock); | ||
773 | spin_unlock(&client_lock); | 786 | spin_unlock(&client_lock); |
774 | 787 | ||
775 | status = nfsd4_new_conn(rqstp, new); | 788 | status = nfsd4_new_conn_from_crses(rqstp, new); |
776 | /* whoops: benny points out, status is ignored! (err, or bogus) */ | 789 | /* whoops: benny points out, status is ignored! (err, or bogus) */ |
777 | if (status) { | 790 | if (status) { |
778 | free_session(&new->se_ref); | 791 | free_session(&new->se_ref); |
779 | return NULL; | 792 | return NULL; |
780 | } | 793 | } |
781 | if (!clp->cl_cb_session && (cses->flags & SESSION4_BACK_CHAN)) { | 794 | if (cses->flags & SESSION4_BACK_CHAN) { |
782 | struct sockaddr *sa = svc_addr(rqstp); | 795 | struct sockaddr *sa = svc_addr(rqstp); |
783 | 796 | /* | |
784 | clp->cl_cb_session = new; | 797 | * This is a little silly; with sessions there's no real |
785 | clp->cl_cb_conn.cb_xprt = rqstp->rq_xprt; | 798 | * use for the callback address. Use the peer address |
786 | svc_xprt_get(rqstp->rq_xprt); | 799 | * as a reasonable default for now, but consider fixing |
800 | * the rpc client not to require an address in the | ||
801 | * future: | ||
802 | */ | ||
787 | rpc_copy_addr((struct sockaddr *)&clp->cl_cb_conn.cb_addr, sa); | 803 | rpc_copy_addr((struct sockaddr *)&clp->cl_cb_conn.cb_addr, sa); |
788 | clp->cl_cb_conn.cb_addrlen = svc_addr_len(sa); | 804 | clp->cl_cb_conn.cb_addrlen = svc_addr_len(sa); |
789 | nfsd4_probe_callback(clp); | ||
790 | } | 805 | } |
806 | nfsd4_probe_callback(clp); | ||
791 | return new; | 807 | return new; |
792 | } | 808 | } |
793 | 809 | ||
@@ -817,7 +833,9 @@ static void | |||
817 | unhash_session(struct nfsd4_session *ses) | 833 | unhash_session(struct nfsd4_session *ses) |
818 | { | 834 | { |
819 | list_del(&ses->se_hash); | 835 | list_del(&ses->se_hash); |
836 | spin_lock(&ses->se_client->cl_lock); | ||
820 | list_del(&ses->se_perclnt); | 837 | list_del(&ses->se_perclnt); |
838 | spin_unlock(&ses->se_client->cl_lock); | ||
821 | } | 839 | } |
822 | 840 | ||
823 | /* must be called under the client_lock */ | 841 | /* must be called under the client_lock */ |
@@ -923,8 +941,10 @@ unhash_client_locked(struct nfs4_client *clp) | |||
923 | 941 | ||
924 | mark_client_expired(clp); | 942 | mark_client_expired(clp); |
925 | list_del(&clp->cl_lru); | 943 | list_del(&clp->cl_lru); |
944 | spin_lock(&clp->cl_lock); | ||
926 | list_for_each_entry(ses, &clp->cl_sessions, se_perclnt) | 945 | list_for_each_entry(ses, &clp->cl_sessions, se_perclnt) |
927 | list_del_init(&ses->se_hash); | 946 | list_del_init(&ses->se_hash); |
947 | spin_unlock(&clp->cl_lock); | ||
928 | } | 948 | } |
929 | 949 | ||
930 | static void | 950 | static void |
@@ -1051,12 +1071,13 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir, | |||
1051 | 1071 | ||
1052 | memcpy(clp->cl_recdir, recdir, HEXDIR_LEN); | 1072 | memcpy(clp->cl_recdir, recdir, HEXDIR_LEN); |
1053 | atomic_set(&clp->cl_refcount, 0); | 1073 | atomic_set(&clp->cl_refcount, 0); |
1054 | atomic_set(&clp->cl_cb_set, 0); | 1074 | clp->cl_cb_state = NFSD4_CB_UNKNOWN; |
1055 | INIT_LIST_HEAD(&clp->cl_idhash); | 1075 | INIT_LIST_HEAD(&clp->cl_idhash); |
1056 | INIT_LIST_HEAD(&clp->cl_strhash); | 1076 | INIT_LIST_HEAD(&clp->cl_strhash); |
1057 | INIT_LIST_HEAD(&clp->cl_openowners); | 1077 | INIT_LIST_HEAD(&clp->cl_openowners); |
1058 | INIT_LIST_HEAD(&clp->cl_delegations); | 1078 | INIT_LIST_HEAD(&clp->cl_delegations); |
1059 | INIT_LIST_HEAD(&clp->cl_lru); | 1079 | INIT_LIST_HEAD(&clp->cl_lru); |
1080 | INIT_LIST_HEAD(&clp->cl_callbacks); | ||
1060 | spin_lock_init(&clp->cl_lock); | 1081 | spin_lock_init(&clp->cl_lock); |
1061 | INIT_WORK(&clp->cl_cb_null.cb_work, nfsd4_do_callback_rpc); | 1082 | INIT_WORK(&clp->cl_cb_null.cb_work, nfsd4_do_callback_rpc); |
1062 | clp->cl_time = get_seconds(); | 1083 | clp->cl_time = get_seconds(); |
@@ -1132,54 +1153,55 @@ find_unconfirmed_client(clientid_t *clid) | |||
1132 | return NULL; | 1153 | return NULL; |
1133 | } | 1154 | } |
1134 | 1155 | ||
1135 | /* | 1156 | static bool clp_used_exchangeid(struct nfs4_client *clp) |
1136 | * Return 1 iff clp's clientid establishment method matches the use_exchange_id | ||
1137 | * parameter. Matching is based on the fact the at least one of the | ||
1138 | * EXCHGID4_FLAG_USE_{NON_PNFS,PNFS_MDS,PNFS_DS} flags must be set for v4.1 | ||
1139 | * | ||
1140 | * FIXME: we need to unify the clientid namespaces for nfsv4.x | ||
1141 | * and correctly deal with client upgrade/downgrade in EXCHANGE_ID | ||
1142 | * and SET_CLIENTID{,_CONFIRM} | ||
1143 | */ | ||
1144 | static inline int | ||
1145 | match_clientid_establishment(struct nfs4_client *clp, bool use_exchange_id) | ||
1146 | { | 1157 | { |
1147 | bool has_exchange_flags = (clp->cl_exchange_flags != 0); | 1158 | return clp->cl_exchange_flags != 0; |
1148 | return use_exchange_id == has_exchange_flags; | 1159 | } |
1149 | } | ||
1150 | 1160 | ||
1151 | static struct nfs4_client * | 1161 | static struct nfs4_client * |
1152 | find_confirmed_client_by_str(const char *dname, unsigned int hashval, | 1162 | find_confirmed_client_by_str(const char *dname, unsigned int hashval) |
1153 | bool use_exchange_id) | ||
1154 | { | 1163 | { |
1155 | struct nfs4_client *clp; | 1164 | struct nfs4_client *clp; |
1156 | 1165 | ||
1157 | list_for_each_entry(clp, &conf_str_hashtbl[hashval], cl_strhash) { | 1166 | list_for_each_entry(clp, &conf_str_hashtbl[hashval], cl_strhash) { |
1158 | if (same_name(clp->cl_recdir, dname) && | 1167 | if (same_name(clp->cl_recdir, dname)) |
1159 | match_clientid_establishment(clp, use_exchange_id)) | ||
1160 | return clp; | 1168 | return clp; |
1161 | } | 1169 | } |
1162 | return NULL; | 1170 | return NULL; |
1163 | } | 1171 | } |
1164 | 1172 | ||
1165 | static struct nfs4_client * | 1173 | static struct nfs4_client * |
1166 | find_unconfirmed_client_by_str(const char *dname, unsigned int hashval, | 1174 | find_unconfirmed_client_by_str(const char *dname, unsigned int hashval) |
1167 | bool use_exchange_id) | ||
1168 | { | 1175 | { |
1169 | struct nfs4_client *clp; | 1176 | struct nfs4_client *clp; |
1170 | 1177 | ||
1171 | list_for_each_entry(clp, &unconf_str_hashtbl[hashval], cl_strhash) { | 1178 | list_for_each_entry(clp, &unconf_str_hashtbl[hashval], cl_strhash) { |
1172 | if (same_name(clp->cl_recdir, dname) && | 1179 | if (same_name(clp->cl_recdir, dname)) |
1173 | match_clientid_establishment(clp, use_exchange_id)) | ||
1174 | return clp; | 1180 | return clp; |
1175 | } | 1181 | } |
1176 | return NULL; | 1182 | return NULL; |
1177 | } | 1183 | } |
1178 | 1184 | ||
1185 | static void rpc_svcaddr2sockaddr(struct sockaddr *sa, unsigned short family, union svc_addr_u *svcaddr) | ||
1186 | { | ||
1187 | switch (family) { | ||
1188 | case AF_INET: | ||
1189 | ((struct sockaddr_in *)sa)->sin_family = AF_INET; | ||
1190 | ((struct sockaddr_in *)sa)->sin_addr = svcaddr->addr; | ||
1191 | return; | ||
1192 | case AF_INET6: | ||
1193 | ((struct sockaddr_in6 *)sa)->sin6_family = AF_INET6; | ||
1194 | ((struct sockaddr_in6 *)sa)->sin6_addr = svcaddr->addr6; | ||
1195 | return; | ||
1196 | } | ||
1197 | } | ||
1198 | |||
1179 | static void | 1199 | static void |
1180 | gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, u32 scopeid) | 1200 | gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, struct svc_rqst *rqstp) |
1181 | { | 1201 | { |
1182 | struct nfs4_cb_conn *conn = &clp->cl_cb_conn; | 1202 | struct nfs4_cb_conn *conn = &clp->cl_cb_conn; |
1203 | struct sockaddr *sa = svc_addr(rqstp); | ||
1204 | u32 scopeid = rpc_get_scope_id(sa); | ||
1183 | unsigned short expected_family; | 1205 | unsigned short expected_family; |
1184 | 1206 | ||
1185 | /* Currently, we only support tcp and tcp6 for the callback channel */ | 1207 | /* Currently, we only support tcp and tcp6 for the callback channel */ |
@@ -1205,6 +1227,7 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, u32 scopeid) | |||
1205 | 1227 | ||
1206 | conn->cb_prog = se->se_callback_prog; | 1228 | conn->cb_prog = se->se_callback_prog; |
1207 | conn->cb_ident = se->se_callback_ident; | 1229 | conn->cb_ident = se->se_callback_ident; |
1230 | rpc_svcaddr2sockaddr((struct sockaddr *)&conn->cb_saddr, expected_family, &rqstp->rq_daddr); | ||
1208 | return; | 1231 | return; |
1209 | out_err: | 1232 | out_err: |
1210 | conn->cb_addr.ss_family = AF_UNSPEC; | 1233 | conn->cb_addr.ss_family = AF_UNSPEC; |
@@ -1344,7 +1367,7 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, | |||
1344 | case SP4_NONE: | 1367 | case SP4_NONE: |
1345 | break; | 1368 | break; |
1346 | case SP4_SSV: | 1369 | case SP4_SSV: |
1347 | return nfserr_encr_alg_unsupp; | 1370 | return nfserr_serverfault; |
1348 | default: | 1371 | default: |
1349 | BUG(); /* checked by xdr code */ | 1372 | BUG(); /* checked by xdr code */ |
1350 | case SP4_MACH_CRED: | 1373 | case SP4_MACH_CRED: |
@@ -1361,8 +1384,12 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, | |||
1361 | nfs4_lock_state(); | 1384 | nfs4_lock_state(); |
1362 | status = nfs_ok; | 1385 | status = nfs_ok; |
1363 | 1386 | ||
1364 | conf = find_confirmed_client_by_str(dname, strhashval, true); | 1387 | conf = find_confirmed_client_by_str(dname, strhashval); |
1365 | if (conf) { | 1388 | if (conf) { |
1389 | if (!clp_used_exchangeid(conf)) { | ||
1390 | status = nfserr_clid_inuse; /* XXX: ? */ | ||
1391 | goto out; | ||
1392 | } | ||
1366 | if (!same_verf(&verf, &conf->cl_verifier)) { | 1393 | if (!same_verf(&verf, &conf->cl_verifier)) { |
1367 | /* 18.35.4 case 8 */ | 1394 | /* 18.35.4 case 8 */ |
1368 | if (exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A) { | 1395 | if (exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A) { |
@@ -1403,7 +1430,7 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, | |||
1403 | goto out; | 1430 | goto out; |
1404 | } | 1431 | } |
1405 | 1432 | ||
1406 | unconf = find_unconfirmed_client_by_str(dname, strhashval, true); | 1433 | unconf = find_unconfirmed_client_by_str(dname, strhashval); |
1407 | if (unconf) { | 1434 | if (unconf) { |
1408 | /* | 1435 | /* |
1409 | * Possible retry or client restart. Per 18.35.4 case 4, | 1436 | * Possible retry or client restart. Per 18.35.4 case 4, |
@@ -1560,6 +1587,8 @@ nfsd4_create_session(struct svc_rqst *rqstp, | |||
1560 | status = nfs_ok; | 1587 | status = nfs_ok; |
1561 | memcpy(cr_ses->sessionid.data, new->se_sessionid.data, | 1588 | memcpy(cr_ses->sessionid.data, new->se_sessionid.data, |
1562 | NFS4_MAX_SESSIONID_LEN); | 1589 | NFS4_MAX_SESSIONID_LEN); |
1590 | memcpy(&cr_ses->fore_channel, &new->se_fchannel, | ||
1591 | sizeof(struct nfsd4_channel_attrs)); | ||
1563 | cs_slot->sl_seqid++; | 1592 | cs_slot->sl_seqid++; |
1564 | cr_ses->seqid = cs_slot->sl_seqid; | 1593 | cr_ses->seqid = cs_slot->sl_seqid; |
1565 | 1594 | ||
@@ -1581,6 +1610,45 @@ static bool nfsd4_last_compound_op(struct svc_rqst *rqstp) | |||
1581 | return argp->opcnt == resp->opcnt; | 1610 | return argp->opcnt == resp->opcnt; |
1582 | } | 1611 | } |
1583 | 1612 | ||
1613 | static __be32 nfsd4_map_bcts_dir(u32 *dir) | ||
1614 | { | ||
1615 | switch (*dir) { | ||
1616 | case NFS4_CDFC4_FORE: | ||
1617 | case NFS4_CDFC4_BACK: | ||
1618 | return nfs_ok; | ||
1619 | case NFS4_CDFC4_FORE_OR_BOTH: | ||
1620 | case NFS4_CDFC4_BACK_OR_BOTH: | ||
1621 | *dir = NFS4_CDFC4_BOTH; | ||
1622 | return nfs_ok; | ||
1623 | }; | ||
1624 | return nfserr_inval; | ||
1625 | } | ||
1626 | |||
1627 | __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp, | ||
1628 | struct nfsd4_compound_state *cstate, | ||
1629 | struct nfsd4_bind_conn_to_session *bcts) | ||
1630 | { | ||
1631 | __be32 status; | ||
1632 | |||
1633 | if (!nfsd4_last_compound_op(rqstp)) | ||
1634 | return nfserr_not_only_op; | ||
1635 | spin_lock(&client_lock); | ||
1636 | cstate->session = find_in_sessionid_hashtbl(&bcts->sessionid); | ||
1637 | /* Sorta weird: we only need the refcnt'ing because new_conn acquires | ||
1638 | * client_lock iself: */ | ||
1639 | if (cstate->session) { | ||
1640 | nfsd4_get_session(cstate->session); | ||
1641 | atomic_inc(&cstate->session->se_client->cl_refcount); | ||
1642 | } | ||
1643 | spin_unlock(&client_lock); | ||
1644 | if (!cstate->session) | ||
1645 | return nfserr_badsession; | ||
1646 | |||
1647 | status = nfsd4_map_bcts_dir(&bcts->dir); | ||
1648 | nfsd4_new_conn(rqstp, cstate->session, bcts->dir); | ||
1649 | return nfs_ok; | ||
1650 | } | ||
1651 | |||
1584 | static bool nfsd4_compound_in_session(struct nfsd4_session *session, struct nfs4_sessionid *sid) | 1652 | static bool nfsd4_compound_in_session(struct nfsd4_session *session, struct nfs4_sessionid *sid) |
1585 | { | 1653 | { |
1586 | if (!session) | 1654 | if (!session) |
@@ -1619,8 +1687,7 @@ nfsd4_destroy_session(struct svc_rqst *r, | |||
1619 | spin_unlock(&client_lock); | 1687 | spin_unlock(&client_lock); |
1620 | 1688 | ||
1621 | nfs4_lock_state(); | 1689 | nfs4_lock_state(); |
1622 | /* wait for callbacks */ | 1690 | nfsd4_probe_callback_sync(ses->se_client); |
1623 | nfsd4_shutdown_callback(ses->se_client); | ||
1624 | nfs4_unlock_state(); | 1691 | nfs4_unlock_state(); |
1625 | 1692 | ||
1626 | nfsd4_del_conns(ses); | 1693 | nfsd4_del_conns(ses); |
@@ -1733,8 +1800,12 @@ nfsd4_sequence(struct svc_rqst *rqstp, | |||
1733 | out: | 1800 | out: |
1734 | /* Hold a session reference until done processing the compound. */ | 1801 | /* Hold a session reference until done processing the compound. */ |
1735 | if (cstate->session) { | 1802 | if (cstate->session) { |
1803 | struct nfs4_client *clp = session->se_client; | ||
1804 | |||
1736 | nfsd4_get_session(cstate->session); | 1805 | nfsd4_get_session(cstate->session); |
1737 | atomic_inc(&session->se_client->cl_refcount); | 1806 | atomic_inc(&clp->cl_refcount); |
1807 | if (clp->cl_cb_state == NFSD4_CB_DOWN) | ||
1808 | seq->status_flags |= SEQ4_STATUS_CB_PATH_DOWN; | ||
1738 | } | 1809 | } |
1739 | kfree(conn); | 1810 | kfree(conn); |
1740 | spin_unlock(&client_lock); | 1811 | spin_unlock(&client_lock); |
@@ -1775,7 +1846,6 @@ __be32 | |||
1775 | nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | 1846 | nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, |
1776 | struct nfsd4_setclientid *setclid) | 1847 | struct nfsd4_setclientid *setclid) |
1777 | { | 1848 | { |
1778 | struct sockaddr *sa = svc_addr(rqstp); | ||
1779 | struct xdr_netobj clname = { | 1849 | struct xdr_netobj clname = { |
1780 | .len = setclid->se_namelen, | 1850 | .len = setclid->se_namelen, |
1781 | .data = setclid->se_name, | 1851 | .data = setclid->se_name, |
@@ -1801,10 +1871,12 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
1801 | strhashval = clientstr_hashval(dname); | 1871 | strhashval = clientstr_hashval(dname); |
1802 | 1872 | ||
1803 | nfs4_lock_state(); | 1873 | nfs4_lock_state(); |
1804 | conf = find_confirmed_client_by_str(dname, strhashval, false); | 1874 | conf = find_confirmed_client_by_str(dname, strhashval); |
1805 | if (conf) { | 1875 | if (conf) { |
1806 | /* RFC 3530 14.2.33 CASE 0: */ | 1876 | /* RFC 3530 14.2.33 CASE 0: */ |
1807 | status = nfserr_clid_inuse; | 1877 | status = nfserr_clid_inuse; |
1878 | if (clp_used_exchangeid(conf)) | ||
1879 | goto out; | ||
1808 | if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) { | 1880 | if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) { |
1809 | char addr_str[INET6_ADDRSTRLEN]; | 1881 | char addr_str[INET6_ADDRSTRLEN]; |
1810 | rpc_ntop((struct sockaddr *) &conf->cl_addr, addr_str, | 1882 | rpc_ntop((struct sockaddr *) &conf->cl_addr, addr_str, |
@@ -1819,7 +1891,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
1819 | * has a description of SETCLIENTID request processing consisting | 1891 | * has a description of SETCLIENTID request processing consisting |
1820 | * of 5 bullet points, labeled as CASE0 - CASE4 below. | 1892 | * of 5 bullet points, labeled as CASE0 - CASE4 below. |
1821 | */ | 1893 | */ |
1822 | unconf = find_unconfirmed_client_by_str(dname, strhashval, false); | 1894 | unconf = find_unconfirmed_client_by_str(dname, strhashval); |
1823 | status = nfserr_resource; | 1895 | status = nfserr_resource; |
1824 | if (!conf) { | 1896 | if (!conf) { |
1825 | /* | 1897 | /* |
@@ -1876,7 +1948,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
1876 | * for consistent minorversion use throughout: | 1948 | * for consistent minorversion use throughout: |
1877 | */ | 1949 | */ |
1878 | new->cl_minorversion = 0; | 1950 | new->cl_minorversion = 0; |
1879 | gen_callback(new, setclid, rpc_get_scope_id(sa)); | 1951 | gen_callback(new, setclid, rqstp); |
1880 | add_to_unconfirmed(new, strhashval); | 1952 | add_to_unconfirmed(new, strhashval); |
1881 | setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot; | 1953 | setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot; |
1882 | setclid->se_clientid.cl_id = new->cl_clientid.cl_id; | 1954 | setclid->se_clientid.cl_id = new->cl_clientid.cl_id; |
@@ -1935,7 +2007,6 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, | |||
1935 | if (!same_creds(&conf->cl_cred, &unconf->cl_cred)) | 2007 | if (!same_creds(&conf->cl_cred, &unconf->cl_cred)) |
1936 | status = nfserr_clid_inuse; | 2008 | status = nfserr_clid_inuse; |
1937 | else { | 2009 | else { |
1938 | atomic_set(&conf->cl_cb_set, 0); | ||
1939 | nfsd4_change_callback(conf, &unconf->cl_cb_conn); | 2010 | nfsd4_change_callback(conf, &unconf->cl_cb_conn); |
1940 | nfsd4_probe_callback(conf); | 2011 | nfsd4_probe_callback(conf); |
1941 | expire_client(unconf); | 2012 | expire_client(unconf); |
@@ -1964,7 +2035,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, | |||
1964 | unsigned int hash = | 2035 | unsigned int hash = |
1965 | clientstr_hashval(unconf->cl_recdir); | 2036 | clientstr_hashval(unconf->cl_recdir); |
1966 | conf = find_confirmed_client_by_str(unconf->cl_recdir, | 2037 | conf = find_confirmed_client_by_str(unconf->cl_recdir, |
1967 | hash, false); | 2038 | hash); |
1968 | if (conf) { | 2039 | if (conf) { |
1969 | nfsd4_remove_clid_dir(conf); | 2040 | nfsd4_remove_clid_dir(conf); |
1970 | expire_client(conf); | 2041 | expire_client(conf); |
@@ -2300,41 +2371,6 @@ void nfsd_break_deleg_cb(struct file_lock *fl) | |||
2300 | nfsd4_cb_recall(dp); | 2371 | nfsd4_cb_recall(dp); |
2301 | } | 2372 | } |
2302 | 2373 | ||
2303 | /* | ||
2304 | * The file_lock is being reapd. | ||
2305 | * | ||
2306 | * Called by locks_free_lock() with lock_flocks() held. | ||
2307 | */ | ||
2308 | static | ||
2309 | void nfsd_release_deleg_cb(struct file_lock *fl) | ||
2310 | { | ||
2311 | struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner; | ||
2312 | |||
2313 | dprintk("NFSD nfsd_release_deleg_cb: fl %p dp %p dl_count %d\n", fl,dp, atomic_read(&dp->dl_count)); | ||
2314 | |||
2315 | if (!(fl->fl_flags & FL_LEASE) || !dp) | ||
2316 | return; | ||
2317 | dp->dl_flock = NULL; | ||
2318 | } | ||
2319 | |||
2320 | /* | ||
2321 | * Called from setlease() with lock_flocks() held | ||
2322 | */ | ||
2323 | static | ||
2324 | int nfsd_same_client_deleg_cb(struct file_lock *onlist, struct file_lock *try) | ||
2325 | { | ||
2326 | struct nfs4_delegation *onlistd = | ||
2327 | (struct nfs4_delegation *)onlist->fl_owner; | ||
2328 | struct nfs4_delegation *tryd = | ||
2329 | (struct nfs4_delegation *)try->fl_owner; | ||
2330 | |||
2331 | if (onlist->fl_lmops != try->fl_lmops) | ||
2332 | return 0; | ||
2333 | |||
2334 | return onlistd->dl_client == tryd->dl_client; | ||
2335 | } | ||
2336 | |||
2337 | |||
2338 | static | 2374 | static |
2339 | int nfsd_change_deleg_cb(struct file_lock **onlist, int arg) | 2375 | int nfsd_change_deleg_cb(struct file_lock **onlist, int arg) |
2340 | { | 2376 | { |
@@ -2346,8 +2382,6 @@ int nfsd_change_deleg_cb(struct file_lock **onlist, int arg) | |||
2346 | 2382 | ||
2347 | static const struct lock_manager_operations nfsd_lease_mng_ops = { | 2383 | static const struct lock_manager_operations nfsd_lease_mng_ops = { |
2348 | .fl_break = nfsd_break_deleg_cb, | 2384 | .fl_break = nfsd_break_deleg_cb, |
2349 | .fl_release_private = nfsd_release_deleg_cb, | ||
2350 | .fl_mylease = nfsd_same_client_deleg_cb, | ||
2351 | .fl_change = nfsd_change_deleg_cb, | 2385 | .fl_change = nfsd_change_deleg_cb, |
2352 | }; | 2386 | }; |
2353 | 2387 | ||
@@ -2514,8 +2548,6 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file | |||
2514 | if (!fp->fi_fds[oflag]) { | 2548 | if (!fp->fi_fds[oflag]) { |
2515 | status = nfsd_open(rqstp, cur_fh, S_IFREG, access, | 2549 | status = nfsd_open(rqstp, cur_fh, S_IFREG, access, |
2516 | &fp->fi_fds[oflag]); | 2550 | &fp->fi_fds[oflag]); |
2517 | if (status == nfserr_dropit) | ||
2518 | status = nfserr_jukebox; | ||
2519 | if (status) | 2551 | if (status) |
2520 | return status; | 2552 | return status; |
2521 | } | 2553 | } |
@@ -2596,6 +2628,19 @@ nfs4_set_claim_prev(struct nfsd4_open *open) | |||
2596 | open->op_stateowner->so_client->cl_firststate = 1; | 2628 | open->op_stateowner->so_client->cl_firststate = 1; |
2597 | } | 2629 | } |
2598 | 2630 | ||
2631 | /* Should we give out recallable state?: */ | ||
2632 | static bool nfsd4_cb_channel_good(struct nfs4_client *clp) | ||
2633 | { | ||
2634 | if (clp->cl_cb_state == NFSD4_CB_UP) | ||
2635 | return true; | ||
2636 | /* | ||
2637 | * In the sessions case, since we don't have to establish a | ||
2638 | * separate connection for callbacks, we assume it's OK | ||
2639 | * until we hear otherwise: | ||
2640 | */ | ||
2641 | return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN; | ||
2642 | } | ||
2643 | |||
2599 | /* | 2644 | /* |
2600 | * Attempt to hand out a delegation. | 2645 | * Attempt to hand out a delegation. |
2601 | */ | 2646 | */ |
@@ -2604,10 +2649,11 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta | |||
2604 | { | 2649 | { |
2605 | struct nfs4_delegation *dp; | 2650 | struct nfs4_delegation *dp; |
2606 | struct nfs4_stateowner *sop = stp->st_stateowner; | 2651 | struct nfs4_stateowner *sop = stp->st_stateowner; |
2607 | int cb_up = atomic_read(&sop->so_client->cl_cb_set); | 2652 | int cb_up; |
2608 | struct file_lock *fl; | 2653 | struct file_lock *fl; |
2609 | int status, flag = 0; | 2654 | int status, flag = 0; |
2610 | 2655 | ||
2656 | cb_up = nfsd4_cb_channel_good(sop->so_client); | ||
2611 | flag = NFS4_OPEN_DELEGATE_NONE; | 2657 | flag = NFS4_OPEN_DELEGATE_NONE; |
2612 | open->op_recall = 0; | 2658 | open->op_recall = 0; |
2613 | switch (open->op_claim_type) { | 2659 | switch (open->op_claim_type) { |
@@ -2655,7 +2701,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta | |||
2655 | dp->dl_flock = fl; | 2701 | dp->dl_flock = fl; |
2656 | 2702 | ||
2657 | /* vfs_setlease checks to see if delegation should be handed out. | 2703 | /* vfs_setlease checks to see if delegation should be handed out. |
2658 | * the lock_manager callbacks fl_mylease and fl_change are used | 2704 | * the lock_manager callback fl_change is used |
2659 | */ | 2705 | */ |
2660 | if ((status = vfs_setlease(fl->fl_file, fl->fl_type, &fl))) { | 2706 | if ((status = vfs_setlease(fl->fl_file, fl->fl_type, &fl))) { |
2661 | dprintk("NFSD: setlease failed [%d], no delegation\n", status); | 2707 | dprintk("NFSD: setlease failed [%d], no delegation\n", status); |
@@ -2794,7 +2840,7 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
2794 | renew_client(clp); | 2840 | renew_client(clp); |
2795 | status = nfserr_cb_path_down; | 2841 | status = nfserr_cb_path_down; |
2796 | if (!list_empty(&clp->cl_delegations) | 2842 | if (!list_empty(&clp->cl_delegations) |
2797 | && !atomic_read(&clp->cl_cb_set)) | 2843 | && clp->cl_cb_state != NFSD4_CB_UP) |
2798 | goto out; | 2844 | goto out; |
2799 | status = nfs_ok; | 2845 | status = nfs_ok; |
2800 | out: | 2846 | out: |
@@ -3081,9 +3127,10 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate, | |||
3081 | if (status) | 3127 | if (status) |
3082 | goto out; | 3128 | goto out; |
3083 | renew_client(dp->dl_client); | 3129 | renew_client(dp->dl_client); |
3084 | if (filpp) | 3130 | if (filpp) { |
3085 | *filpp = find_readable_file(dp->dl_file); | 3131 | *filpp = find_readable_file(dp->dl_file); |
3086 | BUG_ON(!*filpp); | 3132 | BUG_ON(!*filpp); |
3133 | } | ||
3087 | } else { /* open or lock stateid */ | 3134 | } else { /* open or lock stateid */ |
3088 | stp = find_stateid(stateid, flags); | 3135 | stp = find_stateid(stateid, flags); |
3089 | if (!stp) | 3136 | if (!stp) |
@@ -4107,7 +4154,7 @@ nfs4_has_reclaimed_state(const char *name, bool use_exchange_id) | |||
4107 | unsigned int strhashval = clientstr_hashval(name); | 4154 | unsigned int strhashval = clientstr_hashval(name); |
4108 | struct nfs4_client *clp; | 4155 | struct nfs4_client *clp; |
4109 | 4156 | ||
4110 | clp = find_confirmed_client_by_str(name, strhashval, use_exchange_id); | 4157 | clp = find_confirmed_client_by_str(name, strhashval); |
4111 | return clp ? 1 : 0; | 4158 | return clp ? 1 : 0; |
4112 | } | 4159 | } |
4113 | 4160 | ||
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index f35a94a04026..956629b9cdc9 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c | |||
@@ -44,13 +44,14 @@ | |||
44 | #include <linux/namei.h> | 44 | #include <linux/namei.h> |
45 | #include <linux/statfs.h> | 45 | #include <linux/statfs.h> |
46 | #include <linux/utsname.h> | 46 | #include <linux/utsname.h> |
47 | #include <linux/nfsd_idmap.h> | ||
48 | #include <linux/nfs4_acl.h> | ||
49 | #include <linux/sunrpc/svcauth_gss.h> | 47 | #include <linux/sunrpc/svcauth_gss.h> |
50 | 48 | ||
49 | #include "idmap.h" | ||
50 | #include "acl.h" | ||
51 | #include "xdr4.h" | 51 | #include "xdr4.h" |
52 | #include "vfs.h" | 52 | #include "vfs.h" |
53 | 53 | ||
54 | |||
54 | #define NFSDDBG_FACILITY NFSDDBG_XDR | 55 | #define NFSDDBG_FACILITY NFSDDBG_XDR |
55 | 56 | ||
56 | /* | 57 | /* |
@@ -288,17 +289,17 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, | |||
288 | len += XDR_QUADLEN(dummy32) << 2; | 289 | len += XDR_QUADLEN(dummy32) << 2; |
289 | READMEM(buf, dummy32); | 290 | READMEM(buf, dummy32); |
290 | ace->whotype = nfs4_acl_get_whotype(buf, dummy32); | 291 | ace->whotype = nfs4_acl_get_whotype(buf, dummy32); |
291 | host_err = 0; | 292 | status = nfs_ok; |
292 | if (ace->whotype != NFS4_ACL_WHO_NAMED) | 293 | if (ace->whotype != NFS4_ACL_WHO_NAMED) |
293 | ace->who = 0; | 294 | ace->who = 0; |
294 | else if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP) | 295 | else if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP) |
295 | host_err = nfsd_map_name_to_gid(argp->rqstp, | 296 | status = nfsd_map_name_to_gid(argp->rqstp, |
296 | buf, dummy32, &ace->who); | 297 | buf, dummy32, &ace->who); |
297 | else | 298 | else |
298 | host_err = nfsd_map_name_to_uid(argp->rqstp, | 299 | status = nfsd_map_name_to_uid(argp->rqstp, |
299 | buf, dummy32, &ace->who); | 300 | buf, dummy32, &ace->who); |
300 | if (host_err) | 301 | if (status) |
301 | goto out_nfserr; | 302 | return status; |
302 | } | 303 | } |
303 | } else | 304 | } else |
304 | *acl = NULL; | 305 | *acl = NULL; |
@@ -420,6 +421,21 @@ nfsd4_decode_access(struct nfsd4_compoundargs *argp, struct nfsd4_access *access | |||
420 | DECODE_TAIL; | 421 | DECODE_TAIL; |
421 | } | 422 | } |
422 | 423 | ||
424 | static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, struct nfsd4_bind_conn_to_session *bcts) | ||
425 | { | ||
426 | DECODE_HEAD; | ||
427 | u32 dummy; | ||
428 | |||
429 | READ_BUF(NFS4_MAX_SESSIONID_LEN + 8); | ||
430 | COPYMEM(bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN); | ||
431 | READ32(bcts->dir); | ||
432 | /* XXX: Perhaps Tom Tucker could help us figure out how we | ||
433 | * should be using ctsa_use_conn_in_rdma_mode: */ | ||
434 | READ32(dummy); | ||
435 | |||
436 | DECODE_TAIL; | ||
437 | } | ||
438 | |||
423 | static __be32 | 439 | static __be32 |
424 | nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close) | 440 | nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close) |
425 | { | 441 | { |
@@ -847,6 +863,17 @@ nfsd4_decode_secinfo(struct nfsd4_compoundargs *argp, | |||
847 | } | 863 | } |
848 | 864 | ||
849 | static __be32 | 865 | static __be32 |
866 | nfsd4_decode_secinfo_no_name(struct nfsd4_compoundargs *argp, | ||
867 | struct nfsd4_secinfo_no_name *sin) | ||
868 | { | ||
869 | DECODE_HEAD; | ||
870 | |||
871 | READ_BUF(4); | ||
872 | READ32(sin->sin_style); | ||
873 | DECODE_TAIL; | ||
874 | } | ||
875 | |||
876 | static __be32 | ||
850 | nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *setattr) | 877 | nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *setattr) |
851 | { | 878 | { |
852 | __be32 status; | 879 | __be32 status; |
@@ -1005,7 +1032,7 @@ static __be32 | |||
1005 | nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, | 1032 | nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, |
1006 | struct nfsd4_exchange_id *exid) | 1033 | struct nfsd4_exchange_id *exid) |
1007 | { | 1034 | { |
1008 | int dummy; | 1035 | int dummy, tmp; |
1009 | DECODE_HEAD; | 1036 | DECODE_HEAD; |
1010 | 1037 | ||
1011 | READ_BUF(NFS4_VERIFIER_SIZE); | 1038 | READ_BUF(NFS4_VERIFIER_SIZE); |
@@ -1053,15 +1080,23 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, | |||
1053 | 1080 | ||
1054 | /* ssp_hash_algs<> */ | 1081 | /* ssp_hash_algs<> */ |
1055 | READ_BUF(4); | 1082 | READ_BUF(4); |
1056 | READ32(dummy); | 1083 | READ32(tmp); |
1057 | READ_BUF(dummy); | 1084 | while (tmp--) { |
1058 | p += XDR_QUADLEN(dummy); | 1085 | READ_BUF(4); |
1086 | READ32(dummy); | ||
1087 | READ_BUF(dummy); | ||
1088 | p += XDR_QUADLEN(dummy); | ||
1089 | } | ||
1059 | 1090 | ||
1060 | /* ssp_encr_algs<> */ | 1091 | /* ssp_encr_algs<> */ |
1061 | READ_BUF(4); | 1092 | READ_BUF(4); |
1062 | READ32(dummy); | 1093 | READ32(tmp); |
1063 | READ_BUF(dummy); | 1094 | while (tmp--) { |
1064 | p += XDR_QUADLEN(dummy); | 1095 | READ_BUF(4); |
1096 | READ32(dummy); | ||
1097 | READ_BUF(dummy); | ||
1098 | p += XDR_QUADLEN(dummy); | ||
1099 | } | ||
1065 | 1100 | ||
1066 | /* ssp_window and ssp_num_gss_handles */ | 1101 | /* ssp_window and ssp_num_gss_handles */ |
1067 | READ_BUF(8); | 1102 | READ_BUF(8); |
@@ -1339,7 +1374,7 @@ static nfsd4_dec nfsd41_dec_ops[] = { | |||
1339 | 1374 | ||
1340 | /* new operations for NFSv4.1 */ | 1375 | /* new operations for NFSv4.1 */ |
1341 | [OP_BACKCHANNEL_CTL] = (nfsd4_dec)nfsd4_decode_notsupp, | 1376 | [OP_BACKCHANNEL_CTL] = (nfsd4_dec)nfsd4_decode_notsupp, |
1342 | [OP_BIND_CONN_TO_SESSION]= (nfsd4_dec)nfsd4_decode_notsupp, | 1377 | [OP_BIND_CONN_TO_SESSION]= (nfsd4_dec)nfsd4_decode_bind_conn_to_session, |
1343 | [OP_EXCHANGE_ID] = (nfsd4_dec)nfsd4_decode_exchange_id, | 1378 | [OP_EXCHANGE_ID] = (nfsd4_dec)nfsd4_decode_exchange_id, |
1344 | [OP_CREATE_SESSION] = (nfsd4_dec)nfsd4_decode_create_session, | 1379 | [OP_CREATE_SESSION] = (nfsd4_dec)nfsd4_decode_create_session, |
1345 | [OP_DESTROY_SESSION] = (nfsd4_dec)nfsd4_decode_destroy_session, | 1380 | [OP_DESTROY_SESSION] = (nfsd4_dec)nfsd4_decode_destroy_session, |
@@ -1350,7 +1385,7 @@ static nfsd4_dec nfsd41_dec_ops[] = { | |||
1350 | [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_notsupp, | 1385 | [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_notsupp, |
1351 | [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_notsupp, | 1386 | [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_notsupp, |
1352 | [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_notsupp, | 1387 | [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_notsupp, |
1353 | [OP_SECINFO_NO_NAME] = (nfsd4_dec)nfsd4_decode_notsupp, | 1388 | [OP_SECINFO_NO_NAME] = (nfsd4_dec)nfsd4_decode_secinfo_no_name, |
1354 | [OP_SEQUENCE] = (nfsd4_dec)nfsd4_decode_sequence, | 1389 | [OP_SEQUENCE] = (nfsd4_dec)nfsd4_decode_sequence, |
1355 | [OP_SET_SSV] = (nfsd4_dec)nfsd4_decode_notsupp, | 1390 | [OP_SET_SSV] = (nfsd4_dec)nfsd4_decode_notsupp, |
1356 | [OP_TEST_STATEID] = (nfsd4_dec)nfsd4_decode_notsupp, | 1391 | [OP_TEST_STATEID] = (nfsd4_dec)nfsd4_decode_notsupp, |
@@ -2309,8 +2344,6 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, | |||
2309 | case nfserr_resource: | 2344 | case nfserr_resource: |
2310 | nfserr = nfserr_toosmall; | 2345 | nfserr = nfserr_toosmall; |
2311 | goto fail; | 2346 | goto fail; |
2312 | case nfserr_dropit: | ||
2313 | goto fail; | ||
2314 | case nfserr_noent: | 2347 | case nfserr_noent: |
2315 | goto skip_entry; | 2348 | goto skip_entry; |
2316 | default: | 2349 | default: |
@@ -2365,6 +2398,21 @@ nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ | |||
2365 | return nfserr; | 2398 | return nfserr; |
2366 | } | 2399 | } |
2367 | 2400 | ||
2401 | static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_bind_conn_to_session *bcts) | ||
2402 | { | ||
2403 | __be32 *p; | ||
2404 | |||
2405 | if (!nfserr) { | ||
2406 | RESERVE_SPACE(NFS4_MAX_SESSIONID_LEN + 8); | ||
2407 | WRITEMEM(bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN); | ||
2408 | WRITE32(bcts->dir); | ||
2409 | /* XXX: ? */ | ||
2410 | WRITE32(0); | ||
2411 | ADJUST_ARGS(); | ||
2412 | } | ||
2413 | return nfserr; | ||
2414 | } | ||
2415 | |||
2368 | static __be32 | 2416 | static __be32 |
2369 | nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_close *close) | 2417 | nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_close *close) |
2370 | { | 2418 | { |
@@ -2826,11 +2874,10 @@ nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ | |||
2826 | } | 2874 | } |
2827 | 2875 | ||
2828 | static __be32 | 2876 | static __be32 |
2829 | nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr, | 2877 | nfsd4_do_encode_secinfo(struct nfsd4_compoundres *resp, |
2830 | struct nfsd4_secinfo *secinfo) | 2878 | __be32 nfserr,struct svc_export *exp) |
2831 | { | 2879 | { |
2832 | int i = 0; | 2880 | int i = 0; |
2833 | struct svc_export *exp = secinfo->si_exp; | ||
2834 | u32 nflavs; | 2881 | u32 nflavs; |
2835 | struct exp_flavor_info *flavs; | 2882 | struct exp_flavor_info *flavs; |
2836 | struct exp_flavor_info def_flavs[2]; | 2883 | struct exp_flavor_info def_flavs[2]; |
@@ -2892,6 +2939,20 @@ out: | |||
2892 | return nfserr; | 2939 | return nfserr; |
2893 | } | 2940 | } |
2894 | 2941 | ||
2942 | static __be32 | ||
2943 | nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr, | ||
2944 | struct nfsd4_secinfo *secinfo) | ||
2945 | { | ||
2946 | return nfsd4_do_encode_secinfo(resp, nfserr, secinfo->si_exp); | ||
2947 | } | ||
2948 | |||
2949 | static __be32 | ||
2950 | nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr, | ||
2951 | struct nfsd4_secinfo_no_name *secinfo) | ||
2952 | { | ||
2953 | return nfsd4_do_encode_secinfo(resp, nfserr, secinfo->sin_exp); | ||
2954 | } | ||
2955 | |||
2895 | /* | 2956 | /* |
2896 | * The SETATTR encode routine is special -- it always encodes a bitmap, | 2957 | * The SETATTR encode routine is special -- it always encodes a bitmap, |
2897 | * regardless of the error status. | 2958 | * regardless of the error status. |
@@ -3076,13 +3137,9 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr, | |||
3076 | WRITE32(seq->seqid); | 3137 | WRITE32(seq->seqid); |
3077 | WRITE32(seq->slotid); | 3138 | WRITE32(seq->slotid); |
3078 | WRITE32(seq->maxslots); | 3139 | WRITE32(seq->maxslots); |
3079 | /* | 3140 | /* For now: target_maxslots = maxslots */ |
3080 | * FIXME: for now: | ||
3081 | * target_maxslots = maxslots | ||
3082 | * status_flags = 0 | ||
3083 | */ | ||
3084 | WRITE32(seq->maxslots); | 3141 | WRITE32(seq->maxslots); |
3085 | WRITE32(0); | 3142 | WRITE32(seq->status_flags); |
3086 | 3143 | ||
3087 | ADJUST_ARGS(); | 3144 | ADJUST_ARGS(); |
3088 | resp->cstate.datap = p; /* DRC cache data pointer */ | 3145 | resp->cstate.datap = p; /* DRC cache data pointer */ |
@@ -3143,7 +3200,7 @@ static nfsd4_enc nfsd4_enc_ops[] = { | |||
3143 | 3200 | ||
3144 | /* NFSv4.1 operations */ | 3201 | /* NFSv4.1 operations */ |
3145 | [OP_BACKCHANNEL_CTL] = (nfsd4_enc)nfsd4_encode_noop, | 3202 | [OP_BACKCHANNEL_CTL] = (nfsd4_enc)nfsd4_encode_noop, |
3146 | [OP_BIND_CONN_TO_SESSION] = (nfsd4_enc)nfsd4_encode_noop, | 3203 | [OP_BIND_CONN_TO_SESSION] = (nfsd4_enc)nfsd4_encode_bind_conn_to_session, |
3147 | [OP_EXCHANGE_ID] = (nfsd4_enc)nfsd4_encode_exchange_id, | 3204 | [OP_EXCHANGE_ID] = (nfsd4_enc)nfsd4_encode_exchange_id, |
3148 | [OP_CREATE_SESSION] = (nfsd4_enc)nfsd4_encode_create_session, | 3205 | [OP_CREATE_SESSION] = (nfsd4_enc)nfsd4_encode_create_session, |
3149 | [OP_DESTROY_SESSION] = (nfsd4_enc)nfsd4_encode_destroy_session, | 3206 | [OP_DESTROY_SESSION] = (nfsd4_enc)nfsd4_encode_destroy_session, |
@@ -3154,7 +3211,7 @@ static nfsd4_enc nfsd4_enc_ops[] = { | |||
3154 | [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_noop, | 3211 | [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_noop, |
3155 | [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_noop, | 3212 | [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_noop, |
3156 | [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_noop, | 3213 | [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_noop, |
3157 | [OP_SECINFO_NO_NAME] = (nfsd4_enc)nfsd4_encode_noop, | 3214 | [OP_SECINFO_NO_NAME] = (nfsd4_enc)nfsd4_encode_secinfo_no_name, |
3158 | [OP_SEQUENCE] = (nfsd4_enc)nfsd4_encode_sequence, | 3215 | [OP_SEQUENCE] = (nfsd4_enc)nfsd4_encode_sequence, |
3159 | [OP_SET_SSV] = (nfsd4_enc)nfsd4_encode_noop, | 3216 | [OP_SET_SSV] = (nfsd4_enc)nfsd4_encode_noop, |
3160 | [OP_TEST_STATEID] = (nfsd4_enc)nfsd4_encode_noop, | 3217 | [OP_TEST_STATEID] = (nfsd4_enc)nfsd4_encode_noop, |
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 4514ebbee4d6..33b3e2b06779 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c | |||
@@ -8,12 +8,12 @@ | |||
8 | #include <linux/namei.h> | 8 | #include <linux/namei.h> |
9 | #include <linux/ctype.h> | 9 | #include <linux/ctype.h> |
10 | 10 | ||
11 | #include <linux/nfsd_idmap.h> | ||
12 | #include <linux/sunrpc/svcsock.h> | 11 | #include <linux/sunrpc/svcsock.h> |
13 | #include <linux/nfsd/syscall.h> | 12 | #include <linux/nfsd/syscall.h> |
14 | #include <linux/lockd/lockd.h> | 13 | #include <linux/lockd/lockd.h> |
15 | #include <linux/sunrpc/clnt.h> | 14 | #include <linux/sunrpc/clnt.h> |
16 | 15 | ||
16 | #include "idmap.h" | ||
17 | #include "nfsd.h" | 17 | #include "nfsd.h" |
18 | #include "cache.h" | 18 | #include "cache.h" |
19 | 19 | ||
@@ -127,6 +127,7 @@ static ssize_t nfsctl_transaction_write(struct file *file, const char __user *bu | |||
127 | 127 | ||
128 | static ssize_t nfsctl_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos) | 128 | static ssize_t nfsctl_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos) |
129 | { | 129 | { |
130 | #ifdef CONFIG_NFSD_DEPRECATED | ||
130 | static int warned; | 131 | static int warned; |
131 | if (file->f_dentry->d_name.name[0] == '.' && !warned) { | 132 | if (file->f_dentry->d_name.name[0] == '.' && !warned) { |
132 | printk(KERN_INFO | 133 | printk(KERN_INFO |
@@ -135,6 +136,7 @@ static ssize_t nfsctl_transaction_read(struct file *file, char __user *buf, size | |||
135 | current->comm, file->f_dentry->d_name.name); | 136 | current->comm, file->f_dentry->d_name.name); |
136 | warned = 1; | 137 | warned = 1; |
137 | } | 138 | } |
139 | #endif | ||
138 | if (! file->private_data) { | 140 | if (! file->private_data) { |
139 | /* An attempt to read a transaction file without writing | 141 | /* An attempt to read a transaction file without writing |
140 | * causes a 0-byte write so that the file can return | 142 | * causes a 0-byte write so that the file can return |
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 6b641cf2c19a..7ecfa2420307 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h | |||
@@ -158,6 +158,7 @@ void nfsd_lockd_shutdown(void); | |||
158 | #define nfserr_attrnotsupp cpu_to_be32(NFSERR_ATTRNOTSUPP) | 158 | #define nfserr_attrnotsupp cpu_to_be32(NFSERR_ATTRNOTSUPP) |
159 | #define nfserr_bad_xdr cpu_to_be32(NFSERR_BAD_XDR) | 159 | #define nfserr_bad_xdr cpu_to_be32(NFSERR_BAD_XDR) |
160 | #define nfserr_openmode cpu_to_be32(NFSERR_OPENMODE) | 160 | #define nfserr_openmode cpu_to_be32(NFSERR_OPENMODE) |
161 | #define nfserr_badowner cpu_to_be32(NFSERR_BADOWNER) | ||
161 | #define nfserr_locks_held cpu_to_be32(NFSERR_LOCKS_HELD) | 162 | #define nfserr_locks_held cpu_to_be32(NFSERR_LOCKS_HELD) |
162 | #define nfserr_op_illegal cpu_to_be32(NFSERR_OP_ILLEGAL) | 163 | #define nfserr_op_illegal cpu_to_be32(NFSERR_OP_ILLEGAL) |
163 | #define nfserr_grace cpu_to_be32(NFSERR_GRACE) | 164 | #define nfserr_grace cpu_to_be32(NFSERR_GRACE) |
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 08e17264784b..e15dc45fc5ec 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c | |||
@@ -735,9 +735,9 @@ nfserrno (int errno) | |||
735 | { nfserr_stale, -ESTALE }, | 735 | { nfserr_stale, -ESTALE }, |
736 | { nfserr_jukebox, -ETIMEDOUT }, | 736 | { nfserr_jukebox, -ETIMEDOUT }, |
737 | { nfserr_jukebox, -ERESTARTSYS }, | 737 | { nfserr_jukebox, -ERESTARTSYS }, |
738 | { nfserr_dropit, -EAGAIN }, | 738 | { nfserr_jukebox, -EAGAIN }, |
739 | { nfserr_dropit, -ENOMEM }, | 739 | { nfserr_jukebox, -EWOULDBLOCK }, |
740 | { nfserr_badname, -ESRCH }, | 740 | { nfserr_jukebox, -ENOMEM }, |
741 | { nfserr_io, -ETXTBSY }, | 741 | { nfserr_io, -ETXTBSY }, |
742 | { nfserr_notsupp, -EOPNOTSUPP }, | 742 | { nfserr_notsupp, -EOPNOTSUPP }, |
743 | { nfserr_toosmall, -ETOOSMALL }, | 743 | { nfserr_toosmall, -ETOOSMALL }, |
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 2bae1d86f5f2..18743c4d8bca 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c | |||
@@ -608,7 +608,7 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) | |||
608 | /* Now call the procedure handler, and encode NFS status. */ | 608 | /* Now call the procedure handler, and encode NFS status. */ |
609 | nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp); | 609 | nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp); |
610 | nfserr = map_new_errors(rqstp->rq_vers, nfserr); | 610 | nfserr = map_new_errors(rqstp->rq_vers, nfserr); |
611 | if (nfserr == nfserr_dropit) { | 611 | if (nfserr == nfserr_dropit || rqstp->rq_dropme) { |
612 | dprintk("nfsd: Dropping request; may be revisited later\n"); | 612 | dprintk("nfsd: Dropping request; may be revisited later\n"); |
613 | nfsd_cache_update(rqstp, RC_NOCACHE, NULL); | 613 | nfsd_cache_update(rqstp, RC_NOCACHE, NULL); |
614 | return 0; | 614 | return 0; |
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 39adc27b0685..3074656ba7bf 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h | |||
@@ -68,10 +68,12 @@ typedef struct { | |||
68 | struct nfsd4_callback { | 68 | struct nfsd4_callback { |
69 | void *cb_op; | 69 | void *cb_op; |
70 | struct nfs4_client *cb_clp; | 70 | struct nfs4_client *cb_clp; |
71 | struct list_head cb_per_client; | ||
71 | u32 cb_minorversion; | 72 | u32 cb_minorversion; |
72 | struct rpc_message cb_msg; | 73 | struct rpc_message cb_msg; |
73 | const struct rpc_call_ops *cb_ops; | 74 | const struct rpc_call_ops *cb_ops; |
74 | struct work_struct cb_work; | 75 | struct work_struct cb_work; |
76 | bool cb_done; | ||
75 | }; | 77 | }; |
76 | 78 | ||
77 | struct nfs4_delegation { | 79 | struct nfs4_delegation { |
@@ -81,6 +83,7 @@ struct nfs4_delegation { | |||
81 | atomic_t dl_count; /* ref count */ | 83 | atomic_t dl_count; /* ref count */ |
82 | struct nfs4_client *dl_client; | 84 | struct nfs4_client *dl_client; |
83 | struct nfs4_file *dl_file; | 85 | struct nfs4_file *dl_file; |
86 | struct file *dl_vfs_file; | ||
84 | struct file_lock *dl_flock; | 87 | struct file_lock *dl_flock; |
85 | u32 dl_type; | 88 | u32 dl_type; |
86 | time_t dl_time; | 89 | time_t dl_time; |
@@ -95,6 +98,7 @@ struct nfs4_delegation { | |||
95 | struct nfs4_cb_conn { | 98 | struct nfs4_cb_conn { |
96 | /* SETCLIENTID info */ | 99 | /* SETCLIENTID info */ |
97 | struct sockaddr_storage cb_addr; | 100 | struct sockaddr_storage cb_addr; |
101 | struct sockaddr_storage cb_saddr; | ||
98 | size_t cb_addrlen; | 102 | size_t cb_addrlen; |
99 | u32 cb_prog; /* used only in 4.0 case; | 103 | u32 cb_prog; /* used only in 4.0 case; |
100 | per-session otherwise */ | 104 | per-session otherwise */ |
@@ -146,6 +150,11 @@ struct nfsd4_create_session { | |||
146 | u32 gid; | 150 | u32 gid; |
147 | }; | 151 | }; |
148 | 152 | ||
153 | struct nfsd4_bind_conn_to_session { | ||
154 | struct nfs4_sessionid sessionid; | ||
155 | u32 dir; | ||
156 | }; | ||
157 | |||
149 | /* The single slot clientid cache structure */ | 158 | /* The single slot clientid cache structure */ |
150 | struct nfsd4_clid_slot { | 159 | struct nfsd4_clid_slot { |
151 | u32 sl_seqid; | 160 | u32 sl_seqid; |
@@ -235,9 +244,13 @@ struct nfs4_client { | |||
235 | unsigned long cl_cb_flags; | 244 | unsigned long cl_cb_flags; |
236 | struct rpc_clnt *cl_cb_client; | 245 | struct rpc_clnt *cl_cb_client; |
237 | u32 cl_cb_ident; | 246 | u32 cl_cb_ident; |
238 | atomic_t cl_cb_set; | 247 | #define NFSD4_CB_UP 0 |
248 | #define NFSD4_CB_UNKNOWN 1 | ||
249 | #define NFSD4_CB_DOWN 2 | ||
250 | int cl_cb_state; | ||
239 | struct nfsd4_callback cl_cb_null; | 251 | struct nfsd4_callback cl_cb_null; |
240 | struct nfsd4_session *cl_cb_session; | 252 | struct nfsd4_session *cl_cb_session; |
253 | struct list_head cl_callbacks; /* list of in-progress callbacks */ | ||
241 | 254 | ||
242 | /* for all client information that callback code might need: */ | 255 | /* for all client information that callback code might need: */ |
243 | spinlock_t cl_lock; | 256 | spinlock_t cl_lock; |
@@ -454,6 +467,7 @@ extern __be32 nfs4_check_open_reclaim(clientid_t *clid); | |||
454 | extern void nfs4_free_stateowner(struct kref *kref); | 467 | extern void nfs4_free_stateowner(struct kref *kref); |
455 | extern int set_callback_cred(void); | 468 | extern int set_callback_cred(void); |
456 | extern void nfsd4_probe_callback(struct nfs4_client *clp); | 469 | extern void nfsd4_probe_callback(struct nfs4_client *clp); |
470 | extern void nfsd4_probe_callback_sync(struct nfs4_client *clp); | ||
457 | extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); | 471 | extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); |
458 | extern void nfsd4_do_callback_rpc(struct work_struct *); | 472 | extern void nfsd4_do_callback_rpc(struct work_struct *); |
459 | extern void nfsd4_cb_recall(struct nfs4_delegation *dp); | 473 | extern void nfsd4_cb_recall(struct nfs4_delegation *dp); |
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 230b79fbf005..641117f2188d 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c | |||
@@ -1,4 +1,3 @@ | |||
1 | #define MSNFS /* HACK HACK */ | ||
2 | /* | 1 | /* |
3 | * File operations used by nfsd. Some of these have been ripped from | 2 | * File operations used by nfsd. Some of these have been ripped from |
4 | * other parts of the kernel because they weren't exported, others | 3 | * other parts of the kernel because they weren't exported, others |
@@ -35,8 +34,8 @@ | |||
35 | #endif /* CONFIG_NFSD_V3 */ | 34 | #endif /* CONFIG_NFSD_V3 */ |
36 | 35 | ||
37 | #ifdef CONFIG_NFSD_V4 | 36 | #ifdef CONFIG_NFSD_V4 |
38 | #include <linux/nfs4_acl.h> | 37 | #include "acl.h" |
39 | #include <linux/nfsd_idmap.h> | 38 | #include "idmap.h" |
40 | #endif /* CONFIG_NFSD_V4 */ | 39 | #endif /* CONFIG_NFSD_V4 */ |
41 | 40 | ||
42 | #include "nfsd.h" | 41 | #include "nfsd.h" |
@@ -88,8 +87,9 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, | |||
88 | .dentry = dget(dentry)}; | 87 | .dentry = dget(dentry)}; |
89 | int err = 0; | 88 | int err = 0; |
90 | 89 | ||
91 | while (d_mountpoint(path.dentry) && follow_down(&path)) | 90 | err = follow_down(&path, false); |
92 | ; | 91 | if (err < 0) |
92 | goto out; | ||
93 | 93 | ||
94 | exp2 = rqst_exp_get_by_name(rqstp, &path); | 94 | exp2 = rqst_exp_get_by_name(rqstp, &path); |
95 | if (IS_ERR(exp2)) { | 95 | if (IS_ERR(exp2)) { |
@@ -273,6 +273,13 @@ out: | |||
273 | return err; | 273 | return err; |
274 | } | 274 | } |
275 | 275 | ||
276 | static int nfsd_break_lease(struct inode *inode) | ||
277 | { | ||
278 | if (!S_ISREG(inode->i_mode)) | ||
279 | return 0; | ||
280 | return break_lease(inode, O_WRONLY | O_NONBLOCK); | ||
281 | } | ||
282 | |||
276 | /* | 283 | /* |
277 | * Commit metadata changes to stable storage. | 284 | * Commit metadata changes to stable storage. |
278 | */ | 285 | */ |
@@ -375,16 +382,6 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, | |||
375 | goto out; | 382 | goto out; |
376 | } | 383 | } |
377 | 384 | ||
378 | /* | ||
379 | * If we are changing the size of the file, then | ||
380 | * we need to break all leases. | ||
381 | */ | ||
382 | host_err = break_lease(inode, O_WRONLY | O_NONBLOCK); | ||
383 | if (host_err == -EWOULDBLOCK) | ||
384 | host_err = -ETIMEDOUT; | ||
385 | if (host_err) /* ENOMEM or EWOULDBLOCK */ | ||
386 | goto out_nfserr; | ||
387 | |||
388 | host_err = get_write_access(inode); | 385 | host_err = get_write_access(inode); |
389 | if (host_err) | 386 | if (host_err) |
390 | goto out_nfserr; | 387 | goto out_nfserr; |
@@ -425,7 +422,11 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, | |||
425 | 422 | ||
426 | err = nfserr_notsync; | 423 | err = nfserr_notsync; |
427 | if (!check_guard || guardtime == inode->i_ctime.tv_sec) { | 424 | if (!check_guard || guardtime == inode->i_ctime.tv_sec) { |
425 | host_err = nfsd_break_lease(inode); | ||
426 | if (host_err) | ||
427 | goto out_nfserr; | ||
428 | fh_lock(fhp); | 428 | fh_lock(fhp); |
429 | |||
429 | host_err = notify_change(dentry, iap); | 430 | host_err = notify_change(dentry, iap); |
430 | err = nfserrno(host_err); | 431 | err = nfserrno(host_err); |
431 | fh_unlock(fhp); | 432 | fh_unlock(fhp); |
@@ -752,8 +753,6 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, | |||
752 | */ | 753 | */ |
753 | if (!(access & NFSD_MAY_NOT_BREAK_LEASE)) | 754 | if (!(access & NFSD_MAY_NOT_BREAK_LEASE)) |
754 | host_err = break_lease(inode, O_NONBLOCK | ((access & NFSD_MAY_WRITE) ? O_WRONLY : 0)); | 755 | host_err = break_lease(inode, O_NONBLOCK | ((access & NFSD_MAY_WRITE) ? O_WRONLY : 0)); |
755 | if (host_err == -EWOULDBLOCK) | ||
756 | host_err = -ETIMEDOUT; | ||
757 | if (host_err) /* NOMEM or WOULDBLOCK */ | 756 | if (host_err) /* NOMEM or WOULDBLOCK */ |
758 | goto out_nfserr; | 757 | goto out_nfserr; |
759 | 758 | ||
@@ -874,15 +873,6 @@ static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe, | |||
874 | return __splice_from_pipe(pipe, sd, nfsd_splice_actor); | 873 | return __splice_from_pipe(pipe, sd, nfsd_splice_actor); |
875 | } | 874 | } |
876 | 875 | ||
877 | static inline int svc_msnfs(struct svc_fh *ffhp) | ||
878 | { | ||
879 | #ifdef MSNFS | ||
880 | return (ffhp->fh_export->ex_flags & NFSEXP_MSNFS); | ||
881 | #else | ||
882 | return 0; | ||
883 | #endif | ||
884 | } | ||
885 | |||
886 | static __be32 | 876 | static __be32 |
887 | nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, | 877 | nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, |
888 | loff_t offset, struct kvec *vec, int vlen, unsigned long *count) | 878 | loff_t offset, struct kvec *vec, int vlen, unsigned long *count) |
@@ -895,9 +885,6 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, | |||
895 | err = nfserr_perm; | 885 | err = nfserr_perm; |
896 | inode = file->f_path.dentry->d_inode; | 886 | inode = file->f_path.dentry->d_inode; |
897 | 887 | ||
898 | if (svc_msnfs(fhp) && !lock_may_read(inode, offset, *count)) | ||
899 | goto out; | ||
900 | |||
901 | if (file->f_op->splice_read && rqstp->rq_splice_ok) { | 888 | if (file->f_op->splice_read && rqstp->rq_splice_ok) { |
902 | struct splice_desc sd = { | 889 | struct splice_desc sd = { |
903 | .len = 0, | 890 | .len = 0, |
@@ -922,7 +909,6 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, | |||
922 | fsnotify_access(file); | 909 | fsnotify_access(file); |
923 | } else | 910 | } else |
924 | err = nfserrno(host_err); | 911 | err = nfserrno(host_err); |
925 | out: | ||
926 | return err; | 912 | return err; |
927 | } | 913 | } |
928 | 914 | ||
@@ -987,14 +973,6 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, | |||
987 | int stable = *stablep; | 973 | int stable = *stablep; |
988 | int use_wgather; | 974 | int use_wgather; |
989 | 975 | ||
990 | #ifdef MSNFS | ||
991 | err = nfserr_perm; | ||
992 | |||
993 | if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) && | ||
994 | (!lock_may_write(file->f_path.dentry->d_inode, offset, *cnt))) | ||
995 | goto out; | ||
996 | #endif | ||
997 | |||
998 | dentry = file->f_path.dentry; | 976 | dentry = file->f_path.dentry; |
999 | inode = dentry->d_inode; | 977 | inode = dentry->d_inode; |
1000 | exp = fhp->fh_export; | 978 | exp = fhp->fh_export; |
@@ -1045,7 +1023,6 @@ out_nfserr: | |||
1045 | err = 0; | 1023 | err = 0; |
1046 | else | 1024 | else |
1047 | err = nfserrno(host_err); | 1025 | err = nfserrno(host_err); |
1048 | out: | ||
1049 | return err; | 1026 | return err; |
1050 | } | 1027 | } |
1051 | 1028 | ||
@@ -1665,6 +1642,12 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, | |||
1665 | err = nfserrno(host_err); | 1642 | err = nfserrno(host_err); |
1666 | goto out_dput; | 1643 | goto out_dput; |
1667 | } | 1644 | } |
1645 | err = nfserr_noent; | ||
1646 | if (!dold->d_inode) | ||
1647 | goto out_drop_write; | ||
1648 | host_err = nfsd_break_lease(dold->d_inode); | ||
1649 | if (host_err) | ||
1650 | goto out_drop_write; | ||
1668 | host_err = vfs_link(dold, dirp, dnew); | 1651 | host_err = vfs_link(dold, dirp, dnew); |
1669 | if (!host_err) { | 1652 | if (!host_err) { |
1670 | err = nfserrno(commit_metadata(ffhp)); | 1653 | err = nfserrno(commit_metadata(ffhp)); |
@@ -1676,6 +1659,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, | |||
1676 | else | 1659 | else |
1677 | err = nfserrno(host_err); | 1660 | err = nfserrno(host_err); |
1678 | } | 1661 | } |
1662 | out_drop_write: | ||
1679 | mnt_drop_write(tfhp->fh_export->ex_path.mnt); | 1663 | mnt_drop_write(tfhp->fh_export->ex_path.mnt); |
1680 | out_dput: | 1664 | out_dput: |
1681 | dput(dnew); | 1665 | dput(dnew); |
@@ -1750,12 +1734,6 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, | |||
1750 | if (ndentry == trap) | 1734 | if (ndentry == trap) |
1751 | goto out_dput_new; | 1735 | goto out_dput_new; |
1752 | 1736 | ||
1753 | if (svc_msnfs(ffhp) && | ||
1754 | ((odentry->d_count > 1) || (ndentry->d_count > 1))) { | ||
1755 | host_err = -EPERM; | ||
1756 | goto out_dput_new; | ||
1757 | } | ||
1758 | |||
1759 | host_err = -EXDEV; | 1737 | host_err = -EXDEV; |
1760 | if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt) | 1738 | if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt) |
1761 | goto out_dput_new; | 1739 | goto out_dput_new; |
@@ -1763,15 +1741,17 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, | |||
1763 | if (host_err) | 1741 | if (host_err) |
1764 | goto out_dput_new; | 1742 | goto out_dput_new; |
1765 | 1743 | ||
1744 | host_err = nfsd_break_lease(odentry->d_inode); | ||
1745 | if (host_err) | ||
1746 | goto out_drop_write; | ||
1766 | host_err = vfs_rename(fdir, odentry, tdir, ndentry); | 1747 | host_err = vfs_rename(fdir, odentry, tdir, ndentry); |
1767 | if (!host_err) { | 1748 | if (!host_err) { |
1768 | host_err = commit_metadata(tfhp); | 1749 | host_err = commit_metadata(tfhp); |
1769 | if (!host_err) | 1750 | if (!host_err) |
1770 | host_err = commit_metadata(ffhp); | 1751 | host_err = commit_metadata(ffhp); |
1771 | } | 1752 | } |
1772 | 1753 | out_drop_write: | |
1773 | mnt_drop_write(ffhp->fh_export->ex_path.mnt); | 1754 | mnt_drop_write(ffhp->fh_export->ex_path.mnt); |
1774 | |||
1775 | out_dput_new: | 1755 | out_dput_new: |
1776 | dput(ndentry); | 1756 | dput(ndentry); |
1777 | out_dput_old: | 1757 | out_dput_old: |
@@ -1834,18 +1814,14 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, | |||
1834 | if (host_err) | 1814 | if (host_err) |
1835 | goto out_nfserr; | 1815 | goto out_nfserr; |
1836 | 1816 | ||
1837 | if (type != S_IFDIR) { /* It's UNLINK */ | 1817 | host_err = nfsd_break_lease(rdentry->d_inode); |
1838 | #ifdef MSNFS | 1818 | if (host_err) |
1839 | if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) && | 1819 | goto out_put; |
1840 | (rdentry->d_count > 1)) { | 1820 | if (type != S_IFDIR) |
1841 | host_err = -EPERM; | ||
1842 | } else | ||
1843 | #endif | ||
1844 | host_err = vfs_unlink(dirp, rdentry); | 1821 | host_err = vfs_unlink(dirp, rdentry); |
1845 | } else { /* It's RMDIR */ | 1822 | else |
1846 | host_err = vfs_rmdir(dirp, rdentry); | 1823 | host_err = vfs_rmdir(dirp, rdentry); |
1847 | } | 1824 | out_put: |
1848 | |||
1849 | dput(rdentry); | 1825 | dput(rdentry); |
1850 | 1826 | ||
1851 | if (!host_err) | 1827 | if (!host_err) |
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 60fce3dc5cb5..366401e1a536 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h | |||
@@ -311,6 +311,11 @@ struct nfsd4_secinfo { | |||
311 | struct svc_export *si_exp; /* response */ | 311 | struct svc_export *si_exp; /* response */ |
312 | }; | 312 | }; |
313 | 313 | ||
314 | struct nfsd4_secinfo_no_name { | ||
315 | u32 sin_style; /* request */ | ||
316 | struct svc_export *sin_exp; /* response */ | ||
317 | }; | ||
318 | |||
314 | struct nfsd4_setattr { | 319 | struct nfsd4_setattr { |
315 | stateid_t sa_stateid; /* request */ | 320 | stateid_t sa_stateid; /* request */ |
316 | u32 sa_bmval[3]; /* request */ | 321 | u32 sa_bmval[3]; /* request */ |
@@ -373,8 +378,8 @@ struct nfsd4_sequence { | |||
373 | u32 cachethis; /* request */ | 378 | u32 cachethis; /* request */ |
374 | #if 0 | 379 | #if 0 |
375 | u32 target_maxslots; /* response */ | 380 | u32 target_maxslots; /* response */ |
376 | u32 status_flags; /* response */ | ||
377 | #endif /* not yet */ | 381 | #endif /* not yet */ |
382 | u32 status_flags; /* response */ | ||
378 | }; | 383 | }; |
379 | 384 | ||
380 | struct nfsd4_destroy_session { | 385 | struct nfsd4_destroy_session { |
@@ -422,6 +427,7 @@ struct nfsd4_op { | |||
422 | 427 | ||
423 | /* NFSv4.1 */ | 428 | /* NFSv4.1 */ |
424 | struct nfsd4_exchange_id exchange_id; | 429 | struct nfsd4_exchange_id exchange_id; |
430 | struct nfsd4_bind_conn_to_session bind_conn_to_session; | ||
425 | struct nfsd4_create_session create_session; | 431 | struct nfsd4_create_session create_session; |
426 | struct nfsd4_destroy_session destroy_session; | 432 | struct nfsd4_destroy_session destroy_session; |
427 | struct nfsd4_sequence sequence; | 433 | struct nfsd4_sequence sequence; |
@@ -518,6 +524,7 @@ extern __be32 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, | |||
518 | struct nfsd4_sequence *seq); | 524 | struct nfsd4_sequence *seq); |
519 | extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp, | 525 | extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp, |
520 | struct nfsd4_compound_state *, struct nfsd4_exchange_id *); | 526 | struct nfsd4_compound_state *, struct nfsd4_exchange_id *); |
527 | extern __be32 nfsd4_bind_conn_to_session(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_bind_conn_to_session *); | ||
521 | extern __be32 nfsd4_create_session(struct svc_rqst *, | 528 | extern __be32 nfsd4_create_session(struct svc_rqst *, |
522 | struct nfsd4_compound_state *, | 529 | struct nfsd4_compound_state *, |
523 | struct nfsd4_create_session *); | 530 | struct nfsd4_create_session *); |
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 0994f6a76c07..58fd707174e1 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c | |||
@@ -704,7 +704,8 @@ skip_mount_setup: | |||
704 | sbp[0]->s_state = | 704 | sbp[0]->s_state = |
705 | cpu_to_le16(le16_to_cpu(sbp[0]->s_state) & ~NILFS_VALID_FS); | 705 | cpu_to_le16(le16_to_cpu(sbp[0]->s_state) & ~NILFS_VALID_FS); |
706 | /* synchronize sbp[1] with sbp[0] */ | 706 | /* synchronize sbp[1] with sbp[0] */ |
707 | memcpy(sbp[1], sbp[0], nilfs->ns_sbsize); | 707 | if (sbp[1]) |
708 | memcpy(sbp[1], sbp[0], nilfs->ns_sbsize); | ||
708 | return nilfs_commit_super(sbi, NILFS_SB_COMMIT_ALL); | 709 | return nilfs_commit_super(sbi, NILFS_SB_COMMIT_ALL); |
709 | } | 710 | } |
710 | 711 | ||
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index b572b6727181..326e7475a22a 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /** | 1 | /** |
2 | * mft.c - NTFS kernel mft record operations. Part of the Linux-NTFS project. | 2 | * mft.c - NTFS kernel mft record operations. Part of the Linux-NTFS project. |
3 | * | 3 | * |
4 | * Copyright (c) 2001-2006 Anton Altaparmakov | 4 | * Copyright (c) 2001-2011 Anton Altaparmakov and Tuxera Inc. |
5 | * Copyright (c) 2002 Richard Russon | 5 | * Copyright (c) 2002 Richard Russon |
6 | * | 6 | * |
7 | * This program/include file is free software; you can redistribute it and/or | 7 | * This program/include file is free software; you can redistribute it and/or |
@@ -2576,6 +2576,8 @@ mft_rec_already_initialized: | |||
2576 | flush_dcache_page(page); | 2576 | flush_dcache_page(page); |
2577 | SetPageUptodate(page); | 2577 | SetPageUptodate(page); |
2578 | if (base_ni) { | 2578 | if (base_ni) { |
2579 | MFT_RECORD *m_tmp; | ||
2580 | |||
2579 | /* | 2581 | /* |
2580 | * Setup the base mft record in the extent mft record. This | 2582 | * Setup the base mft record in the extent mft record. This |
2581 | * completes initialization of the allocated extent mft record | 2583 | * completes initialization of the allocated extent mft record |
@@ -2588,11 +2590,11 @@ mft_rec_already_initialized: | |||
2588 | * attach it to the base inode @base_ni and map, pin, and lock | 2590 | * attach it to the base inode @base_ni and map, pin, and lock |
2589 | * its, i.e. the allocated, mft record. | 2591 | * its, i.e. the allocated, mft record. |
2590 | */ | 2592 | */ |
2591 | m = map_extent_mft_record(base_ni, bit, &ni); | 2593 | m_tmp = map_extent_mft_record(base_ni, bit, &ni); |
2592 | if (IS_ERR(m)) { | 2594 | if (IS_ERR(m_tmp)) { |
2593 | ntfs_error(vol->sb, "Failed to map allocated extent " | 2595 | ntfs_error(vol->sb, "Failed to map allocated extent " |
2594 | "mft record 0x%llx.", (long long)bit); | 2596 | "mft record 0x%llx.", (long long)bit); |
2595 | err = PTR_ERR(m); | 2597 | err = PTR_ERR(m_tmp); |
2596 | /* Set the mft record itself not in use. */ | 2598 | /* Set the mft record itself not in use. */ |
2597 | m->flags &= cpu_to_le16( | 2599 | m->flags &= cpu_to_le16( |
2598 | ~le16_to_cpu(MFT_RECORD_IN_USE)); | 2600 | ~le16_to_cpu(MFT_RECORD_IN_USE)); |
@@ -2603,6 +2605,7 @@ mft_rec_already_initialized: | |||
2603 | ntfs_unmap_page(page); | 2605 | ntfs_unmap_page(page); |
2604 | goto undo_mftbmp_alloc; | 2606 | goto undo_mftbmp_alloc; |
2605 | } | 2607 | } |
2608 | BUG_ON(m != m_tmp); | ||
2606 | /* | 2609 | /* |
2607 | * Make sure the allocated mft record is written out to disk. | 2610 | * Make sure the allocated mft record is written out to disk. |
2608 | * No need to set the inode dirty because the caller is going | 2611 | * No need to set the inode dirty because the caller is going |
diff --git a/fs/ocfs2/Kconfig b/fs/ocfs2/Kconfig index ab152c00cd3a..77a8de5f7119 100644 --- a/fs/ocfs2/Kconfig +++ b/fs/ocfs2/Kconfig | |||
@@ -1,7 +1,6 @@ | |||
1 | config OCFS2_FS | 1 | config OCFS2_FS |
2 | tristate "OCFS2 file system support" | 2 | tristate "OCFS2 file system support" |
3 | depends on NET && SYSFS | 3 | depends on NET && SYSFS && CONFIGFS_FS |
4 | select CONFIGFS_FS | ||
5 | select JBD2 | 4 | select JBD2 |
6 | select CRC32 | 5 | select CRC32 |
7 | select QUOTA | 6 | select QUOTA |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 63e3fca266e0..a6651956482e 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -1989,20 +1989,20 @@ int ocfs2_change_file_space(struct file *file, unsigned int cmd, | |||
1989 | return __ocfs2_change_file_space(file, inode, file->f_pos, cmd, sr, 0); | 1989 | return __ocfs2_change_file_space(file, inode, file->f_pos, cmd, sr, 0); |
1990 | } | 1990 | } |
1991 | 1991 | ||
1992 | static long ocfs2_fallocate(struct inode *inode, int mode, loff_t offset, | 1992 | static long ocfs2_fallocate(struct file *file, int mode, loff_t offset, |
1993 | loff_t len) | 1993 | loff_t len) |
1994 | { | 1994 | { |
1995 | struct inode *inode = file->f_path.dentry->d_inode; | ||
1995 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 1996 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
1996 | struct ocfs2_space_resv sr; | 1997 | struct ocfs2_space_resv sr; |
1997 | int change_size = 1; | 1998 | int change_size = 1; |
1998 | int cmd = OCFS2_IOC_RESVSP64; | 1999 | int cmd = OCFS2_IOC_RESVSP64; |
1999 | 2000 | ||
2001 | if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) | ||
2002 | return -EOPNOTSUPP; | ||
2000 | if (!ocfs2_writes_unwritten_extents(osb)) | 2003 | if (!ocfs2_writes_unwritten_extents(osb)) |
2001 | return -EOPNOTSUPP; | 2004 | return -EOPNOTSUPP; |
2002 | 2005 | ||
2003 | if (S_ISDIR(inode->i_mode)) | ||
2004 | return -ENODEV; | ||
2005 | |||
2006 | if (mode & FALLOC_FL_KEEP_SIZE) | 2006 | if (mode & FALLOC_FL_KEEP_SIZE) |
2007 | change_size = 0; | 2007 | change_size = 0; |
2008 | 2008 | ||
@@ -2610,7 +2610,6 @@ const struct inode_operations ocfs2_file_iops = { | |||
2610 | .getxattr = generic_getxattr, | 2610 | .getxattr = generic_getxattr, |
2611 | .listxattr = ocfs2_listxattr, | 2611 | .listxattr = ocfs2_listxattr, |
2612 | .removexattr = generic_removexattr, | 2612 | .removexattr = generic_removexattr, |
2613 | .fallocate = ocfs2_fallocate, | ||
2614 | .fiemap = ocfs2_fiemap, | 2613 | .fiemap = ocfs2_fiemap, |
2615 | }; | 2614 | }; |
2616 | 2615 | ||
@@ -2642,6 +2641,7 @@ const struct file_operations ocfs2_fops = { | |||
2642 | .flock = ocfs2_flock, | 2641 | .flock = ocfs2_flock, |
2643 | .splice_read = ocfs2_file_splice_read, | 2642 | .splice_read = ocfs2_file_splice_read, |
2644 | .splice_write = ocfs2_file_splice_write, | 2643 | .splice_write = ocfs2_file_splice_write, |
2644 | .fallocate = ocfs2_fallocate, | ||
2645 | }; | 2645 | }; |
2646 | 2646 | ||
2647 | const struct file_operations ocfs2_dops = { | 2647 | const struct file_operations ocfs2_dops = { |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 06d1f749ca89..38f986d2447e 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -993,8 +993,7 @@ static void ocfs2_disable_quotas(struct ocfs2_super *osb) | |||
993 | } | 993 | } |
994 | 994 | ||
995 | /* Handle quota on quotactl */ | 995 | /* Handle quota on quotactl */ |
996 | static int ocfs2_quota_on(struct super_block *sb, int type, int format_id, | 996 | static int ocfs2_quota_on(struct super_block *sb, int type, int format_id) |
997 | char *path) | ||
998 | { | 997 | { |
999 | unsigned int feature[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA, | 998 | unsigned int feature[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA, |
1000 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA}; | 999 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA}; |
@@ -1013,7 +1012,7 @@ static int ocfs2_quota_off(struct super_block *sb, int type) | |||
1013 | } | 1012 | } |
1014 | 1013 | ||
1015 | static const struct quotactl_ops ocfs2_quotactl_ops = { | 1014 | static const struct quotactl_ops ocfs2_quotactl_ops = { |
1016 | .quota_on = ocfs2_quota_on, | 1015 | .quota_on_meta = ocfs2_quota_on, |
1017 | .quota_off = ocfs2_quota_off, | 1016 | .quota_off = ocfs2_quota_off, |
1018 | .quota_sync = dquot_quota_sync, | 1017 | .quota_sync = dquot_quota_sync, |
1019 | .get_info = dquot_get_dqinfo, | 1018 | .get_info = dquot_get_dqinfo, |
@@ -255,10 +255,10 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len) | |||
255 | if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0)) | 255 | if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0)) |
256 | return -EFBIG; | 256 | return -EFBIG; |
257 | 257 | ||
258 | if (!inode->i_op->fallocate) | 258 | if (!file->f_op->fallocate) |
259 | return -EOPNOTSUPP; | 259 | return -EOPNOTSUPP; |
260 | 260 | ||
261 | return inode->i_op->fallocate(inode, mode, offset, len); | 261 | return file->f_op->fallocate(file, mode, offset, len); |
262 | } | 262 | } |
263 | 263 | ||
264 | SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len) | 264 | SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len) |
@@ -790,6 +790,8 @@ struct file *nameidata_to_filp(struct nameidata *nd) | |||
790 | 790 | ||
791 | /* Pick up the filp from the open intent */ | 791 | /* Pick up the filp from the open intent */ |
792 | filp = nd->intent.open.file; | 792 | filp = nd->intent.open.file; |
793 | nd->intent.open.file = NULL; | ||
794 | |||
793 | /* Has the filesystem initialised the file for us? */ | 795 | /* Has the filesystem initialised the file for us? */ |
794 | if (filp->f_path.dentry == NULL) { | 796 | if (filp->f_path.dentry == NULL) { |
795 | path_get(&nd->path); | 797 | path_get(&nd->path); |
@@ -441,7 +441,7 @@ redo: | |||
441 | break; | 441 | break; |
442 | } | 442 | } |
443 | if (do_wakeup) { | 443 | if (do_wakeup) { |
444 | wake_up_interruptible_sync_poll(&pipe->wait, POLLOUT); | 444 | wake_up_interruptible_sync_poll(&pipe->wait, POLLOUT | POLLWRNORM); |
445 | kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); | 445 | kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); |
446 | } | 446 | } |
447 | pipe_wait(pipe); | 447 | pipe_wait(pipe); |
@@ -450,7 +450,7 @@ redo: | |||
450 | 450 | ||
451 | /* Signal writers asynchronously that there is more room. */ | 451 | /* Signal writers asynchronously that there is more room. */ |
452 | if (do_wakeup) { | 452 | if (do_wakeup) { |
453 | wake_up_interruptible_sync_poll(&pipe->wait, POLLOUT); | 453 | wake_up_interruptible_sync_poll(&pipe->wait, POLLOUT | POLLWRNORM); |
454 | kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); | 454 | kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); |
455 | } | 455 | } |
456 | if (ret > 0) | 456 | if (ret > 0) |
@@ -612,7 +612,7 @@ redo2: | |||
612 | break; | 612 | break; |
613 | } | 613 | } |
614 | if (do_wakeup) { | 614 | if (do_wakeup) { |
615 | wake_up_interruptible_sync_poll(&pipe->wait, POLLIN); | 615 | wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLRDNORM); |
616 | kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); | 616 | kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); |
617 | do_wakeup = 0; | 617 | do_wakeup = 0; |
618 | } | 618 | } |
@@ -623,7 +623,7 @@ redo2: | |||
623 | out: | 623 | out: |
624 | mutex_unlock(&inode->i_mutex); | 624 | mutex_unlock(&inode->i_mutex); |
625 | if (do_wakeup) { | 625 | if (do_wakeup) { |
626 | wake_up_interruptible_sync_poll(&pipe->wait, POLLIN); | 626 | wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLRDNORM); |
627 | kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); | 627 | kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); |
628 | } | 628 | } |
629 | if (ret > 0) | 629 | if (ret > 0) |
@@ -715,7 +715,7 @@ pipe_release(struct inode *inode, int decr, int decw) | |||
715 | if (!pipe->readers && !pipe->writers) { | 715 | if (!pipe->readers && !pipe->writers) { |
716 | free_pipe_info(inode); | 716 | free_pipe_info(inode); |
717 | } else { | 717 | } else { |
718 | wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLOUT); | 718 | wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM | POLLERR | POLLHUP); |
719 | kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); | 719 | kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); |
720 | kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); | 720 | kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); |
721 | } | 721 | } |
@@ -1292,7 +1292,7 @@ static int __init init_pipe_fs(void) | |||
1292 | static void __exit exit_pipe_fs(void) | 1292 | static void __exit exit_pipe_fs(void) |
1293 | { | 1293 | { |
1294 | unregister_filesystem(&pipe_fs_type); | 1294 | unregister_filesystem(&pipe_fs_type); |
1295 | mntput_long(pipe_mnt); | 1295 | mntput(pipe_mnt); |
1296 | } | 1296 | } |
1297 | 1297 | ||
1298 | fs_initcall(init_pipe_fs); | 1298 | fs_initcall(init_pipe_fs); |
diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 39df95a0ec25..b1cf6bf4b41d 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c | |||
@@ -22,6 +22,7 @@ | |||
22 | 22 | ||
23 | #include <linux/errno.h> | 23 | #include <linux/errno.h> |
24 | 24 | ||
25 | EXPORT_SYMBOL(posix_acl_init); | ||
25 | EXPORT_SYMBOL(posix_acl_alloc); | 26 | EXPORT_SYMBOL(posix_acl_alloc); |
26 | EXPORT_SYMBOL(posix_acl_clone); | 27 | EXPORT_SYMBOL(posix_acl_clone); |
27 | EXPORT_SYMBOL(posix_acl_valid); | 28 | EXPORT_SYMBOL(posix_acl_valid); |
@@ -32,6 +33,16 @@ EXPORT_SYMBOL(posix_acl_chmod_masq); | |||
32 | EXPORT_SYMBOL(posix_acl_permission); | 33 | EXPORT_SYMBOL(posix_acl_permission); |
33 | 34 | ||
34 | /* | 35 | /* |
36 | * Init a fresh posix_acl | ||
37 | */ | ||
38 | void | ||
39 | posix_acl_init(struct posix_acl *acl, int count) | ||
40 | { | ||
41 | atomic_set(&acl->a_refcount, 1); | ||
42 | acl->a_count = count; | ||
43 | } | ||
44 | |||
45 | /* | ||
35 | * Allocate a new ACL with the specified number of entries. | 46 | * Allocate a new ACL with the specified number of entries. |
36 | */ | 47 | */ |
37 | struct posix_acl * | 48 | struct posix_acl * |
@@ -40,10 +51,8 @@ posix_acl_alloc(int count, gfp_t flags) | |||
40 | const size_t size = sizeof(struct posix_acl) + | 51 | const size_t size = sizeof(struct posix_acl) + |
41 | count * sizeof(struct posix_acl_entry); | 52 | count * sizeof(struct posix_acl_entry); |
42 | struct posix_acl *acl = kmalloc(size, flags); | 53 | struct posix_acl *acl = kmalloc(size, flags); |
43 | if (acl) { | 54 | if (acl) |
44 | atomic_set(&acl->a_refcount, 1); | 55 | posix_acl_init(acl, count); |
45 | acl->a_count = count; | ||
46 | } | ||
47 | return acl; | 56 | return acl; |
48 | } | 57 | } |
49 | 58 | ||
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig index 6a0068841d96..15af6222f8a4 100644 --- a/fs/proc/Kconfig +++ b/fs/proc/Kconfig | |||
@@ -1,5 +1,5 @@ | |||
1 | config PROC_FS | 1 | config PROC_FS |
2 | bool "/proc file system support" if EMBEDDED | 2 | bool "/proc file system support" if EXPERT |
3 | default y | 3 | default y |
4 | help | 4 | help |
5 | This is a virtual file system providing information about the status | 5 | This is a virtual file system providing information about the status |
@@ -40,7 +40,7 @@ config PROC_VMCORE | |||
40 | Exports the dump image of crashed kernel in ELF format. | 40 | Exports the dump image of crashed kernel in ELF format. |
41 | 41 | ||
42 | config PROC_SYSCTL | 42 | config PROC_SYSCTL |
43 | bool "Sysctl support (/proc/sys)" if EMBEDDED | 43 | bool "Sysctl support (/proc/sys)" if EXPERT |
44 | depends on PROC_FS | 44 | depends on PROC_FS |
45 | select SYSCTL | 45 | select SYSCTL |
46 | default y | 46 | default y |
@@ -61,7 +61,7 @@ config PROC_SYSCTL | |||
61 | config PROC_PAGE_MONITOR | 61 | config PROC_PAGE_MONITOR |
62 | default y | 62 | default y |
63 | depends on PROC_FS && MMU | 63 | depends on PROC_FS && MMU |
64 | bool "Enable /proc page monitoring" if EMBEDDED | 64 | bool "Enable /proc page monitoring" if EXPERT |
65 | help | 65 | help |
66 | Various /proc files exist to monitor process memory utilization: | 66 | Various /proc files exist to monitor process memory utilization: |
67 | /proc/pid/smaps, /proc/pid/clear_refs, /proc/pid/pagemap, | 67 | /proc/pid/smaps, /proc/pid/clear_refs, /proc/pid/pagemap, |
diff --git a/fs/proc/base.c b/fs/proc/base.c index 93f1cdd5d3d7..9d096e82b201 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -1151,7 +1151,7 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf, | |||
1151 | goto err_task_lock; | 1151 | goto err_task_lock; |
1152 | } | 1152 | } |
1153 | 1153 | ||
1154 | if (oom_score_adj < task->signal->oom_score_adj && | 1154 | if (oom_score_adj < task->signal->oom_score_adj_min && |
1155 | !capable(CAP_SYS_RESOURCE)) { | 1155 | !capable(CAP_SYS_RESOURCE)) { |
1156 | err = -EACCES; | 1156 | err = -EACCES; |
1157 | goto err_sighand; | 1157 | goto err_sighand; |
@@ -1164,6 +1164,8 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf, | |||
1164 | atomic_dec(&task->mm->oom_disable_count); | 1164 | atomic_dec(&task->mm->oom_disable_count); |
1165 | } | 1165 | } |
1166 | task->signal->oom_score_adj = oom_score_adj; | 1166 | task->signal->oom_score_adj = oom_score_adj; |
1167 | if (has_capability_noaudit(current, CAP_SYS_RESOURCE)) | ||
1168 | task->signal->oom_score_adj_min = oom_score_adj; | ||
1167 | /* | 1169 | /* |
1168 | * Scale /proc/pid/oom_adj appropriately ensuring that OOM_DISABLE is | 1170 | * Scale /proc/pid/oom_adj appropriately ensuring that OOM_DISABLE is |
1169 | * always attainable. | 1171 | * always attainable. |
diff --git a/fs/proc/consoles.c b/fs/proc/consoles.c index eafc22ab1fdd..b701eaa482bf 100644 --- a/fs/proc/consoles.c +++ b/fs/proc/consoles.c | |||
@@ -67,7 +67,7 @@ static void *c_start(struct seq_file *m, loff_t *pos) | |||
67 | struct console *con; | 67 | struct console *con; |
68 | loff_t off = 0; | 68 | loff_t off = 0; |
69 | 69 | ||
70 | acquire_console_sem(); | 70 | console_lock(); |
71 | for_each_console(con) | 71 | for_each_console(con) |
72 | if (off++ == *pos) | 72 | if (off++ == *pos) |
73 | break; | 73 | break; |
@@ -84,7 +84,7 @@ static void *c_next(struct seq_file *m, void *v, loff_t *pos) | |||
84 | 84 | ||
85 | static void c_stop(struct seq_file *m, void *v) | 85 | static void c_stop(struct seq_file *m, void *v) |
86 | { | 86 | { |
87 | release_console_sem(); | 87 | console_unlock(); |
88 | } | 88 | } |
89 | 89 | ||
90 | static const struct seq_operations consoles_op = { | 90 | static const struct seq_operations consoles_op = { |
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index a65239cfd97e..ed257d141568 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c | |||
@@ -101,6 +101,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v) | |||
101 | #ifdef CONFIG_MEMORY_FAILURE | 101 | #ifdef CONFIG_MEMORY_FAILURE |
102 | "HardwareCorrupted: %5lu kB\n" | 102 | "HardwareCorrupted: %5lu kB\n" |
103 | #endif | 103 | #endif |
104 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
105 | "AnonHugePages: %8lu kB\n" | ||
106 | #endif | ||
104 | , | 107 | , |
105 | K(i.totalram), | 108 | K(i.totalram), |
106 | K(i.freeram), | 109 | K(i.freeram), |
@@ -128,7 +131,12 @@ static int meminfo_proc_show(struct seq_file *m, void *v) | |||
128 | K(i.freeswap), | 131 | K(i.freeswap), |
129 | K(global_page_state(NR_FILE_DIRTY)), | 132 | K(global_page_state(NR_FILE_DIRTY)), |
130 | K(global_page_state(NR_WRITEBACK)), | 133 | K(global_page_state(NR_WRITEBACK)), |
131 | K(global_page_state(NR_ANON_PAGES)), | 134 | K(global_page_state(NR_ANON_PAGES) |
135 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
136 | + global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) * | ||
137 | HPAGE_PMD_NR | ||
138 | #endif | ||
139 | ), | ||
132 | K(global_page_state(NR_FILE_MAPPED)), | 140 | K(global_page_state(NR_FILE_MAPPED)), |
133 | K(global_page_state(NR_SHMEM)), | 141 | K(global_page_state(NR_SHMEM)), |
134 | K(global_page_state(NR_SLAB_RECLAIMABLE) + | 142 | K(global_page_state(NR_SLAB_RECLAIMABLE) + |
@@ -151,6 +159,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v) | |||
151 | #ifdef CONFIG_MEMORY_FAILURE | 159 | #ifdef CONFIG_MEMORY_FAILURE |
152 | ,atomic_long_read(&mce_bad_pages) << (PAGE_SHIFT - 10) | 160 | ,atomic_long_read(&mce_bad_pages) << (PAGE_SHIFT - 10) |
153 | #endif | 161 | #endif |
162 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
163 | ,K(global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) * | ||
164 | HPAGE_PMD_NR) | ||
165 | #endif | ||
154 | ); | 166 | ); |
155 | 167 | ||
156 | hugetlb_report_meminfo(m); | 168 | hugetlb_report_meminfo(m); |
diff --git a/fs/proc/page.c b/fs/proc/page.c index b06c674624e6..6d8e6a9e93ab 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c | |||
@@ -116,15 +116,17 @@ u64 stable_page_flags(struct page *page) | |||
116 | if (PageHuge(page)) | 116 | if (PageHuge(page)) |
117 | u |= 1 << KPF_HUGE; | 117 | u |= 1 << KPF_HUGE; |
118 | 118 | ||
119 | u |= kpf_copy_bit(k, KPF_LOCKED, PG_locked); | ||
120 | |||
121 | /* | 119 | /* |
122 | * Caveats on high order pages: | 120 | * Caveats on high order pages: page->_count will only be set |
123 | * PG_buddy will only be set on the head page; SLUB/SLQB do the same | 121 | * -1 on the head page; SLUB/SLQB do the same for PG_slab; |
124 | * for PG_slab; SLOB won't set PG_slab at all on compound pages. | 122 | * SLOB won't set PG_slab at all on compound pages. |
125 | */ | 123 | */ |
124 | if (PageBuddy(page)) | ||
125 | u |= 1 << KPF_BUDDY; | ||
126 | |||
127 | u |= kpf_copy_bit(k, KPF_LOCKED, PG_locked); | ||
128 | |||
126 | u |= kpf_copy_bit(k, KPF_SLAB, PG_slab); | 129 | u |= kpf_copy_bit(k, KPF_SLAB, PG_slab); |
127 | u |= kpf_copy_bit(k, KPF_BUDDY, PG_buddy); | ||
128 | 130 | ||
129 | u |= kpf_copy_bit(k, KPF_ERROR, PG_error); | 131 | u |= kpf_copy_bit(k, KPF_ERROR, PG_error); |
130 | u |= kpf_copy_bit(k, KPF_DIRTY, PG_dirty); | 132 | u |= kpf_copy_bit(k, KPF_DIRTY, PG_dirty); |
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index c3755bd8dd3e..60b914860f81 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -418,7 +418,8 @@ static int show_smap(struct seq_file *m, void *v) | |||
418 | "Anonymous: %8lu kB\n" | 418 | "Anonymous: %8lu kB\n" |
419 | "Swap: %8lu kB\n" | 419 | "Swap: %8lu kB\n" |
420 | "KernelPageSize: %8lu kB\n" | 420 | "KernelPageSize: %8lu kB\n" |
421 | "MMUPageSize: %8lu kB\n", | 421 | "MMUPageSize: %8lu kB\n" |
422 | "Locked: %8lu kB\n", | ||
422 | (vma->vm_end - vma->vm_start) >> 10, | 423 | (vma->vm_end - vma->vm_start) >> 10, |
423 | mss.resident >> 10, | 424 | mss.resident >> 10, |
424 | (unsigned long)(mss.pss >> (10 + PSS_SHIFT)), | 425 | (unsigned long)(mss.pss >> (10 + PSS_SHIFT)), |
@@ -430,7 +431,9 @@ static int show_smap(struct seq_file *m, void *v) | |||
430 | mss.anonymous >> 10, | 431 | mss.anonymous >> 10, |
431 | mss.swap >> 10, | 432 | mss.swap >> 10, |
432 | vma_kernel_pagesize(vma) >> 10, | 433 | vma_kernel_pagesize(vma) >> 10, |
433 | vma_mmu_pagesize(vma) >> 10); | 434 | vma_mmu_pagesize(vma) >> 10, |
435 | (vma->vm_flags & VM_LOCKED) ? | ||
436 | (unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0); | ||
434 | 437 | ||
435 | if (m->count < m->size) /* vma is copied successfully */ | 438 | if (m->count < m->size) /* vma is copied successfully */ |
436 | m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0; | 439 | m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0; |
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 84becd3e4772..a2a622e079f0 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c | |||
@@ -2189,8 +2189,8 @@ int dquot_resume(struct super_block *sb, int type) | |||
2189 | } | 2189 | } |
2190 | EXPORT_SYMBOL(dquot_resume); | 2190 | EXPORT_SYMBOL(dquot_resume); |
2191 | 2191 | ||
2192 | int dquot_quota_on_path(struct super_block *sb, int type, int format_id, | 2192 | int dquot_quota_on(struct super_block *sb, int type, int format_id, |
2193 | struct path *path) | 2193 | struct path *path) |
2194 | { | 2194 | { |
2195 | int error = security_quota_on(path->dentry); | 2195 | int error = security_quota_on(path->dentry); |
2196 | if (error) | 2196 | if (error) |
@@ -2204,20 +2204,6 @@ int dquot_quota_on_path(struct super_block *sb, int type, int format_id, | |||
2204 | DQUOT_LIMITS_ENABLED); | 2204 | DQUOT_LIMITS_ENABLED); |
2205 | return error; | 2205 | return error; |
2206 | } | 2206 | } |
2207 | EXPORT_SYMBOL(dquot_quota_on_path); | ||
2208 | |||
2209 | int dquot_quota_on(struct super_block *sb, int type, int format_id, char *name) | ||
2210 | { | ||
2211 | struct path path; | ||
2212 | int error; | ||
2213 | |||
2214 | error = kern_path(name, LOOKUP_FOLLOW, &path); | ||
2215 | if (!error) { | ||
2216 | error = dquot_quota_on_path(sb, type, format_id, &path); | ||
2217 | path_put(&path); | ||
2218 | } | ||
2219 | return error; | ||
2220 | } | ||
2221 | EXPORT_SYMBOL(dquot_quota_on); | 2207 | EXPORT_SYMBOL(dquot_quota_on); |
2222 | 2208 | ||
2223 | /* | 2209 | /* |
diff --git a/fs/quota/quota.c b/fs/quota/quota.c index b299961e1edb..b34bdb25490c 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c | |||
@@ -64,18 +64,15 @@ static int quota_sync_all(int type) | |||
64 | } | 64 | } |
65 | 65 | ||
66 | static int quota_quotaon(struct super_block *sb, int type, int cmd, qid_t id, | 66 | static int quota_quotaon(struct super_block *sb, int type, int cmd, qid_t id, |
67 | void __user *addr) | 67 | struct path *path) |
68 | { | 68 | { |
69 | char *pathname; | 69 | if (!sb->s_qcop->quota_on && !sb->s_qcop->quota_on_meta) |
70 | int ret = -ENOSYS; | 70 | return -ENOSYS; |
71 | 71 | if (sb->s_qcop->quota_on_meta) | |
72 | pathname = getname(addr); | 72 | return sb->s_qcop->quota_on_meta(sb, type, id); |
73 | if (IS_ERR(pathname)) | 73 | if (IS_ERR(path)) |
74 | return PTR_ERR(pathname); | 74 | return PTR_ERR(path); |
75 | if (sb->s_qcop->quota_on) | 75 | return sb->s_qcop->quota_on(sb, type, id, path); |
76 | ret = sb->s_qcop->quota_on(sb, type, id, pathname); | ||
77 | putname(pathname); | ||
78 | return ret; | ||
79 | } | 76 | } |
80 | 77 | ||
81 | static int quota_getfmt(struct super_block *sb, int type, void __user *addr) | 78 | static int quota_getfmt(struct super_block *sb, int type, void __user *addr) |
@@ -241,7 +238,7 @@ static int quota_getxquota(struct super_block *sb, int type, qid_t id, | |||
241 | 238 | ||
242 | /* Copy parameters and call proper function */ | 239 | /* Copy parameters and call proper function */ |
243 | static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, | 240 | static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, |
244 | void __user *addr) | 241 | void __user *addr, struct path *path) |
245 | { | 242 | { |
246 | int ret; | 243 | int ret; |
247 | 244 | ||
@@ -256,7 +253,7 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, | |||
256 | 253 | ||
257 | switch (cmd) { | 254 | switch (cmd) { |
258 | case Q_QUOTAON: | 255 | case Q_QUOTAON: |
259 | return quota_quotaon(sb, type, cmd, id, addr); | 256 | return quota_quotaon(sb, type, cmd, id, path); |
260 | case Q_QUOTAOFF: | 257 | case Q_QUOTAOFF: |
261 | if (!sb->s_qcop->quota_off) | 258 | if (!sb->s_qcop->quota_off) |
262 | return -ENOSYS; | 259 | return -ENOSYS; |
@@ -335,6 +332,7 @@ SYSCALL_DEFINE4(quotactl, unsigned int, cmd, const char __user *, special, | |||
335 | { | 332 | { |
336 | uint cmds, type; | 333 | uint cmds, type; |
337 | struct super_block *sb = NULL; | 334 | struct super_block *sb = NULL; |
335 | struct path path, *pathp = NULL; | ||
338 | int ret; | 336 | int ret; |
339 | 337 | ||
340 | cmds = cmd >> SUBCMDSHIFT; | 338 | cmds = cmd >> SUBCMDSHIFT; |
@@ -351,12 +349,27 @@ SYSCALL_DEFINE4(quotactl, unsigned int, cmd, const char __user *, special, | |||
351 | return -ENODEV; | 349 | return -ENODEV; |
352 | } | 350 | } |
353 | 351 | ||
352 | /* | ||
353 | * Path for quotaon has to be resolved before grabbing superblock | ||
354 | * because that gets s_umount sem which is also possibly needed by path | ||
355 | * resolution (think about autofs) and thus deadlocks could arise. | ||
356 | */ | ||
357 | if (cmds == Q_QUOTAON) { | ||
358 | ret = user_path_at(AT_FDCWD, addr, LOOKUP_FOLLOW, &path); | ||
359 | if (ret) | ||
360 | pathp = ERR_PTR(ret); | ||
361 | else | ||
362 | pathp = &path; | ||
363 | } | ||
364 | |||
354 | sb = quotactl_block(special); | 365 | sb = quotactl_block(special); |
355 | if (IS_ERR(sb)) | 366 | if (IS_ERR(sb)) |
356 | return PTR_ERR(sb); | 367 | return PTR_ERR(sb); |
357 | 368 | ||
358 | ret = do_quotactl(sb, type, cmds, id, addr); | 369 | ret = do_quotactl(sb, type, cmds, id, addr, pathp); |
359 | 370 | ||
360 | drop_super(sb); | 371 | drop_super(sb); |
372 | if (pathp && !IS_ERR(pathp)) | ||
373 | path_put(pathp); | ||
361 | return ret; | 374 | return ret; |
362 | } | 375 | } |
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 2575682a9ead..0aab04f46827 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c | |||
@@ -632,7 +632,7 @@ static int reiserfs_acquire_dquot(struct dquot *); | |||
632 | static int reiserfs_release_dquot(struct dquot *); | 632 | static int reiserfs_release_dquot(struct dquot *); |
633 | static int reiserfs_mark_dquot_dirty(struct dquot *); | 633 | static int reiserfs_mark_dquot_dirty(struct dquot *); |
634 | static int reiserfs_write_info(struct super_block *, int); | 634 | static int reiserfs_write_info(struct super_block *, int); |
635 | static int reiserfs_quota_on(struct super_block *, int, int, char *); | 635 | static int reiserfs_quota_on(struct super_block *, int, int, struct path *); |
636 | 636 | ||
637 | static const struct dquot_operations reiserfs_quota_operations = { | 637 | static const struct dquot_operations reiserfs_quota_operations = { |
638 | .write_dquot = reiserfs_write_dquot, | 638 | .write_dquot = reiserfs_write_dquot, |
@@ -2048,25 +2048,21 @@ static int reiserfs_quota_on_mount(struct super_block *sb, int type) | |||
2048 | * Standard function to be called on quota_on | 2048 | * Standard function to be called on quota_on |
2049 | */ | 2049 | */ |
2050 | static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, | 2050 | static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, |
2051 | char *name) | 2051 | struct path *path) |
2052 | { | 2052 | { |
2053 | int err; | 2053 | int err; |
2054 | struct path path; | ||
2055 | struct inode *inode; | 2054 | struct inode *inode; |
2056 | struct reiserfs_transaction_handle th; | 2055 | struct reiserfs_transaction_handle th; |
2057 | 2056 | ||
2058 | if (!(REISERFS_SB(sb)->s_mount_opt & (1 << REISERFS_QUOTA))) | 2057 | if (!(REISERFS_SB(sb)->s_mount_opt & (1 << REISERFS_QUOTA))) |
2059 | return -EINVAL; | 2058 | return -EINVAL; |
2060 | 2059 | ||
2061 | err = kern_path(name, LOOKUP_FOLLOW, &path); | ||
2062 | if (err) | ||
2063 | return err; | ||
2064 | /* Quotafile not on the same filesystem? */ | 2060 | /* Quotafile not on the same filesystem? */ |
2065 | if (path.mnt->mnt_sb != sb) { | 2061 | if (path->mnt->mnt_sb != sb) { |
2066 | err = -EXDEV; | 2062 | err = -EXDEV; |
2067 | goto out; | 2063 | goto out; |
2068 | } | 2064 | } |
2069 | inode = path.dentry->d_inode; | 2065 | inode = path->dentry->d_inode; |
2070 | /* We must not pack tails for quota files on reiserfs for quota IO to work */ | 2066 | /* We must not pack tails for quota files on reiserfs for quota IO to work */ |
2071 | if (!(REISERFS_I(inode)->i_flags & i_nopack_mask)) { | 2067 | if (!(REISERFS_I(inode)->i_flags & i_nopack_mask)) { |
2072 | err = reiserfs_unpack(inode, NULL); | 2068 | err = reiserfs_unpack(inode, NULL); |
@@ -2082,7 +2078,7 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, | |||
2082 | /* Journaling quota? */ | 2078 | /* Journaling quota? */ |
2083 | if (REISERFS_SB(sb)->s_qf_names[type]) { | 2079 | if (REISERFS_SB(sb)->s_qf_names[type]) { |
2084 | /* Quotafile not of fs root? */ | 2080 | /* Quotafile not of fs root? */ |
2085 | if (path.dentry->d_parent != sb->s_root) | 2081 | if (path->dentry->d_parent != sb->s_root) |
2086 | reiserfs_warning(sb, "super-6521", | 2082 | reiserfs_warning(sb, "super-6521", |
2087 | "Quota file not on filesystem root. " | 2083 | "Quota file not on filesystem root. " |
2088 | "Journalled quota will not work."); | 2084 | "Journalled quota will not work."); |
@@ -2101,9 +2097,8 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, | |||
2101 | if (err) | 2097 | if (err) |
2102 | goto out; | 2098 | goto out; |
2103 | } | 2099 | } |
2104 | err = dquot_quota_on_path(sb, type, format_id, &path); | 2100 | err = dquot_quota_on(sb, type, format_id, path); |
2105 | out: | 2101 | out: |
2106 | path_put(&path); | ||
2107 | return err; | 2102 | return err; |
2108 | } | 2103 | } |
2109 | 2104 | ||
diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig index e5f63da64d04..aa68a8a31518 100644 --- a/fs/squashfs/Kconfig +++ b/fs/squashfs/Kconfig | |||
@@ -29,7 +29,6 @@ config SQUASHFS | |||
29 | config SQUASHFS_XATTR | 29 | config SQUASHFS_XATTR |
30 | bool "Squashfs XATTR support" | 30 | bool "Squashfs XATTR support" |
31 | depends on SQUASHFS | 31 | depends on SQUASHFS |
32 | default n | ||
33 | help | 32 | help |
34 | Saying Y here includes support for extended attributes (xattrs). | 33 | Saying Y here includes support for extended attributes (xattrs). |
35 | Xattrs are name:value pairs associated with inodes by | 34 | Xattrs are name:value pairs associated with inodes by |
@@ -40,7 +39,6 @@ config SQUASHFS_XATTR | |||
40 | config SQUASHFS_LZO | 39 | config SQUASHFS_LZO |
41 | bool "Include support for LZO compressed file systems" | 40 | bool "Include support for LZO compressed file systems" |
42 | depends on SQUASHFS | 41 | depends on SQUASHFS |
43 | default n | ||
44 | select LZO_DECOMPRESS | 42 | select LZO_DECOMPRESS |
45 | help | 43 | help |
46 | Saying Y here includes support for reading Squashfs file systems | 44 | Saying Y here includes support for reading Squashfs file systems |
@@ -53,10 +51,24 @@ config SQUASHFS_LZO | |||
53 | 51 | ||
54 | If unsure, say N. | 52 | If unsure, say N. |
55 | 53 | ||
54 | config SQUASHFS_XZ | ||
55 | bool "Include support for XZ compressed file systems" | ||
56 | depends on SQUASHFS | ||
57 | select XZ_DEC | ||
58 | help | ||
59 | Saying Y here includes support for reading Squashfs file systems | ||
60 | compressed with XZ compresssion. XZ gives better compression than | ||
61 | the default zlib compression, at the expense of greater CPU and | ||
62 | memory overhead. | ||
63 | |||
64 | XZ is not the standard compression used in Squashfs and so most | ||
65 | file systems will be readable without selecting this option. | ||
66 | |||
67 | If unsure, say N. | ||
68 | |||
56 | config SQUASHFS_EMBEDDED | 69 | config SQUASHFS_EMBEDDED |
57 | bool "Additional option for memory-constrained systems" | 70 | bool "Additional option for memory-constrained systems" |
58 | depends on SQUASHFS | 71 | depends on SQUASHFS |
59 | default n | ||
60 | help | 72 | help |
61 | Saying Y here allows you to specify cache size. | 73 | Saying Y here allows you to specify cache size. |
62 | 74 | ||
diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile index 7672bac8d328..cecf2bea07af 100644 --- a/fs/squashfs/Makefile +++ b/fs/squashfs/Makefile | |||
@@ -7,3 +7,4 @@ squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o | |||
7 | squashfs-y += namei.o super.o symlink.o zlib_wrapper.o decompressor.o | 7 | squashfs-y += namei.o super.o symlink.o zlib_wrapper.o decompressor.o |
8 | squashfs-$(CONFIG_SQUASHFS_XATTR) += xattr.o xattr_id.o | 8 | squashfs-$(CONFIG_SQUASHFS_XATTR) += xattr.o xattr_id.o |
9 | squashfs-$(CONFIG_SQUASHFS_LZO) += lzo_wrapper.o | 9 | squashfs-$(CONFIG_SQUASHFS_LZO) += lzo_wrapper.o |
10 | squashfs-$(CONFIG_SQUASHFS_XZ) += xz_wrapper.o | ||
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index 653c030eb840..8ab48bc2fa7d 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c | |||
@@ -34,7 +34,6 @@ | |||
34 | 34 | ||
35 | #include "squashfs_fs.h" | 35 | #include "squashfs_fs.h" |
36 | #include "squashfs_fs_sb.h" | 36 | #include "squashfs_fs_sb.h" |
37 | #include "squashfs_fs_i.h" | ||
38 | #include "squashfs.h" | 37 | #include "squashfs.h" |
39 | #include "decompressor.h" | 38 | #include "decompressor.h" |
40 | 39 | ||
@@ -64,6 +63,14 @@ static struct buffer_head *get_block_length(struct super_block *sb, | |||
64 | *length = (unsigned char) bh->b_data[*offset] | | 63 | *length = (unsigned char) bh->b_data[*offset] | |
65 | (unsigned char) bh->b_data[*offset + 1] << 8; | 64 | (unsigned char) bh->b_data[*offset + 1] << 8; |
66 | *offset += 2; | 65 | *offset += 2; |
66 | |||
67 | if (*offset == msblk->devblksize) { | ||
68 | put_bh(bh); | ||
69 | bh = sb_bread(sb, ++(*cur_index)); | ||
70 | if (bh == NULL) | ||
71 | return NULL; | ||
72 | *offset = 0; | ||
73 | } | ||
67 | } | 74 | } |
68 | 75 | ||
69 | return bh; | 76 | return bh; |
diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c index 57314bee9059..26b15ae34d6f 100644 --- a/fs/squashfs/cache.c +++ b/fs/squashfs/cache.c | |||
@@ -55,7 +55,6 @@ | |||
55 | 55 | ||
56 | #include "squashfs_fs.h" | 56 | #include "squashfs_fs.h" |
57 | #include "squashfs_fs_sb.h" | 57 | #include "squashfs_fs_sb.h" |
58 | #include "squashfs_fs_i.h" | ||
59 | #include "squashfs.h" | 58 | #include "squashfs.h" |
60 | 59 | ||
61 | /* | 60 | /* |
diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c index 24af9ce9722f..a5940e54c4dd 100644 --- a/fs/squashfs/decompressor.c +++ b/fs/squashfs/decompressor.c | |||
@@ -27,7 +27,6 @@ | |||
27 | 27 | ||
28 | #include "squashfs_fs.h" | 28 | #include "squashfs_fs.h" |
29 | #include "squashfs_fs_sb.h" | 29 | #include "squashfs_fs_sb.h" |
30 | #include "squashfs_fs_i.h" | ||
31 | #include "decompressor.h" | 30 | #include "decompressor.h" |
32 | #include "squashfs.h" | 31 | #include "squashfs.h" |
33 | 32 | ||
@@ -41,23 +40,26 @@ static const struct squashfs_decompressor squashfs_lzma_unsupported_comp_ops = { | |||
41 | }; | 40 | }; |
42 | 41 | ||
43 | #ifndef CONFIG_SQUASHFS_LZO | 42 | #ifndef CONFIG_SQUASHFS_LZO |
44 | static const struct squashfs_decompressor squashfs_lzo_unsupported_comp_ops = { | 43 | static const struct squashfs_decompressor squashfs_lzo_comp_ops = { |
45 | NULL, NULL, NULL, LZO_COMPRESSION, "lzo", 0 | 44 | NULL, NULL, NULL, LZO_COMPRESSION, "lzo", 0 |
46 | }; | 45 | }; |
47 | #endif | 46 | #endif |
48 | 47 | ||
48 | #ifndef CONFIG_SQUASHFS_XZ | ||
49 | static const struct squashfs_decompressor squashfs_xz_comp_ops = { | ||
50 | NULL, NULL, NULL, XZ_COMPRESSION, "xz", 0 | ||
51 | }; | ||
52 | #endif | ||
53 | |||
49 | static const struct squashfs_decompressor squashfs_unknown_comp_ops = { | 54 | static const struct squashfs_decompressor squashfs_unknown_comp_ops = { |
50 | NULL, NULL, NULL, 0, "unknown", 0 | 55 | NULL, NULL, NULL, 0, "unknown", 0 |
51 | }; | 56 | }; |
52 | 57 | ||
53 | static const struct squashfs_decompressor *decompressor[] = { | 58 | static const struct squashfs_decompressor *decompressor[] = { |
54 | &squashfs_zlib_comp_ops, | 59 | &squashfs_zlib_comp_ops, |
55 | &squashfs_lzma_unsupported_comp_ops, | ||
56 | #ifdef CONFIG_SQUASHFS_LZO | ||
57 | &squashfs_lzo_comp_ops, | 60 | &squashfs_lzo_comp_ops, |
58 | #else | 61 | &squashfs_xz_comp_ops, |
59 | &squashfs_lzo_unsupported_comp_ops, | 62 | &squashfs_lzma_unsupported_comp_ops, |
60 | #endif | ||
61 | &squashfs_unknown_comp_ops | 63 | &squashfs_unknown_comp_ops |
62 | }; | 64 | }; |
63 | 65 | ||
diff --git a/fs/squashfs/decompressor.h b/fs/squashfs/decompressor.h index 7425f80783f6..3b305a70f7aa 100644 --- a/fs/squashfs/decompressor.h +++ b/fs/squashfs/decompressor.h | |||
@@ -52,4 +52,13 @@ static inline int squashfs_decompress(struct squashfs_sb_info *msblk, | |||
52 | return msblk->decompressor->decompress(msblk, buffer, bh, b, offset, | 52 | return msblk->decompressor->decompress(msblk, buffer, bh, b, offset, |
53 | length, srclength, pages); | 53 | length, srclength, pages); |
54 | } | 54 | } |
55 | |||
56 | #ifdef CONFIG_SQUASHFS_XZ | ||
57 | extern const struct squashfs_decompressor squashfs_xz_comp_ops; | ||
58 | #endif | ||
59 | |||
60 | #ifdef CONFIG_SQUASHFS_LZO | ||
61 | extern const struct squashfs_decompressor squashfs_lzo_comp_ops; | ||
62 | #endif | ||
63 | |||
55 | #endif | 64 | #endif |
diff --git a/fs/squashfs/fragment.c b/fs/squashfs/fragment.c index 7c90bbd6879d..7eef571443c6 100644 --- a/fs/squashfs/fragment.c +++ b/fs/squashfs/fragment.c | |||
@@ -39,7 +39,6 @@ | |||
39 | 39 | ||
40 | #include "squashfs_fs.h" | 40 | #include "squashfs_fs.h" |
41 | #include "squashfs_fs_sb.h" | 41 | #include "squashfs_fs_sb.h" |
42 | #include "squashfs_fs_i.h" | ||
43 | #include "squashfs.h" | 42 | #include "squashfs.h" |
44 | 43 | ||
45 | /* | 44 | /* |
diff --git a/fs/squashfs/id.c b/fs/squashfs/id.c index b7f64bcd2b70..d8f32452638e 100644 --- a/fs/squashfs/id.c +++ b/fs/squashfs/id.c | |||
@@ -37,7 +37,6 @@ | |||
37 | 37 | ||
38 | #include "squashfs_fs.h" | 38 | #include "squashfs_fs.h" |
39 | #include "squashfs_fs_sb.h" | 39 | #include "squashfs_fs_sb.h" |
40 | #include "squashfs_fs_i.h" | ||
41 | #include "squashfs.h" | 40 | #include "squashfs.h" |
42 | 41 | ||
43 | /* | 42 | /* |
diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c index 5d87789bf1c1..7da759e34c52 100644 --- a/fs/squashfs/lzo_wrapper.c +++ b/fs/squashfs/lzo_wrapper.c | |||
@@ -29,7 +29,6 @@ | |||
29 | 29 | ||
30 | #include "squashfs_fs.h" | 30 | #include "squashfs_fs.h" |
31 | #include "squashfs_fs_sb.h" | 31 | #include "squashfs_fs_sb.h" |
32 | #include "squashfs_fs_i.h" | ||
33 | #include "squashfs.h" | 32 | #include "squashfs.h" |
34 | #include "decompressor.h" | 33 | #include "decompressor.h" |
35 | 34 | ||
diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h index 5d45569d5f72..ba729d808876 100644 --- a/fs/squashfs/squashfs.h +++ b/fs/squashfs/squashfs.h | |||
@@ -27,11 +27,6 @@ | |||
27 | 27 | ||
28 | #define WARNING(s, args...) pr_warning("SQUASHFS: "s, ## args) | 28 | #define WARNING(s, args...) pr_warning("SQUASHFS: "s, ## args) |
29 | 29 | ||
30 | static inline struct squashfs_inode_info *squashfs_i(struct inode *inode) | ||
31 | { | ||
32 | return list_entry(inode, struct squashfs_inode_info, vfs_inode); | ||
33 | } | ||
34 | |||
35 | /* block.c */ | 30 | /* block.c */ |
36 | extern int squashfs_read_data(struct super_block *, void **, u64, int, u64 *, | 31 | extern int squashfs_read_data(struct super_block *, void **, u64, int, u64 *, |
37 | int, int); | 32 | int, int); |
@@ -104,6 +99,3 @@ extern const struct xattr_handler *squashfs_xattr_handlers[]; | |||
104 | 99 | ||
105 | /* zlib_wrapper.c */ | 100 | /* zlib_wrapper.c */ |
106 | extern const struct squashfs_decompressor squashfs_zlib_comp_ops; | 101 | extern const struct squashfs_decompressor squashfs_zlib_comp_ops; |
107 | |||
108 | /* lzo_wrapper.c */ | ||
109 | extern const struct squashfs_decompressor squashfs_lzo_comp_ops; | ||
diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h index c5137fc9ab11..39533feffd6d 100644 --- a/fs/squashfs/squashfs_fs.h +++ b/fs/squashfs/squashfs_fs.h | |||
@@ -238,6 +238,7 @@ struct meta_index { | |||
238 | #define ZLIB_COMPRESSION 1 | 238 | #define ZLIB_COMPRESSION 1 |
239 | #define LZMA_COMPRESSION 2 | 239 | #define LZMA_COMPRESSION 2 |
240 | #define LZO_COMPRESSION 3 | 240 | #define LZO_COMPRESSION 3 |
241 | #define XZ_COMPRESSION 4 | ||
241 | 242 | ||
242 | struct squashfs_super_block { | 243 | struct squashfs_super_block { |
243 | __le32 s_magic; | 244 | __le32 s_magic; |
diff --git a/fs/squashfs/squashfs_fs_i.h b/fs/squashfs/squashfs_fs_i.h index d3e3a37f28a1..359baefc01fc 100644 --- a/fs/squashfs/squashfs_fs_i.h +++ b/fs/squashfs/squashfs_fs_i.h | |||
@@ -45,4 +45,10 @@ struct squashfs_inode_info { | |||
45 | }; | 45 | }; |
46 | struct inode vfs_inode; | 46 | struct inode vfs_inode; |
47 | }; | 47 | }; |
48 | |||
49 | |||
50 | static inline struct squashfs_inode_info *squashfs_i(struct inode *inode) | ||
51 | { | ||
52 | return list_entry(inode, struct squashfs_inode_info, vfs_inode); | ||
53 | } | ||
48 | #endif | 54 | #endif |
diff --git a/fs/squashfs/xattr_id.c b/fs/squashfs/xattr_id.c index d33be5dd6c32..05385dbe1465 100644 --- a/fs/squashfs/xattr_id.c +++ b/fs/squashfs/xattr_id.c | |||
@@ -32,7 +32,6 @@ | |||
32 | 32 | ||
33 | #include "squashfs_fs.h" | 33 | #include "squashfs_fs.h" |
34 | #include "squashfs_fs_sb.h" | 34 | #include "squashfs_fs_sb.h" |
35 | #include "squashfs_fs_i.h" | ||
36 | #include "squashfs.h" | 35 | #include "squashfs.h" |
37 | #include "xattr.h" | 36 | #include "xattr.h" |
38 | 37 | ||
diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c new file mode 100644 index 000000000000..c4eb40018256 --- /dev/null +++ b/fs/squashfs/xz_wrapper.c | |||
@@ -0,0 +1,147 @@ | |||
1 | /* | ||
2 | * Squashfs - a compressed read only filesystem for Linux | ||
3 | * | ||
4 | * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 | ||
5 | * Phillip Lougher <phillip@lougher.demon.co.uk> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; either version 2, | ||
10 | * or (at your option) any later version. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to the Free Software | ||
19 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | ||
20 | * | ||
21 | * xz_wrapper.c | ||
22 | */ | ||
23 | |||
24 | |||
25 | #include <linux/mutex.h> | ||
26 | #include <linux/buffer_head.h> | ||
27 | #include <linux/slab.h> | ||
28 | #include <linux/xz.h> | ||
29 | |||
30 | #include "squashfs_fs.h" | ||
31 | #include "squashfs_fs_sb.h" | ||
32 | #include "squashfs_fs_i.h" | ||
33 | #include "squashfs.h" | ||
34 | #include "decompressor.h" | ||
35 | |||
36 | struct squashfs_xz { | ||
37 | struct xz_dec *state; | ||
38 | struct xz_buf buf; | ||
39 | }; | ||
40 | |||
41 | static void *squashfs_xz_init(struct squashfs_sb_info *msblk) | ||
42 | { | ||
43 | int block_size = max_t(int, msblk->block_size, SQUASHFS_METADATA_SIZE); | ||
44 | |||
45 | struct squashfs_xz *stream = kmalloc(sizeof(*stream), GFP_KERNEL); | ||
46 | if (stream == NULL) | ||
47 | goto failed; | ||
48 | |||
49 | stream->state = xz_dec_init(XZ_PREALLOC, block_size); | ||
50 | if (stream->state == NULL) | ||
51 | goto failed; | ||
52 | |||
53 | return stream; | ||
54 | |||
55 | failed: | ||
56 | ERROR("Failed to allocate xz workspace\n"); | ||
57 | kfree(stream); | ||
58 | return NULL; | ||
59 | } | ||
60 | |||
61 | |||
62 | static void squashfs_xz_free(void *strm) | ||
63 | { | ||
64 | struct squashfs_xz *stream = strm; | ||
65 | |||
66 | if (stream) { | ||
67 | xz_dec_end(stream->state); | ||
68 | kfree(stream); | ||
69 | } | ||
70 | } | ||
71 | |||
72 | |||
73 | static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void **buffer, | ||
74 | struct buffer_head **bh, int b, int offset, int length, int srclength, | ||
75 | int pages) | ||
76 | { | ||
77 | enum xz_ret xz_err; | ||
78 | int avail, total = 0, k = 0, page = 0; | ||
79 | struct squashfs_xz *stream = msblk->stream; | ||
80 | |||
81 | mutex_lock(&msblk->read_data_mutex); | ||
82 | |||
83 | xz_dec_reset(stream->state); | ||
84 | stream->buf.in_pos = 0; | ||
85 | stream->buf.in_size = 0; | ||
86 | stream->buf.out_pos = 0; | ||
87 | stream->buf.out_size = PAGE_CACHE_SIZE; | ||
88 | stream->buf.out = buffer[page++]; | ||
89 | |||
90 | do { | ||
91 | if (stream->buf.in_pos == stream->buf.in_size && k < b) { | ||
92 | avail = min(length, msblk->devblksize - offset); | ||
93 | length -= avail; | ||
94 | wait_on_buffer(bh[k]); | ||
95 | if (!buffer_uptodate(bh[k])) | ||
96 | goto release_mutex; | ||
97 | |||
98 | stream->buf.in = bh[k]->b_data + offset; | ||
99 | stream->buf.in_size = avail; | ||
100 | stream->buf.in_pos = 0; | ||
101 | offset = 0; | ||
102 | } | ||
103 | |||
104 | if (stream->buf.out_pos == stream->buf.out_size | ||
105 | && page < pages) { | ||
106 | stream->buf.out = buffer[page++]; | ||
107 | stream->buf.out_pos = 0; | ||
108 | total += PAGE_CACHE_SIZE; | ||
109 | } | ||
110 | |||
111 | xz_err = xz_dec_run(stream->state, &stream->buf); | ||
112 | |||
113 | if (stream->buf.in_pos == stream->buf.in_size && k < b) | ||
114 | put_bh(bh[k++]); | ||
115 | } while (xz_err == XZ_OK); | ||
116 | |||
117 | if (xz_err != XZ_STREAM_END) { | ||
118 | ERROR("xz_dec_run error, data probably corrupt\n"); | ||
119 | goto release_mutex; | ||
120 | } | ||
121 | |||
122 | if (k < b) { | ||
123 | ERROR("xz_uncompress error, input remaining\n"); | ||
124 | goto release_mutex; | ||
125 | } | ||
126 | |||
127 | total += stream->buf.out_pos; | ||
128 | mutex_unlock(&msblk->read_data_mutex); | ||
129 | return total; | ||
130 | |||
131 | release_mutex: | ||
132 | mutex_unlock(&msblk->read_data_mutex); | ||
133 | |||
134 | for (; k < b; k++) | ||
135 | put_bh(bh[k]); | ||
136 | |||
137 | return -EIO; | ||
138 | } | ||
139 | |||
140 | const struct squashfs_decompressor squashfs_xz_comp_ops = { | ||
141 | .init = squashfs_xz_init, | ||
142 | .free = squashfs_xz_free, | ||
143 | .decompress = squashfs_xz_uncompress, | ||
144 | .id = XZ_COMPRESSION, | ||
145 | .name = "xz", | ||
146 | .supported = 1 | ||
147 | }; | ||
diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c index 7a603874e483..4661ae2b1cec 100644 --- a/fs/squashfs/zlib_wrapper.c +++ b/fs/squashfs/zlib_wrapper.c | |||
@@ -29,7 +29,6 @@ | |||
29 | 29 | ||
30 | #include "squashfs_fs.h" | 30 | #include "squashfs_fs.h" |
31 | #include "squashfs_fs_sb.h" | 31 | #include "squashfs_fs_sb.h" |
32 | #include "squashfs_fs_i.h" | ||
33 | #include "squashfs.h" | 32 | #include "squashfs.h" |
34 | #include "decompressor.h" | 33 | #include "decompressor.h" |
35 | 34 | ||
@@ -66,8 +65,8 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer, | |||
66 | struct buffer_head **bh, int b, int offset, int length, int srclength, | 65 | struct buffer_head **bh, int b, int offset, int length, int srclength, |
67 | int pages) | 66 | int pages) |
68 | { | 67 | { |
69 | int zlib_err = 0, zlib_init = 0; | 68 | int zlib_err, zlib_init = 0; |
70 | int avail, bytes, k = 0, page = 0; | 69 | int k = 0, page = 0; |
71 | z_stream *stream = msblk->stream; | 70 | z_stream *stream = msblk->stream; |
72 | 71 | ||
73 | mutex_lock(&msblk->read_data_mutex); | 72 | mutex_lock(&msblk->read_data_mutex); |
@@ -75,21 +74,14 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer, | |||
75 | stream->avail_out = 0; | 74 | stream->avail_out = 0; |
76 | stream->avail_in = 0; | 75 | stream->avail_in = 0; |
77 | 76 | ||
78 | bytes = length; | ||
79 | do { | 77 | do { |
80 | if (stream->avail_in == 0 && k < b) { | 78 | if (stream->avail_in == 0 && k < b) { |
81 | avail = min(bytes, msblk->devblksize - offset); | 79 | int avail = min(length, msblk->devblksize - offset); |
82 | bytes -= avail; | 80 | length -= avail; |
83 | wait_on_buffer(bh[k]); | 81 | wait_on_buffer(bh[k]); |
84 | if (!buffer_uptodate(bh[k])) | 82 | if (!buffer_uptodate(bh[k])) |
85 | goto release_mutex; | 83 | goto release_mutex; |
86 | 84 | ||
87 | if (avail == 0) { | ||
88 | offset = 0; | ||
89 | put_bh(bh[k++]); | ||
90 | continue; | ||
91 | } | ||
92 | |||
93 | stream->next_in = bh[k]->b_data + offset; | 85 | stream->next_in = bh[k]->b_data + offset; |
94 | stream->avail_in = avail; | 86 | stream->avail_in = avail; |
95 | offset = 0; | 87 | offset = 0; |
@@ -128,6 +120,11 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer, | |||
128 | goto release_mutex; | 120 | goto release_mutex; |
129 | } | 121 | } |
130 | 122 | ||
123 | if (k < b) { | ||
124 | ERROR("zlib_uncompress error, data remaining\n"); | ||
125 | goto release_mutex; | ||
126 | } | ||
127 | |||
131 | length = stream->total_out; | 128 | length = stream->total_out; |
132 | mutex_unlock(&msblk->read_data_mutex); | 129 | mutex_unlock(&msblk->read_data_mutex); |
133 | return length; | 130 | return length; |
@@ -75,11 +75,13 @@ int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat, | |||
75 | int error = -EINVAL; | 75 | int error = -EINVAL; |
76 | int lookup_flags = 0; | 76 | int lookup_flags = 0; |
77 | 77 | ||
78 | if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) | 78 | if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT)) != 0) |
79 | goto out; | 79 | goto out; |
80 | 80 | ||
81 | if (!(flag & AT_SYMLINK_NOFOLLOW)) | 81 | if (!(flag & AT_SYMLINK_NOFOLLOW)) |
82 | lookup_flags |= LOOKUP_FOLLOW; | 82 | lookup_flags |= LOOKUP_FOLLOW; |
83 | if (flag & AT_NO_AUTOMOUNT) | ||
84 | lookup_flags |= LOOKUP_NO_AUTOMOUNT; | ||
83 | 85 | ||
84 | error = user_path_at(dfd, filename, lookup_flags, &path); | 86 | error = user_path_at(dfd, filename, lookup_flags, &path); |
85 | if (error) | 87 | if (error) |
diff --git a/fs/super.c b/fs/super.c index 4f6a3571a634..7e9dd4cc2c01 100644 --- a/fs/super.c +++ b/fs/super.c | |||
@@ -177,6 +177,11 @@ void deactivate_locked_super(struct super_block *s) | |||
177 | struct file_system_type *fs = s->s_type; | 177 | struct file_system_type *fs = s->s_type; |
178 | if (atomic_dec_and_test(&s->s_active)) { | 178 | if (atomic_dec_and_test(&s->s_active)) { |
179 | fs->kill_sb(s); | 179 | fs->kill_sb(s); |
180 | /* | ||
181 | * We need to call rcu_barrier so all the delayed rcu free | ||
182 | * inodes are flushed before we release the fs module. | ||
183 | */ | ||
184 | rcu_barrier(); | ||
180 | put_filesystem(fs); | 185 | put_filesystem(fs); |
181 | put_super(s); | 186 | put_super(s); |
182 | } else { | 187 | } else { |
@@ -1141,7 +1146,7 @@ static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype) | |||
1141 | return mnt; | 1146 | return mnt; |
1142 | 1147 | ||
1143 | err: | 1148 | err: |
1144 | mntput_long(mnt); | 1149 | mntput(mnt); |
1145 | return ERR_PTR(err); | 1150 | return ERR_PTR(err); |
1146 | } | 1151 | } |
1147 | 1152 | ||
diff --git a/fs/sysfs/Kconfig b/fs/sysfs/Kconfig index f4b67588b9d6..8c41feacbac5 100644 --- a/fs/sysfs/Kconfig +++ b/fs/sysfs/Kconfig | |||
@@ -1,5 +1,5 @@ | |||
1 | config SYSFS | 1 | config SYSFS |
2 | bool "sysfs file system support" if EMBEDDED | 2 | bool "sysfs file system support" if EXPERT |
3 | default y | 3 | default y |
4 | help | 4 | help |
5 | The sysfs filesystem is a virtual filesystem that the kernel uses to | 5 | The sysfs filesystem is a virtual filesystem that the kernel uses to |
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 0dce969d6cad..faca44997099 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile | |||
@@ -98,6 +98,7 @@ xfs-y += $(addprefix $(XFS_LINUX)/, \ | |||
98 | kmem.o \ | 98 | kmem.o \ |
99 | xfs_aops.o \ | 99 | xfs_aops.o \ |
100 | xfs_buf.o \ | 100 | xfs_buf.o \ |
101 | xfs_discard.o \ | ||
101 | xfs_export.o \ | 102 | xfs_export.o \ |
102 | xfs_file.o \ | 103 | xfs_file.o \ |
103 | xfs_fs_subr.o \ | 104 | xfs_fs_subr.o \ |
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 92f1f2acc6ab..ac1c7e8378dd 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c | |||
@@ -896,7 +896,6 @@ xfs_buf_rele( | |||
896 | trace_xfs_buf_rele(bp, _RET_IP_); | 896 | trace_xfs_buf_rele(bp, _RET_IP_); |
897 | 897 | ||
898 | if (!pag) { | 898 | if (!pag) { |
899 | ASSERT(!bp->b_relse); | ||
900 | ASSERT(list_empty(&bp->b_lru)); | 899 | ASSERT(list_empty(&bp->b_lru)); |
901 | ASSERT(RB_EMPTY_NODE(&bp->b_rbnode)); | 900 | ASSERT(RB_EMPTY_NODE(&bp->b_rbnode)); |
902 | if (atomic_dec_and_test(&bp->b_hold)) | 901 | if (atomic_dec_and_test(&bp->b_hold)) |
@@ -908,11 +907,7 @@ xfs_buf_rele( | |||
908 | 907 | ||
909 | ASSERT(atomic_read(&bp->b_hold) > 0); | 908 | ASSERT(atomic_read(&bp->b_hold) > 0); |
910 | if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) { | 909 | if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) { |
911 | if (bp->b_relse) { | 910 | if (!(bp->b_flags & XBF_STALE) && |
912 | atomic_inc(&bp->b_hold); | ||
913 | spin_unlock(&pag->pag_buf_lock); | ||
914 | bp->b_relse(bp); | ||
915 | } else if (!(bp->b_flags & XBF_STALE) && | ||
916 | atomic_read(&bp->b_lru_ref)) { | 911 | atomic_read(&bp->b_lru_ref)) { |
917 | xfs_buf_lru_add(bp); | 912 | xfs_buf_lru_add(bp); |
918 | spin_unlock(&pag->pag_buf_lock); | 913 | spin_unlock(&pag->pag_buf_lock); |
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index a76c2428faff..cbe65950e524 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h | |||
@@ -152,8 +152,6 @@ typedef struct xfs_buftarg { | |||
152 | 152 | ||
153 | struct xfs_buf; | 153 | struct xfs_buf; |
154 | typedef void (*xfs_buf_iodone_t)(struct xfs_buf *); | 154 | typedef void (*xfs_buf_iodone_t)(struct xfs_buf *); |
155 | typedef void (*xfs_buf_relse_t)(struct xfs_buf *); | ||
156 | typedef int (*xfs_buf_bdstrat_t)(struct xfs_buf *); | ||
157 | 155 | ||
158 | #define XB_PAGES 2 | 156 | #define XB_PAGES 2 |
159 | 157 | ||
@@ -183,7 +181,6 @@ typedef struct xfs_buf { | |||
183 | void *b_addr; /* virtual address of buffer */ | 181 | void *b_addr; /* virtual address of buffer */ |
184 | struct work_struct b_iodone_work; | 182 | struct work_struct b_iodone_work; |
185 | xfs_buf_iodone_t b_iodone; /* I/O completion function */ | 183 | xfs_buf_iodone_t b_iodone; /* I/O completion function */ |
186 | xfs_buf_relse_t b_relse; /* releasing function */ | ||
187 | struct completion b_iowait; /* queue for I/O waiters */ | 184 | struct completion b_iowait; /* queue for I/O waiters */ |
188 | void *b_fspriv; | 185 | void *b_fspriv; |
189 | void *b_fspriv2; | 186 | void *b_fspriv2; |
@@ -323,7 +320,6 @@ void xfs_buf_stale(struct xfs_buf *bp); | |||
323 | #define XFS_BUF_FSPRIVATE2(bp, type) ((type)(bp)->b_fspriv2) | 320 | #define XFS_BUF_FSPRIVATE2(bp, type) ((type)(bp)->b_fspriv2) |
324 | #define XFS_BUF_SET_FSPRIVATE2(bp, val) ((bp)->b_fspriv2 = (void*)(val)) | 321 | #define XFS_BUF_SET_FSPRIVATE2(bp, val) ((bp)->b_fspriv2 = (void*)(val)) |
325 | #define XFS_BUF_SET_START(bp) do { } while (0) | 322 | #define XFS_BUF_SET_START(bp) do { } while (0) |
326 | #define XFS_BUF_SET_BRELSE_FUNC(bp, func) ((bp)->b_relse = (func)) | ||
327 | 323 | ||
328 | #define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->b_addr) | 324 | #define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->b_addr) |
329 | #define XFS_BUF_SET_PTR(bp, val, cnt) xfs_buf_associate_memory(bp, val, cnt) | 325 | #define XFS_BUF_SET_PTR(bp, val, cnt) xfs_buf_associate_memory(bp, val, cnt) |
@@ -360,8 +356,7 @@ xfs_buf_set_ref( | |||
360 | 356 | ||
361 | static inline void xfs_buf_relse(xfs_buf_t *bp) | 357 | static inline void xfs_buf_relse(xfs_buf_t *bp) |
362 | { | 358 | { |
363 | if (!bp->b_relse) | 359 | xfs_buf_unlock(bp); |
364 | xfs_buf_unlock(bp); | ||
365 | xfs_buf_rele(bp); | 360 | xfs_buf_rele(bp); |
366 | } | 361 | } |
367 | 362 | ||
diff --git a/fs/xfs/linux-2.6/xfs_discard.c b/fs/xfs/linux-2.6/xfs_discard.c new file mode 100644 index 000000000000..05201ae719e5 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_discard.c | |||
@@ -0,0 +1,191 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2010 Red Hat, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #include "xfs.h" | ||
19 | #include "xfs_sb.h" | ||
20 | #include "xfs_inum.h" | ||
21 | #include "xfs_log.h" | ||
22 | #include "xfs_ag.h" | ||
23 | #include "xfs_mount.h" | ||
24 | #include "xfs_quota.h" | ||
25 | #include "xfs_trans.h" | ||
26 | #include "xfs_alloc_btree.h" | ||
27 | #include "xfs_bmap_btree.h" | ||
28 | #include "xfs_ialloc_btree.h" | ||
29 | #include "xfs_btree.h" | ||
30 | #include "xfs_inode.h" | ||
31 | #include "xfs_alloc.h" | ||
32 | #include "xfs_error.h" | ||
33 | #include "xfs_discard.h" | ||
34 | #include "xfs_trace.h" | ||
35 | |||
36 | STATIC int | ||
37 | xfs_trim_extents( | ||
38 | struct xfs_mount *mp, | ||
39 | xfs_agnumber_t agno, | ||
40 | xfs_fsblock_t start, | ||
41 | xfs_fsblock_t len, | ||
42 | xfs_fsblock_t minlen, | ||
43 | __uint64_t *blocks_trimmed) | ||
44 | { | ||
45 | struct block_device *bdev = mp->m_ddev_targp->bt_bdev; | ||
46 | struct xfs_btree_cur *cur; | ||
47 | struct xfs_buf *agbp; | ||
48 | struct xfs_perag *pag; | ||
49 | int error; | ||
50 | int i; | ||
51 | |||
52 | pag = xfs_perag_get(mp, agno); | ||
53 | |||
54 | error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp); | ||
55 | if (error || !agbp) | ||
56 | goto out_put_perag; | ||
57 | |||
58 | cur = xfs_allocbt_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_CNT); | ||
59 | |||
60 | /* | ||
61 | * Force out the log. This means any transactions that might have freed | ||
62 | * space before we took the AGF buffer lock are now on disk, and the | ||
63 | * volatile disk cache is flushed. | ||
64 | */ | ||
65 | xfs_log_force(mp, XFS_LOG_SYNC); | ||
66 | |||
67 | /* | ||
68 | * Look up the longest btree in the AGF and start with it. | ||
69 | */ | ||
70 | error = xfs_alloc_lookup_le(cur, 0, | ||
71 | XFS_BUF_TO_AGF(agbp)->agf_longest, &i); | ||
72 | if (error) | ||
73 | goto out_del_cursor; | ||
74 | |||
75 | /* | ||
76 | * Loop until we are done with all extents that are large | ||
77 | * enough to be worth discarding. | ||
78 | */ | ||
79 | while (i) { | ||
80 | xfs_agblock_t fbno; | ||
81 | xfs_extlen_t flen; | ||
82 | |||
83 | error = xfs_alloc_get_rec(cur, &fbno, &flen, &i); | ||
84 | if (error) | ||
85 | goto out_del_cursor; | ||
86 | XFS_WANT_CORRUPTED_GOTO(i == 1, out_del_cursor); | ||
87 | ASSERT(flen <= XFS_BUF_TO_AGF(agbp)->agf_longest); | ||
88 | |||
89 | /* | ||
90 | * Too small? Give up. | ||
91 | */ | ||
92 | if (flen < minlen) { | ||
93 | trace_xfs_discard_toosmall(mp, agno, fbno, flen); | ||
94 | goto out_del_cursor; | ||
95 | } | ||
96 | |||
97 | /* | ||
98 | * If the extent is entirely outside of the range we are | ||
99 | * supposed to discard skip it. Do not bother to trim | ||
100 | * down partially overlapping ranges for now. | ||
101 | */ | ||
102 | if (XFS_AGB_TO_FSB(mp, agno, fbno) + flen < start || | ||
103 | XFS_AGB_TO_FSB(mp, agno, fbno) >= start + len) { | ||
104 | trace_xfs_discard_exclude(mp, agno, fbno, flen); | ||
105 | goto next_extent; | ||
106 | } | ||
107 | |||
108 | /* | ||
109 | * If any blocks in the range are still busy, skip the | ||
110 | * discard and try again the next time. | ||
111 | */ | ||
112 | if (xfs_alloc_busy_search(mp, agno, fbno, flen)) { | ||
113 | trace_xfs_discard_busy(mp, agno, fbno, flen); | ||
114 | goto next_extent; | ||
115 | } | ||
116 | |||
117 | trace_xfs_discard_extent(mp, agno, fbno, flen); | ||
118 | error = -blkdev_issue_discard(bdev, | ||
119 | XFS_AGB_TO_DADDR(mp, agno, fbno), | ||
120 | XFS_FSB_TO_BB(mp, flen), | ||
121 | GFP_NOFS, 0); | ||
122 | if (error) | ||
123 | goto out_del_cursor; | ||
124 | *blocks_trimmed += flen; | ||
125 | |||
126 | next_extent: | ||
127 | error = xfs_btree_decrement(cur, 0, &i); | ||
128 | if (error) | ||
129 | goto out_del_cursor; | ||
130 | } | ||
131 | |||
132 | out_del_cursor: | ||
133 | xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); | ||
134 | xfs_buf_relse(agbp); | ||
135 | out_put_perag: | ||
136 | xfs_perag_put(pag); | ||
137 | return error; | ||
138 | } | ||
139 | |||
140 | int | ||
141 | xfs_ioc_trim( | ||
142 | struct xfs_mount *mp, | ||
143 | struct fstrim_range __user *urange) | ||
144 | { | ||
145 | struct request_queue *q = mp->m_ddev_targp->bt_bdev->bd_disk->queue; | ||
146 | unsigned int granularity = q->limits.discard_granularity; | ||
147 | struct fstrim_range range; | ||
148 | xfs_fsblock_t start, len, minlen; | ||
149 | xfs_agnumber_t start_agno, end_agno, agno; | ||
150 | __uint64_t blocks_trimmed = 0; | ||
151 | int error, last_error = 0; | ||
152 | |||
153 | if (!capable(CAP_SYS_ADMIN)) | ||
154 | return -XFS_ERROR(EPERM); | ||
155 | if (copy_from_user(&range, urange, sizeof(range))) | ||
156 | return -XFS_ERROR(EFAULT); | ||
157 | |||
158 | /* | ||
159 | * Truncating down the len isn't actually quite correct, but using | ||
160 | * XFS_B_TO_FSB would mean we trivially get overflows for values | ||
161 | * of ULLONG_MAX or slightly lower. And ULLONG_MAX is the default | ||
162 | * used by the fstrim application. In the end it really doesn't | ||
163 | * matter as trimming blocks is an advisory interface. | ||
164 | */ | ||
165 | start = XFS_B_TO_FSBT(mp, range.start); | ||
166 | len = XFS_B_TO_FSBT(mp, range.len); | ||
167 | minlen = XFS_B_TO_FSB(mp, max_t(u64, granularity, range.minlen)); | ||
168 | |||
169 | start_agno = XFS_FSB_TO_AGNO(mp, start); | ||
170 | if (start_agno >= mp->m_sb.sb_agcount) | ||
171 | return -XFS_ERROR(EINVAL); | ||
172 | |||
173 | end_agno = XFS_FSB_TO_AGNO(mp, start + len); | ||
174 | if (end_agno >= mp->m_sb.sb_agcount) | ||
175 | end_agno = mp->m_sb.sb_agcount - 1; | ||
176 | |||
177 | for (agno = start_agno; agno <= end_agno; agno++) { | ||
178 | error = -xfs_trim_extents(mp, agno, start, len, minlen, | ||
179 | &blocks_trimmed); | ||
180 | if (error) | ||
181 | last_error = error; | ||
182 | } | ||
183 | |||
184 | if (last_error) | ||
185 | return last_error; | ||
186 | |||
187 | range.len = XFS_FSB_TO_B(mp, blocks_trimmed); | ||
188 | if (copy_to_user(urange, &range, sizeof(range))) | ||
189 | return -XFS_ERROR(EFAULT); | ||
190 | return 0; | ||
191 | } | ||
diff --git a/fs/xfs/linux-2.6/xfs_discard.h b/fs/xfs/linux-2.6/xfs_discard.h new file mode 100644 index 000000000000..e82b6dd3e127 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_discard.h | |||
@@ -0,0 +1,8 @@ | |||
1 | #ifndef XFS_DISCARD_H | ||
2 | #define XFS_DISCARD_H 1 | ||
3 | |||
4 | struct fstrim_range; | ||
5 | |||
6 | extern int xfs_ioc_trim(struct xfs_mount *, struct fstrim_range __user *); | ||
7 | |||
8 | #endif /* XFS_DISCARD_H */ | ||
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index ba8ad422a165..a55c1b46b219 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c | |||
@@ -37,10 +37,45 @@ | |||
37 | #include "xfs_trace.h" | 37 | #include "xfs_trace.h" |
38 | 38 | ||
39 | #include <linux/dcache.h> | 39 | #include <linux/dcache.h> |
40 | #include <linux/falloc.h> | ||
40 | 41 | ||
41 | static const struct vm_operations_struct xfs_file_vm_ops; | 42 | static const struct vm_operations_struct xfs_file_vm_ops; |
42 | 43 | ||
43 | /* | 44 | /* |
45 | * Locking primitives for read and write IO paths to ensure we consistently use | ||
46 | * and order the inode->i_mutex, ip->i_lock and ip->i_iolock. | ||
47 | */ | ||
48 | static inline void | ||
49 | xfs_rw_ilock( | ||
50 | struct xfs_inode *ip, | ||
51 | int type) | ||
52 | { | ||
53 | if (type & XFS_IOLOCK_EXCL) | ||
54 | mutex_lock(&VFS_I(ip)->i_mutex); | ||
55 | xfs_ilock(ip, type); | ||
56 | } | ||
57 | |||
58 | static inline void | ||
59 | xfs_rw_iunlock( | ||
60 | struct xfs_inode *ip, | ||
61 | int type) | ||
62 | { | ||
63 | xfs_iunlock(ip, type); | ||
64 | if (type & XFS_IOLOCK_EXCL) | ||
65 | mutex_unlock(&VFS_I(ip)->i_mutex); | ||
66 | } | ||
67 | |||
68 | static inline void | ||
69 | xfs_rw_ilock_demote( | ||
70 | struct xfs_inode *ip, | ||
71 | int type) | ||
72 | { | ||
73 | xfs_ilock_demote(ip, type); | ||
74 | if (type & XFS_IOLOCK_EXCL) | ||
75 | mutex_unlock(&VFS_I(ip)->i_mutex); | ||
76 | } | ||
77 | |||
78 | /* | ||
44 | * xfs_iozero | 79 | * xfs_iozero |
45 | * | 80 | * |
46 | * xfs_iozero clears the specified range of buffer supplied, | 81 | * xfs_iozero clears the specified range of buffer supplied, |
@@ -262,22 +297,21 @@ xfs_file_aio_read( | |||
262 | if (XFS_FORCED_SHUTDOWN(mp)) | 297 | if (XFS_FORCED_SHUTDOWN(mp)) |
263 | return -EIO; | 298 | return -EIO; |
264 | 299 | ||
265 | if (unlikely(ioflags & IO_ISDIRECT)) | ||
266 | mutex_lock(&inode->i_mutex); | ||
267 | xfs_ilock(ip, XFS_IOLOCK_SHARED); | ||
268 | |||
269 | if (unlikely(ioflags & IO_ISDIRECT)) { | 300 | if (unlikely(ioflags & IO_ISDIRECT)) { |
301 | xfs_rw_ilock(ip, XFS_IOLOCK_EXCL); | ||
302 | |||
270 | if (inode->i_mapping->nrpages) { | 303 | if (inode->i_mapping->nrpages) { |
271 | ret = -xfs_flushinval_pages(ip, | 304 | ret = -xfs_flushinval_pages(ip, |
272 | (iocb->ki_pos & PAGE_CACHE_MASK), | 305 | (iocb->ki_pos & PAGE_CACHE_MASK), |
273 | -1, FI_REMAPF_LOCKED); | 306 | -1, FI_REMAPF_LOCKED); |
307 | if (ret) { | ||
308 | xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL); | ||
309 | return ret; | ||
310 | } | ||
274 | } | 311 | } |
275 | mutex_unlock(&inode->i_mutex); | 312 | xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); |
276 | if (ret) { | 313 | } else |
277 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | 314 | xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); |
278 | return ret; | ||
279 | } | ||
280 | } | ||
281 | 315 | ||
282 | trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags); | 316 | trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags); |
283 | 317 | ||
@@ -285,7 +319,7 @@ xfs_file_aio_read( | |||
285 | if (ret > 0) | 319 | if (ret > 0) |
286 | XFS_STATS_ADD(xs_read_bytes, ret); | 320 | XFS_STATS_ADD(xs_read_bytes, ret); |
287 | 321 | ||
288 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | 322 | xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); |
289 | return ret; | 323 | return ret; |
290 | } | 324 | } |
291 | 325 | ||
@@ -309,7 +343,7 @@ xfs_file_splice_read( | |||
309 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) | 343 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) |
310 | return -EIO; | 344 | return -EIO; |
311 | 345 | ||
312 | xfs_ilock(ip, XFS_IOLOCK_SHARED); | 346 | xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); |
313 | 347 | ||
314 | trace_xfs_file_splice_read(ip, count, *ppos, ioflags); | 348 | trace_xfs_file_splice_read(ip, count, *ppos, ioflags); |
315 | 349 | ||
@@ -317,10 +351,61 @@ xfs_file_splice_read( | |||
317 | if (ret > 0) | 351 | if (ret > 0) |
318 | XFS_STATS_ADD(xs_read_bytes, ret); | 352 | XFS_STATS_ADD(xs_read_bytes, ret); |
319 | 353 | ||
320 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | 354 | xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); |
321 | return ret; | 355 | return ret; |
322 | } | 356 | } |
323 | 357 | ||
358 | STATIC void | ||
359 | xfs_aio_write_isize_update( | ||
360 | struct inode *inode, | ||
361 | loff_t *ppos, | ||
362 | ssize_t bytes_written) | ||
363 | { | ||
364 | struct xfs_inode *ip = XFS_I(inode); | ||
365 | xfs_fsize_t isize = i_size_read(inode); | ||
366 | |||
367 | if (bytes_written > 0) | ||
368 | XFS_STATS_ADD(xs_write_bytes, bytes_written); | ||
369 | |||
370 | if (unlikely(bytes_written < 0 && bytes_written != -EFAULT && | ||
371 | *ppos > isize)) | ||
372 | *ppos = isize; | ||
373 | |||
374 | if (*ppos > ip->i_size) { | ||
375 | xfs_rw_ilock(ip, XFS_ILOCK_EXCL); | ||
376 | if (*ppos > ip->i_size) | ||
377 | ip->i_size = *ppos; | ||
378 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); | ||
379 | } | ||
380 | } | ||
381 | |||
382 | /* | ||
383 | * If this was a direct or synchronous I/O that failed (such as ENOSPC) then | ||
384 | * part of the I/O may have been written to disk before the error occured. In | ||
385 | * this case the on-disk file size may have been adjusted beyond the in-memory | ||
386 | * file size and now needs to be truncated back. | ||
387 | */ | ||
388 | STATIC void | ||
389 | xfs_aio_write_newsize_update( | ||
390 | struct xfs_inode *ip) | ||
391 | { | ||
392 | if (ip->i_new_size) { | ||
393 | xfs_rw_ilock(ip, XFS_ILOCK_EXCL); | ||
394 | ip->i_new_size = 0; | ||
395 | if (ip->i_d.di_size > ip->i_size) | ||
396 | ip->i_d.di_size = ip->i_size; | ||
397 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); | ||
398 | } | ||
399 | } | ||
400 | |||
401 | /* | ||
402 | * xfs_file_splice_write() does not use xfs_rw_ilock() because | ||
403 | * generic_file_splice_write() takes the i_mutex itself. This, in theory, | ||
404 | * couuld cause lock inversions between the aio_write path and the splice path | ||
405 | * if someone is doing concurrent splice(2) based writes and write(2) based | ||
406 | * writes to the same inode. The only real way to fix this is to re-implement | ||
407 | * the generic code here with correct locking orders. | ||
408 | */ | ||
324 | STATIC ssize_t | 409 | STATIC ssize_t |
325 | xfs_file_splice_write( | 410 | xfs_file_splice_write( |
326 | struct pipe_inode_info *pipe, | 411 | struct pipe_inode_info *pipe, |
@@ -331,7 +416,7 @@ xfs_file_splice_write( | |||
331 | { | 416 | { |
332 | struct inode *inode = outfilp->f_mapping->host; | 417 | struct inode *inode = outfilp->f_mapping->host; |
333 | struct xfs_inode *ip = XFS_I(inode); | 418 | struct xfs_inode *ip = XFS_I(inode); |
334 | xfs_fsize_t isize, new_size; | 419 | xfs_fsize_t new_size; |
335 | int ioflags = 0; | 420 | int ioflags = 0; |
336 | ssize_t ret; | 421 | ssize_t ret; |
337 | 422 | ||
@@ -355,27 +440,9 @@ xfs_file_splice_write( | |||
355 | trace_xfs_file_splice_write(ip, count, *ppos, ioflags); | 440 | trace_xfs_file_splice_write(ip, count, *ppos, ioflags); |
356 | 441 | ||
357 | ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags); | 442 | ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags); |
358 | if (ret > 0) | ||
359 | XFS_STATS_ADD(xs_write_bytes, ret); | ||
360 | |||
361 | isize = i_size_read(inode); | ||
362 | if (unlikely(ret < 0 && ret != -EFAULT && *ppos > isize)) | ||
363 | *ppos = isize; | ||
364 | |||
365 | if (*ppos > ip->i_size) { | ||
366 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
367 | if (*ppos > ip->i_size) | ||
368 | ip->i_size = *ppos; | ||
369 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
370 | } | ||
371 | 443 | ||
372 | if (ip->i_new_size) { | 444 | xfs_aio_write_isize_update(inode, ppos, ret); |
373 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 445 | xfs_aio_write_newsize_update(ip); |
374 | ip->i_new_size = 0; | ||
375 | if (ip->i_d.di_size > ip->i_size) | ||
376 | ip->i_d.di_size = ip->i_size; | ||
377 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
378 | } | ||
379 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); | 446 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); |
380 | return ret; | 447 | return ret; |
381 | } | 448 | } |
@@ -562,247 +629,314 @@ out_lock: | |||
562 | return error; | 629 | return error; |
563 | } | 630 | } |
564 | 631 | ||
632 | /* | ||
633 | * Common pre-write limit and setup checks. | ||
634 | * | ||
635 | * Returns with iolock held according to @iolock. | ||
636 | */ | ||
565 | STATIC ssize_t | 637 | STATIC ssize_t |
566 | xfs_file_aio_write( | 638 | xfs_file_aio_write_checks( |
567 | struct kiocb *iocb, | 639 | struct file *file, |
568 | const struct iovec *iovp, | 640 | loff_t *pos, |
569 | unsigned long nr_segs, | 641 | size_t *count, |
570 | loff_t pos) | 642 | int *iolock) |
571 | { | 643 | { |
572 | struct file *file = iocb->ki_filp; | 644 | struct inode *inode = file->f_mapping->host; |
573 | struct address_space *mapping = file->f_mapping; | ||
574 | struct inode *inode = mapping->host; | ||
575 | struct xfs_inode *ip = XFS_I(inode); | 645 | struct xfs_inode *ip = XFS_I(inode); |
576 | struct xfs_mount *mp = ip->i_mount; | 646 | xfs_fsize_t new_size; |
577 | ssize_t ret = 0, error = 0; | 647 | int error = 0; |
578 | int ioflags = 0; | ||
579 | xfs_fsize_t isize, new_size; | ||
580 | int iolock; | ||
581 | size_t ocount = 0, count; | ||
582 | int need_i_mutex; | ||
583 | 648 | ||
584 | XFS_STATS_INC(xs_write_calls); | 649 | error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode)); |
650 | if (error) { | ||
651 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock); | ||
652 | *iolock = 0; | ||
653 | return error; | ||
654 | } | ||
585 | 655 | ||
586 | BUG_ON(iocb->ki_pos != pos); | 656 | new_size = *pos + *count; |
657 | if (new_size > ip->i_size) | ||
658 | ip->i_new_size = new_size; | ||
587 | 659 | ||
588 | if (unlikely(file->f_flags & O_DIRECT)) | 660 | if (likely(!(file->f_mode & FMODE_NOCMTIME))) |
589 | ioflags |= IO_ISDIRECT; | 661 | file_update_time(file); |
590 | if (file->f_mode & FMODE_NOCMTIME) | 662 | |
591 | ioflags |= IO_INVIS; | 663 | /* |
664 | * If the offset is beyond the size of the file, we need to zero any | ||
665 | * blocks that fall between the existing EOF and the start of this | ||
666 | * write. | ||
667 | */ | ||
668 | if (*pos > ip->i_size) | ||
669 | error = -xfs_zero_eof(ip, *pos, ip->i_size); | ||
592 | 670 | ||
593 | error = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ); | 671 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); |
594 | if (error) | 672 | if (error) |
595 | return error; | 673 | return error; |
596 | 674 | ||
597 | count = ocount; | 675 | /* |
598 | if (count == 0) | 676 | * If we're writing the file then make sure to clear the setuid and |
599 | return 0; | 677 | * setgid bits if the process is not being run by root. This keeps |
600 | 678 | * people from modifying setuid and setgid binaries. | |
601 | xfs_wait_for_freeze(mp, SB_FREEZE_WRITE); | 679 | */ |
680 | return file_remove_suid(file); | ||
602 | 681 | ||
603 | if (XFS_FORCED_SHUTDOWN(mp)) | 682 | } |
604 | return -EIO; | ||
605 | 683 | ||
606 | relock: | 684 | /* |
607 | if (ioflags & IO_ISDIRECT) { | 685 | * xfs_file_dio_aio_write - handle direct IO writes |
608 | iolock = XFS_IOLOCK_SHARED; | 686 | * |
609 | need_i_mutex = 0; | 687 | * Lock the inode appropriately to prepare for and issue a direct IO write. |
610 | } else { | 688 | * By separating it from the buffered write path we remove all the tricky to |
611 | iolock = XFS_IOLOCK_EXCL; | 689 | * follow locking changes and looping. |
612 | need_i_mutex = 1; | 690 | * |
613 | mutex_lock(&inode->i_mutex); | 691 | * If there are cached pages or we're extending the file, we need IOLOCK_EXCL |
692 | * until we're sure the bytes at the new EOF have been zeroed and/or the cached | ||
693 | * pages are flushed out. | ||
694 | * | ||
695 | * In most cases the direct IO writes will be done holding IOLOCK_SHARED | ||
696 | * allowing them to be done in parallel with reads and other direct IO writes. | ||
697 | * However, if the IO is not aligned to filesystem blocks, the direct IO layer | ||
698 | * needs to do sub-block zeroing and that requires serialisation against other | ||
699 | * direct IOs to the same block. In this case we need to serialise the | ||
700 | * submission of the unaligned IOs so that we don't get racing block zeroing in | ||
701 | * the dio layer. To avoid the problem with aio, we also need to wait for | ||
702 | * outstanding IOs to complete so that unwritten extent conversion is completed | ||
703 | * before we try to map the overlapping block. This is currently implemented by | ||
704 | * hitting it with a big hammer (i.e. xfs_ioend_wait()). | ||
705 | * | ||
706 | * Returns with locks held indicated by @iolock and errors indicated by | ||
707 | * negative return values. | ||
708 | */ | ||
709 | STATIC ssize_t | ||
710 | xfs_file_dio_aio_write( | ||
711 | struct kiocb *iocb, | ||
712 | const struct iovec *iovp, | ||
713 | unsigned long nr_segs, | ||
714 | loff_t pos, | ||
715 | size_t ocount, | ||
716 | int *iolock) | ||
717 | { | ||
718 | struct file *file = iocb->ki_filp; | ||
719 | struct address_space *mapping = file->f_mapping; | ||
720 | struct inode *inode = mapping->host; | ||
721 | struct xfs_inode *ip = XFS_I(inode); | ||
722 | struct xfs_mount *mp = ip->i_mount; | ||
723 | ssize_t ret = 0; | ||
724 | size_t count = ocount; | ||
725 | int unaligned_io = 0; | ||
726 | struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ? | ||
727 | mp->m_rtdev_targp : mp->m_ddev_targp; | ||
728 | |||
729 | *iolock = 0; | ||
730 | if ((pos & target->bt_smask) || (count & target->bt_smask)) | ||
731 | return -XFS_ERROR(EINVAL); | ||
732 | |||
733 | if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask)) | ||
734 | unaligned_io = 1; | ||
735 | |||
736 | if (unaligned_io || mapping->nrpages || pos > ip->i_size) | ||
737 | *iolock = XFS_IOLOCK_EXCL; | ||
738 | else | ||
739 | *iolock = XFS_IOLOCK_SHARED; | ||
740 | xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock); | ||
741 | |||
742 | ret = xfs_file_aio_write_checks(file, &pos, &count, iolock); | ||
743 | if (ret) | ||
744 | return ret; | ||
745 | |||
746 | if (mapping->nrpages) { | ||
747 | WARN_ON(*iolock != XFS_IOLOCK_EXCL); | ||
748 | ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1, | ||
749 | FI_REMAPF_LOCKED); | ||
750 | if (ret) | ||
751 | return ret; | ||
614 | } | 752 | } |
615 | 753 | ||
616 | xfs_ilock(ip, XFS_ILOCK_EXCL|iolock); | 754 | /* |
617 | 755 | * If we are doing unaligned IO, wait for all other IO to drain, | |
618 | start: | 756 | * otherwise demote the lock if we had to flush cached pages |
619 | error = -generic_write_checks(file, &pos, &count, | 757 | */ |
620 | S_ISBLK(inode->i_mode)); | 758 | if (unaligned_io) |
621 | if (error) { | 759 | xfs_ioend_wait(ip); |
622 | xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock); | 760 | else if (*iolock == XFS_IOLOCK_EXCL) { |
623 | goto out_unlock_mutex; | 761 | xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); |
762 | *iolock = XFS_IOLOCK_SHARED; | ||
624 | } | 763 | } |
625 | 764 | ||
626 | if (ioflags & IO_ISDIRECT) { | 765 | trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0); |
627 | xfs_buftarg_t *target = | 766 | ret = generic_file_direct_write(iocb, iovp, |
628 | XFS_IS_REALTIME_INODE(ip) ? | 767 | &nr_segs, pos, &iocb->ki_pos, count, ocount); |
629 | mp->m_rtdev_targp : mp->m_ddev_targp; | ||
630 | 768 | ||
631 | if ((pos & target->bt_smask) || (count & target->bt_smask)) { | 769 | /* No fallback to buffered IO on errors for XFS. */ |
632 | xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock); | 770 | ASSERT(ret < 0 || ret == count); |
633 | return XFS_ERROR(-EINVAL); | 771 | return ret; |
634 | } | 772 | } |
635 | 773 | ||
636 | if (!need_i_mutex && (mapping->nrpages || pos > ip->i_size)) { | 774 | STATIC ssize_t |
637 | xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock); | 775 | xfs_file_buffered_aio_write( |
638 | iolock = XFS_IOLOCK_EXCL; | 776 | struct kiocb *iocb, |
639 | need_i_mutex = 1; | 777 | const struct iovec *iovp, |
640 | mutex_lock(&inode->i_mutex); | 778 | unsigned long nr_segs, |
641 | xfs_ilock(ip, XFS_ILOCK_EXCL|iolock); | 779 | loff_t pos, |
642 | goto start; | 780 | size_t ocount, |
643 | } | 781 | int *iolock) |
644 | } | 782 | { |
783 | struct file *file = iocb->ki_filp; | ||
784 | struct address_space *mapping = file->f_mapping; | ||
785 | struct inode *inode = mapping->host; | ||
786 | struct xfs_inode *ip = XFS_I(inode); | ||
787 | ssize_t ret; | ||
788 | int enospc = 0; | ||
789 | size_t count = ocount; | ||
645 | 790 | ||
646 | new_size = pos + count; | 791 | *iolock = XFS_IOLOCK_EXCL; |
647 | if (new_size > ip->i_size) | 792 | xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock); |
648 | ip->i_new_size = new_size; | ||
649 | 793 | ||
650 | if (likely(!(ioflags & IO_INVIS))) | 794 | ret = xfs_file_aio_write_checks(file, &pos, &count, iolock); |
651 | file_update_time(file); | 795 | if (ret) |
796 | return ret; | ||
652 | 797 | ||
798 | /* We can write back this queue in page reclaim */ | ||
799 | current->backing_dev_info = mapping->backing_dev_info; | ||
800 | |||
801 | write_retry: | ||
802 | trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0); | ||
803 | ret = generic_file_buffered_write(iocb, iovp, nr_segs, | ||
804 | pos, &iocb->ki_pos, count, ret); | ||
653 | /* | 805 | /* |
654 | * If the offset is beyond the size of the file, we have a couple | 806 | * if we just got an ENOSPC, flush the inode now we aren't holding any |
655 | * of things to do. First, if there is already space allocated | 807 | * page locks and retry *once* |
656 | * we need to either create holes or zero the disk or ... | ||
657 | * | ||
658 | * If there is a page where the previous size lands, we need | ||
659 | * to zero it out up to the new size. | ||
660 | */ | 808 | */ |
661 | 809 | if (ret == -ENOSPC && !enospc) { | |
662 | if (pos > ip->i_size) { | 810 | ret = -xfs_flush_pages(ip, 0, -1, 0, FI_NONE); |
663 | error = xfs_zero_eof(ip, pos, ip->i_size); | 811 | if (ret) |
664 | if (error) { | 812 | return ret; |
665 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 813 | enospc = 1; |
666 | goto out_unlock_internal; | 814 | goto write_retry; |
667 | } | ||
668 | } | 815 | } |
669 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 816 | current->backing_dev_info = NULL; |
817 | return ret; | ||
818 | } | ||
670 | 819 | ||
671 | /* | 820 | STATIC ssize_t |
672 | * If we're writing the file then make sure to clear the | 821 | xfs_file_aio_write( |
673 | * setuid and setgid bits if the process is not being run | 822 | struct kiocb *iocb, |
674 | * by root. This keeps people from modifying setuid and | 823 | const struct iovec *iovp, |
675 | * setgid binaries. | 824 | unsigned long nr_segs, |
676 | */ | 825 | loff_t pos) |
677 | error = -file_remove_suid(file); | 826 | { |
678 | if (unlikely(error)) | 827 | struct file *file = iocb->ki_filp; |
679 | goto out_unlock_internal; | 828 | struct address_space *mapping = file->f_mapping; |
829 | struct inode *inode = mapping->host; | ||
830 | struct xfs_inode *ip = XFS_I(inode); | ||
831 | ssize_t ret; | ||
832 | int iolock; | ||
833 | size_t ocount = 0; | ||
680 | 834 | ||
681 | /* We can write back this queue in page reclaim */ | 835 | XFS_STATS_INC(xs_write_calls); |
682 | current->backing_dev_info = mapping->backing_dev_info; | ||
683 | 836 | ||
684 | if ((ioflags & IO_ISDIRECT)) { | 837 | BUG_ON(iocb->ki_pos != pos); |
685 | if (mapping->nrpages) { | ||
686 | WARN_ON(need_i_mutex == 0); | ||
687 | error = xfs_flushinval_pages(ip, | ||
688 | (pos & PAGE_CACHE_MASK), | ||
689 | -1, FI_REMAPF_LOCKED); | ||
690 | if (error) | ||
691 | goto out_unlock_internal; | ||
692 | } | ||
693 | 838 | ||
694 | if (need_i_mutex) { | 839 | ret = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ); |
695 | /* demote the lock now the cached pages are gone */ | 840 | if (ret) |
696 | xfs_ilock_demote(ip, XFS_IOLOCK_EXCL); | 841 | return ret; |
697 | mutex_unlock(&inode->i_mutex); | ||
698 | 842 | ||
699 | iolock = XFS_IOLOCK_SHARED; | 843 | if (ocount == 0) |
700 | need_i_mutex = 0; | 844 | return 0; |
701 | } | ||
702 | 845 | ||
703 | trace_xfs_file_direct_write(ip, count, iocb->ki_pos, ioflags); | 846 | xfs_wait_for_freeze(ip->i_mount, SB_FREEZE_WRITE); |
704 | ret = generic_file_direct_write(iocb, iovp, | ||
705 | &nr_segs, pos, &iocb->ki_pos, count, ocount); | ||
706 | 847 | ||
707 | /* | 848 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) |
708 | * direct-io write to a hole: fall through to buffered I/O | 849 | return -EIO; |
709 | * for completing the rest of the request. | ||
710 | */ | ||
711 | if (ret >= 0 && ret != count) { | ||
712 | XFS_STATS_ADD(xs_write_bytes, ret); | ||
713 | 850 | ||
714 | pos += ret; | 851 | if (unlikely(file->f_flags & O_DIRECT)) |
715 | count -= ret; | 852 | ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, |
853 | ocount, &iolock); | ||
854 | else | ||
855 | ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos, | ||
856 | ocount, &iolock); | ||
716 | 857 | ||
717 | ioflags &= ~IO_ISDIRECT; | 858 | xfs_aio_write_isize_update(inode, &iocb->ki_pos, ret); |
718 | xfs_iunlock(ip, iolock); | ||
719 | goto relock; | ||
720 | } | ||
721 | } else { | ||
722 | int enospc = 0; | ||
723 | ssize_t ret2 = 0; | ||
724 | 859 | ||
725 | write_retry: | 860 | if (ret <= 0) |
726 | trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, ioflags); | 861 | goto out_unlock; |
727 | ret2 = generic_file_buffered_write(iocb, iovp, nr_segs, | ||
728 | pos, &iocb->ki_pos, count, ret); | ||
729 | /* | ||
730 | * if we just got an ENOSPC, flush the inode now we | ||
731 | * aren't holding any page locks and retry *once* | ||
732 | */ | ||
733 | if (ret2 == -ENOSPC && !enospc) { | ||
734 | error = xfs_flush_pages(ip, 0, -1, 0, FI_NONE); | ||
735 | if (error) | ||
736 | goto out_unlock_internal; | ||
737 | enospc = 1; | ||
738 | goto write_retry; | ||
739 | } | ||
740 | ret = ret2; | ||
741 | } | ||
742 | 862 | ||
743 | current->backing_dev_info = NULL; | 863 | /* Handle various SYNC-type writes */ |
864 | if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { | ||
865 | loff_t end = pos + ret - 1; | ||
866 | int error, error2; | ||
744 | 867 | ||
745 | isize = i_size_read(inode); | 868 | xfs_rw_iunlock(ip, iolock); |
746 | if (unlikely(ret < 0 && ret != -EFAULT && iocb->ki_pos > isize)) | 869 | error = filemap_write_and_wait_range(mapping, pos, end); |
747 | iocb->ki_pos = isize; | 870 | xfs_rw_ilock(ip, iolock); |
748 | 871 | ||
749 | if (iocb->ki_pos > ip->i_size) { | 872 | error2 = -xfs_file_fsync(file, |
750 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 873 | (file->f_flags & __O_SYNC) ? 0 : 1); |
751 | if (iocb->ki_pos > ip->i_size) | 874 | if (error) |
752 | ip->i_size = iocb->ki_pos; | 875 | ret = error; |
753 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 876 | else if (error2) |
877 | ret = error2; | ||
754 | } | 878 | } |
755 | 879 | ||
756 | error = -ret; | 880 | out_unlock: |
757 | if (ret <= 0) | 881 | xfs_aio_write_newsize_update(ip); |
758 | goto out_unlock_internal; | 882 | xfs_rw_iunlock(ip, iolock); |
883 | return ret; | ||
884 | } | ||
759 | 885 | ||
760 | XFS_STATS_ADD(xs_write_bytes, ret); | 886 | STATIC long |
887 | xfs_file_fallocate( | ||
888 | struct file *file, | ||
889 | int mode, | ||
890 | loff_t offset, | ||
891 | loff_t len) | ||
892 | { | ||
893 | struct inode *inode = file->f_path.dentry->d_inode; | ||
894 | long error; | ||
895 | loff_t new_size = 0; | ||
896 | xfs_flock64_t bf; | ||
897 | xfs_inode_t *ip = XFS_I(inode); | ||
898 | int cmd = XFS_IOC_RESVSP; | ||
761 | 899 | ||
762 | /* Handle various SYNC-type writes */ | 900 | if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) |
763 | if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { | 901 | return -EOPNOTSUPP; |
764 | loff_t end = pos + ret - 1; | ||
765 | int error2; | ||
766 | 902 | ||
767 | xfs_iunlock(ip, iolock); | 903 | bf.l_whence = 0; |
768 | if (need_i_mutex) | 904 | bf.l_start = offset; |
769 | mutex_unlock(&inode->i_mutex); | 905 | bf.l_len = len; |
770 | 906 | ||
771 | error2 = filemap_write_and_wait_range(mapping, pos, end); | 907 | xfs_ilock(ip, XFS_IOLOCK_EXCL); |
772 | if (!error) | ||
773 | error = error2; | ||
774 | if (need_i_mutex) | ||
775 | mutex_lock(&inode->i_mutex); | ||
776 | xfs_ilock(ip, iolock); | ||
777 | 908 | ||
778 | error2 = -xfs_file_fsync(file, | 909 | if (mode & FALLOC_FL_PUNCH_HOLE) |
779 | (file->f_flags & __O_SYNC) ? 0 : 1); | 910 | cmd = XFS_IOC_UNRESVSP; |
780 | if (!error) | 911 | |
781 | error = error2; | 912 | /* check the new inode size is valid before allocating */ |
913 | if (!(mode & FALLOC_FL_KEEP_SIZE) && | ||
914 | offset + len > i_size_read(inode)) { | ||
915 | new_size = offset + len; | ||
916 | error = inode_newsize_ok(inode, new_size); | ||
917 | if (error) | ||
918 | goto out_unlock; | ||
782 | } | 919 | } |
783 | 920 | ||
784 | out_unlock_internal: | 921 | error = -xfs_change_file_space(ip, cmd, &bf, 0, XFS_ATTR_NOLOCK); |
785 | if (ip->i_new_size) { | 922 | if (error) |
786 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 923 | goto out_unlock; |
787 | ip->i_new_size = 0; | 924 | |
788 | /* | 925 | /* Change file size if needed */ |
789 | * If this was a direct or synchronous I/O that failed (such | 926 | if (new_size) { |
790 | * as ENOSPC) then part of the I/O may have been written to | 927 | struct iattr iattr; |
791 | * disk before the error occured. In this case the on-disk | 928 | |
792 | * file size may have been adjusted beyond the in-memory file | 929 | iattr.ia_valid = ATTR_SIZE; |
793 | * size and now needs to be truncated back. | 930 | iattr.ia_size = new_size; |
794 | */ | 931 | error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK); |
795 | if (ip->i_d.di_size > ip->i_size) | ||
796 | ip->i_d.di_size = ip->i_size; | ||
797 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
798 | } | 932 | } |
799 | xfs_iunlock(ip, iolock); | 933 | |
800 | out_unlock_mutex: | 934 | out_unlock: |
801 | if (need_i_mutex) | 935 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); |
802 | mutex_unlock(&inode->i_mutex); | 936 | return error; |
803 | return -error; | ||
804 | } | 937 | } |
805 | 938 | ||
939 | |||
806 | STATIC int | 940 | STATIC int |
807 | xfs_file_open( | 941 | xfs_file_open( |
808 | struct inode *inode, | 942 | struct inode *inode, |
@@ -921,6 +1055,7 @@ const struct file_operations xfs_file_operations = { | |||
921 | .open = xfs_file_open, | 1055 | .open = xfs_file_open, |
922 | .release = xfs_file_release, | 1056 | .release = xfs_file_release, |
923 | .fsync = xfs_file_fsync, | 1057 | .fsync = xfs_file_fsync, |
1058 | .fallocate = xfs_file_fallocate, | ||
924 | }; | 1059 | }; |
925 | 1060 | ||
926 | const struct file_operations xfs_dir_file_operations = { | 1061 | const struct file_operations xfs_dir_file_operations = { |
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index ad442d9e392e..f5e2a19e0f8e 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c | |||
@@ -39,6 +39,7 @@ | |||
39 | #include "xfs_dfrag.h" | 39 | #include "xfs_dfrag.h" |
40 | #include "xfs_fsops.h" | 40 | #include "xfs_fsops.h" |
41 | #include "xfs_vnodeops.h" | 41 | #include "xfs_vnodeops.h" |
42 | #include "xfs_discard.h" | ||
42 | #include "xfs_quota.h" | 43 | #include "xfs_quota.h" |
43 | #include "xfs_inode_item.h" | 44 | #include "xfs_inode_item.h" |
44 | #include "xfs_export.h" | 45 | #include "xfs_export.h" |
@@ -984,10 +985,22 @@ xfs_ioctl_setattr( | |||
984 | 985 | ||
985 | /* | 986 | /* |
986 | * Extent size must be a multiple of the appropriate block | 987 | * Extent size must be a multiple of the appropriate block |
987 | * size, if set at all. | 988 | * size, if set at all. It must also be smaller than the |
989 | * maximum extent size supported by the filesystem. | ||
990 | * | ||
991 | * Also, for non-realtime files, limit the extent size hint to | ||
992 | * half the size of the AGs in the filesystem so alignment | ||
993 | * doesn't result in extents larger than an AG. | ||
988 | */ | 994 | */ |
989 | if (fa->fsx_extsize != 0) { | 995 | if (fa->fsx_extsize != 0) { |
990 | xfs_extlen_t size; | 996 | xfs_extlen_t size; |
997 | xfs_fsblock_t extsize_fsb; | ||
998 | |||
999 | extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize); | ||
1000 | if (extsize_fsb > MAXEXTLEN) { | ||
1001 | code = XFS_ERROR(EINVAL); | ||
1002 | goto error_return; | ||
1003 | } | ||
991 | 1004 | ||
992 | if (XFS_IS_REALTIME_INODE(ip) || | 1005 | if (XFS_IS_REALTIME_INODE(ip) || |
993 | ((mask & FSX_XFLAGS) && | 1006 | ((mask & FSX_XFLAGS) && |
@@ -996,6 +1009,10 @@ xfs_ioctl_setattr( | |||
996 | mp->m_sb.sb_blocklog; | 1009 | mp->m_sb.sb_blocklog; |
997 | } else { | 1010 | } else { |
998 | size = mp->m_sb.sb_blocksize; | 1011 | size = mp->m_sb.sb_blocksize; |
1012 | if (extsize_fsb > mp->m_sb.sb_agblocks / 2) { | ||
1013 | code = XFS_ERROR(EINVAL); | ||
1014 | goto error_return; | ||
1015 | } | ||
999 | } | 1016 | } |
1000 | 1017 | ||
1001 | if (fa->fsx_extsize % size) { | 1018 | if (fa->fsx_extsize % size) { |
@@ -1294,6 +1311,8 @@ xfs_file_ioctl( | |||
1294 | trace_xfs_file_ioctl(ip); | 1311 | trace_xfs_file_ioctl(ip); |
1295 | 1312 | ||
1296 | switch (cmd) { | 1313 | switch (cmd) { |
1314 | case FITRIM: | ||
1315 | return xfs_ioc_trim(mp, arg); | ||
1297 | case XFS_IOC_ALLOCSP: | 1316 | case XFS_IOC_ALLOCSP: |
1298 | case XFS_IOC_FREESP: | 1317 | case XFS_IOC_FREESP: |
1299 | case XFS_IOC_RESVSP: | 1318 | case XFS_IOC_RESVSP: |
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index da54403633b6..bd5727852fd6 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c | |||
@@ -46,7 +46,6 @@ | |||
46 | #include <linux/namei.h> | 46 | #include <linux/namei.h> |
47 | #include <linux/posix_acl.h> | 47 | #include <linux/posix_acl.h> |
48 | #include <linux/security.h> | 48 | #include <linux/security.h> |
49 | #include <linux/falloc.h> | ||
50 | #include <linux/fiemap.h> | 49 | #include <linux/fiemap.h> |
51 | #include <linux/slab.h> | 50 | #include <linux/slab.h> |
52 | 51 | ||
@@ -505,61 +504,6 @@ xfs_vn_setattr( | |||
505 | return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0); | 504 | return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0); |
506 | } | 505 | } |
507 | 506 | ||
508 | STATIC long | ||
509 | xfs_vn_fallocate( | ||
510 | struct inode *inode, | ||
511 | int mode, | ||
512 | loff_t offset, | ||
513 | loff_t len) | ||
514 | { | ||
515 | long error; | ||
516 | loff_t new_size = 0; | ||
517 | xfs_flock64_t bf; | ||
518 | xfs_inode_t *ip = XFS_I(inode); | ||
519 | int cmd = XFS_IOC_RESVSP; | ||
520 | |||
521 | /* preallocation on directories not yet supported */ | ||
522 | error = -ENODEV; | ||
523 | if (S_ISDIR(inode->i_mode)) | ||
524 | goto out_error; | ||
525 | |||
526 | bf.l_whence = 0; | ||
527 | bf.l_start = offset; | ||
528 | bf.l_len = len; | ||
529 | |||
530 | xfs_ilock(ip, XFS_IOLOCK_EXCL); | ||
531 | |||
532 | if (mode & FALLOC_FL_PUNCH_HOLE) | ||
533 | cmd = XFS_IOC_UNRESVSP; | ||
534 | |||
535 | /* check the new inode size is valid before allocating */ | ||
536 | if (!(mode & FALLOC_FL_KEEP_SIZE) && | ||
537 | offset + len > i_size_read(inode)) { | ||
538 | new_size = offset + len; | ||
539 | error = inode_newsize_ok(inode, new_size); | ||
540 | if (error) | ||
541 | goto out_unlock; | ||
542 | } | ||
543 | |||
544 | error = -xfs_change_file_space(ip, cmd, &bf, 0, XFS_ATTR_NOLOCK); | ||
545 | if (error) | ||
546 | goto out_unlock; | ||
547 | |||
548 | /* Change file size if needed */ | ||
549 | if (new_size) { | ||
550 | struct iattr iattr; | ||
551 | |||
552 | iattr.ia_valid = ATTR_SIZE; | ||
553 | iattr.ia_size = new_size; | ||
554 | error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK); | ||
555 | } | ||
556 | |||
557 | out_unlock: | ||
558 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); | ||
559 | out_error: | ||
560 | return error; | ||
561 | } | ||
562 | |||
563 | #define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) | 507 | #define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) |
564 | 508 | ||
565 | /* | 509 | /* |
@@ -653,7 +597,6 @@ static const struct inode_operations xfs_inode_operations = { | |||
653 | .getxattr = generic_getxattr, | 597 | .getxattr = generic_getxattr, |
654 | .removexattr = generic_removexattr, | 598 | .removexattr = generic_removexattr, |
655 | .listxattr = xfs_vn_listxattr, | 599 | .listxattr = xfs_vn_listxattr, |
656 | .fallocate = xfs_vn_fallocate, | ||
657 | .fiemap = xfs_vn_fiemap, | 600 | .fiemap = xfs_vn_fiemap, |
658 | }; | 601 | }; |
659 | 602 | ||
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index bd07f7339366..9731898083ae 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c | |||
@@ -1414,7 +1414,7 @@ xfs_fs_freeze( | |||
1414 | 1414 | ||
1415 | xfs_save_resvblks(mp); | 1415 | xfs_save_resvblks(mp); |
1416 | xfs_quiesce_attr(mp); | 1416 | xfs_quiesce_attr(mp); |
1417 | return -xfs_fs_log_dummy(mp, SYNC_WAIT); | 1417 | return -xfs_fs_log_dummy(mp); |
1418 | } | 1418 | } |
1419 | 1419 | ||
1420 | STATIC int | 1420 | STATIC int |
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index a02480de9759..e22f0057d21f 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c | |||
@@ -362,7 +362,7 @@ xfs_quiesce_data( | |||
362 | 362 | ||
363 | /* mark the log as covered if needed */ | 363 | /* mark the log as covered if needed */ |
364 | if (xfs_log_need_covered(mp)) | 364 | if (xfs_log_need_covered(mp)) |
365 | error2 = xfs_fs_log_dummy(mp, SYNC_WAIT); | 365 | error2 = xfs_fs_log_dummy(mp); |
366 | 366 | ||
367 | /* flush data-only devices */ | 367 | /* flush data-only devices */ |
368 | if (mp->m_rtdev_targp) | 368 | if (mp->m_rtdev_targp) |
@@ -503,13 +503,14 @@ xfs_sync_worker( | |||
503 | int error; | 503 | int error; |
504 | 504 | ||
505 | if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { | 505 | if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { |
506 | xfs_log_force(mp, 0); | ||
507 | xfs_reclaim_inodes(mp, 0); | ||
508 | /* dgc: errors ignored here */ | 506 | /* dgc: errors ignored here */ |
509 | error = xfs_qm_sync(mp, SYNC_TRYLOCK); | ||
510 | if (mp->m_super->s_frozen == SB_UNFROZEN && | 507 | if (mp->m_super->s_frozen == SB_UNFROZEN && |
511 | xfs_log_need_covered(mp)) | 508 | xfs_log_need_covered(mp)) |
512 | error = xfs_fs_log_dummy(mp, 0); | 509 | error = xfs_fs_log_dummy(mp); |
510 | else | ||
511 | xfs_log_force(mp, 0); | ||
512 | xfs_reclaim_inodes(mp, 0); | ||
513 | error = xfs_qm_sync(mp, SYNC_TRYLOCK); | ||
513 | } | 514 | } |
514 | mp->m_sync_seq++; | 515 | mp->m_sync_seq++; |
515 | wake_up(&mp->m_wait_single_sync_task); | 516 | wake_up(&mp->m_wait_single_sync_task); |
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c index 7bb5092d6ae4..ee3cee097e7e 100644 --- a/fs/xfs/linux-2.6/xfs_sysctl.c +++ b/fs/xfs/linux-2.6/xfs_sysctl.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include <linux/sysctl.h> | 19 | #include <linux/sysctl.h> |
20 | #include <linux/proc_fs.h> | 20 | #include <linux/proc_fs.h> |
21 | #include "xfs_error.h" | ||
21 | 22 | ||
22 | static struct ctl_table_header *xfs_table_header; | 23 | static struct ctl_table_header *xfs_table_header; |
23 | 24 | ||
@@ -51,6 +52,26 @@ xfs_stats_clear_proc_handler( | |||
51 | 52 | ||
52 | return ret; | 53 | return ret; |
53 | } | 54 | } |
55 | |||
56 | STATIC int | ||
57 | xfs_panic_mask_proc_handler( | ||
58 | ctl_table *ctl, | ||
59 | int write, | ||
60 | void __user *buffer, | ||
61 | size_t *lenp, | ||
62 | loff_t *ppos) | ||
63 | { | ||
64 | int ret, *valp = ctl->data; | ||
65 | |||
66 | ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); | ||
67 | if (!ret && write) { | ||
68 | xfs_panic_mask = *valp; | ||
69 | #ifdef DEBUG | ||
70 | xfs_panic_mask |= (XFS_PTAG_SHUTDOWN_CORRUPT | XFS_PTAG_LOGRES); | ||
71 | #endif | ||
72 | } | ||
73 | return ret; | ||
74 | } | ||
54 | #endif /* CONFIG_PROC_FS */ | 75 | #endif /* CONFIG_PROC_FS */ |
55 | 76 | ||
56 | static ctl_table xfs_table[] = { | 77 | static ctl_table xfs_table[] = { |
@@ -77,7 +98,7 @@ static ctl_table xfs_table[] = { | |||
77 | .data = &xfs_params.panic_mask.val, | 98 | .data = &xfs_params.panic_mask.val, |
78 | .maxlen = sizeof(int), | 99 | .maxlen = sizeof(int), |
79 | .mode = 0644, | 100 | .mode = 0644, |
80 | .proc_handler = proc_dointvec_minmax, | 101 | .proc_handler = xfs_panic_mask_proc_handler, |
81 | .extra1 = &xfs_params.panic_mask.min, | 102 | .extra1 = &xfs_params.panic_mask.min, |
82 | .extra2 = &xfs_params.panic_mask.max | 103 | .extra2 = &xfs_params.panic_mask.max |
83 | }, | 104 | }, |
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h index 647af2a2e7aa..2d0bcb479075 100644 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ b/fs/xfs/linux-2.6/xfs_trace.h | |||
@@ -1759,6 +1759,39 @@ DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_recover); | |||
1759 | DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_cancel); | 1759 | DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_cancel); |
1760 | DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_skip); | 1760 | DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_skip); |
1761 | 1761 | ||
1762 | DECLARE_EVENT_CLASS(xfs_discard_class, | ||
1763 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, | ||
1764 | xfs_agblock_t agbno, xfs_extlen_t len), | ||
1765 | TP_ARGS(mp, agno, agbno, len), | ||
1766 | TP_STRUCT__entry( | ||
1767 | __field(dev_t, dev) | ||
1768 | __field(xfs_agnumber_t, agno) | ||
1769 | __field(xfs_agblock_t, agbno) | ||
1770 | __field(xfs_extlen_t, len) | ||
1771 | ), | ||
1772 | TP_fast_assign( | ||
1773 | __entry->dev = mp->m_super->s_dev; | ||
1774 | __entry->agno = agno; | ||
1775 | __entry->agbno = agbno; | ||
1776 | __entry->len = len; | ||
1777 | ), | ||
1778 | TP_printk("dev %d:%d agno %u agbno %u len %u\n", | ||
1779 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
1780 | __entry->agno, | ||
1781 | __entry->agbno, | ||
1782 | __entry->len) | ||
1783 | ) | ||
1784 | |||
1785 | #define DEFINE_DISCARD_EVENT(name) \ | ||
1786 | DEFINE_EVENT(xfs_discard_class, name, \ | ||
1787 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ | ||
1788 | xfs_agblock_t agbno, xfs_extlen_t len), \ | ||
1789 | TP_ARGS(mp, agno, agbno, len)) | ||
1790 | DEFINE_DISCARD_EVENT(xfs_discard_extent); | ||
1791 | DEFINE_DISCARD_EVENT(xfs_discard_toosmall); | ||
1792 | DEFINE_DISCARD_EVENT(xfs_discard_exclude); | ||
1793 | DEFINE_DISCARD_EVENT(xfs_discard_busy); | ||
1794 | |||
1762 | #endif /* _TRACE_XFS_H */ | 1795 | #endif /* _TRACE_XFS_H */ |
1763 | 1796 | ||
1764 | #undef TRACE_INCLUDE_PATH | 1797 | #undef TRACE_INCLUDE_PATH |
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index f8e854b4fde8..206a2815ced6 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c | |||
@@ -1863,12 +1863,14 @@ xfs_qm_dqreclaim_one(void) | |||
1863 | xfs_dquot_t *dqpout; | 1863 | xfs_dquot_t *dqpout; |
1864 | xfs_dquot_t *dqp; | 1864 | xfs_dquot_t *dqp; |
1865 | int restarts; | 1865 | int restarts; |
1866 | int startagain; | ||
1866 | 1867 | ||
1867 | restarts = 0; | 1868 | restarts = 0; |
1868 | dqpout = NULL; | 1869 | dqpout = NULL; |
1869 | 1870 | ||
1870 | /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */ | 1871 | /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */ |
1871 | startagain: | 1872 | again: |
1873 | startagain = 0; | ||
1872 | mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); | 1874 | mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); |
1873 | 1875 | ||
1874 | list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) { | 1876 | list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) { |
@@ -1885,13 +1887,10 @@ startagain: | |||
1885 | ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE)); | 1887 | ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE)); |
1886 | 1888 | ||
1887 | trace_xfs_dqreclaim_want(dqp); | 1889 | trace_xfs_dqreclaim_want(dqp); |
1888 | |||
1889 | xfs_dqunlock(dqp); | ||
1890 | mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); | ||
1891 | if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) | ||
1892 | return NULL; | ||
1893 | XQM_STATS_INC(xqmstats.xs_qm_dqwants); | 1890 | XQM_STATS_INC(xqmstats.xs_qm_dqwants); |
1894 | goto startagain; | 1891 | restarts++; |
1892 | startagain = 1; | ||
1893 | goto dqunlock; | ||
1895 | } | 1894 | } |
1896 | 1895 | ||
1897 | /* | 1896 | /* |
@@ -1906,23 +1905,20 @@ startagain: | |||
1906 | ASSERT(list_empty(&dqp->q_mplist)); | 1905 | ASSERT(list_empty(&dqp->q_mplist)); |
1907 | list_del_init(&dqp->q_freelist); | 1906 | list_del_init(&dqp->q_freelist); |
1908 | xfs_Gqm->qm_dqfrlist_cnt--; | 1907 | xfs_Gqm->qm_dqfrlist_cnt--; |
1909 | xfs_dqunlock(dqp); | ||
1910 | dqpout = dqp; | 1908 | dqpout = dqp; |
1911 | XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims); | 1909 | XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims); |
1912 | break; | 1910 | goto dqunlock; |
1913 | } | 1911 | } |
1914 | 1912 | ||
1915 | ASSERT(dqp->q_hash); | 1913 | ASSERT(dqp->q_hash); |
1916 | ASSERT(!list_empty(&dqp->q_mplist)); | 1914 | ASSERT(!list_empty(&dqp->q_mplist)); |
1917 | 1915 | ||
1918 | /* | 1916 | /* |
1919 | * Try to grab the flush lock. If this dquot is in the process of | 1917 | * Try to grab the flush lock. If this dquot is in the process |
1920 | * getting flushed to disk, we don't want to reclaim it. | 1918 | * of getting flushed to disk, we don't want to reclaim it. |
1921 | */ | 1919 | */ |
1922 | if (!xfs_dqflock_nowait(dqp)) { | 1920 | if (!xfs_dqflock_nowait(dqp)) |
1923 | xfs_dqunlock(dqp); | 1921 | goto dqunlock; |
1924 | continue; | ||
1925 | } | ||
1926 | 1922 | ||
1927 | /* | 1923 | /* |
1928 | * We have the flush lock so we know that this is not in the | 1924 | * We have the flush lock so we know that this is not in the |
@@ -1944,8 +1940,7 @@ startagain: | |||
1944 | xfs_fs_cmn_err(CE_WARN, mp, | 1940 | xfs_fs_cmn_err(CE_WARN, mp, |
1945 | "xfs_qm_dqreclaim: dquot %p flush failed", dqp); | 1941 | "xfs_qm_dqreclaim: dquot %p flush failed", dqp); |
1946 | } | 1942 | } |
1947 | xfs_dqunlock(dqp); /* dqflush unlocks dqflock */ | 1943 | goto dqunlock; |
1948 | continue; | ||
1949 | } | 1944 | } |
1950 | 1945 | ||
1951 | /* | 1946 | /* |
@@ -1967,13 +1962,8 @@ startagain: | |||
1967 | */ | 1962 | */ |
1968 | if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) { | 1963 | if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) { |
1969 | restarts++; | 1964 | restarts++; |
1970 | mutex_unlock(&dqp->q_hash->qh_lock); | 1965 | startagain = 1; |
1971 | xfs_dqfunlock(dqp); | 1966 | goto qhunlock; |
1972 | xfs_dqunlock(dqp); | ||
1973 | mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); | ||
1974 | if (restarts++ >= XFS_QM_RECLAIM_MAX_RESTARTS) | ||
1975 | return NULL; | ||
1976 | goto startagain; | ||
1977 | } | 1967 | } |
1978 | 1968 | ||
1979 | ASSERT(dqp->q_nrefs == 0); | 1969 | ASSERT(dqp->q_nrefs == 0); |
@@ -1986,14 +1976,20 @@ startagain: | |||
1986 | xfs_Gqm->qm_dqfrlist_cnt--; | 1976 | xfs_Gqm->qm_dqfrlist_cnt--; |
1987 | dqpout = dqp; | 1977 | dqpout = dqp; |
1988 | mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock); | 1978 | mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock); |
1979 | qhunlock: | ||
1989 | mutex_unlock(&dqp->q_hash->qh_lock); | 1980 | mutex_unlock(&dqp->q_hash->qh_lock); |
1990 | dqfunlock: | 1981 | dqfunlock: |
1991 | xfs_dqfunlock(dqp); | 1982 | xfs_dqfunlock(dqp); |
1983 | dqunlock: | ||
1992 | xfs_dqunlock(dqp); | 1984 | xfs_dqunlock(dqp); |
1993 | if (dqpout) | 1985 | if (dqpout) |
1994 | break; | 1986 | break; |
1995 | if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) | 1987 | if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) |
1996 | return NULL; | 1988 | break; |
1989 | if (startagain) { | ||
1990 | mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); | ||
1991 | goto again; | ||
1992 | } | ||
1997 | } | 1993 | } |
1998 | mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); | 1994 | mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); |
1999 | return dqpout; | 1995 | return dqpout; |
diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c index 975aa10e1a47..0df88897ef84 100644 --- a/fs/xfs/support/debug.c +++ b/fs/xfs/support/debug.c | |||
@@ -25,86 +25,78 @@ | |||
25 | #include "xfs_mount.h" | 25 | #include "xfs_mount.h" |
26 | #include "xfs_error.h" | 26 | #include "xfs_error.h" |
27 | 27 | ||
28 | static char message[1024]; /* keep it off the stack */ | ||
29 | static DEFINE_SPINLOCK(xfs_err_lock); | ||
30 | |||
31 | /* Translate from CE_FOO to KERN_FOO, err_level(CE_FOO) == KERN_FOO */ | ||
32 | #define XFS_MAX_ERR_LEVEL 7 | ||
33 | #define XFS_ERR_MASK ((1 << 3) - 1) | ||
34 | static const char * const err_level[XFS_MAX_ERR_LEVEL+1] = | ||
35 | {KERN_EMERG, KERN_ALERT, KERN_CRIT, | ||
36 | KERN_ERR, KERN_WARNING, KERN_NOTICE, | ||
37 | KERN_INFO, KERN_DEBUG}; | ||
38 | |||
39 | void | 28 | void |
40 | cmn_err(register int level, char *fmt, ...) | 29 | cmn_err( |
30 | const char *lvl, | ||
31 | const char *fmt, | ||
32 | ...) | ||
41 | { | 33 | { |
42 | char *fp = fmt; | 34 | struct va_format vaf; |
43 | int len; | 35 | va_list args; |
44 | ulong flags; | 36 | |
45 | va_list ap; | 37 | va_start(args, fmt); |
46 | 38 | vaf.fmt = fmt; | |
47 | level &= XFS_ERR_MASK; | 39 | vaf.va = &args; |
48 | if (level > XFS_MAX_ERR_LEVEL) | 40 | |
49 | level = XFS_MAX_ERR_LEVEL; | 41 | printk("%s%pV", lvl, &vaf); |
50 | spin_lock_irqsave(&xfs_err_lock,flags); | 42 | va_end(args); |
51 | va_start(ap, fmt); | 43 | |
52 | if (*fmt == '!') fp++; | 44 | BUG_ON(strncmp(lvl, KERN_EMERG, strlen(KERN_EMERG)) == 0); |
53 | len = vsnprintf(message, sizeof(message), fp, ap); | ||
54 | if (len >= sizeof(message)) | ||
55 | len = sizeof(message) - 1; | ||
56 | if (message[len-1] == '\n') | ||
57 | message[len-1] = 0; | ||
58 | printk("%s%s\n", err_level[level], message); | ||
59 | va_end(ap); | ||
60 | spin_unlock_irqrestore(&xfs_err_lock,flags); | ||
61 | BUG_ON(level == CE_PANIC); | ||
62 | } | 45 | } |
63 | 46 | ||
64 | void | 47 | void |
65 | xfs_fs_vcmn_err( | 48 | xfs_fs_cmn_err( |
66 | int level, | 49 | const char *lvl, |
67 | struct xfs_mount *mp, | 50 | struct xfs_mount *mp, |
68 | char *fmt, | 51 | const char *fmt, |
69 | va_list ap) | 52 | ...) |
70 | { | 53 | { |
71 | unsigned long flags; | 54 | struct va_format vaf; |
72 | int len = 0; | 55 | va_list args; |
73 | 56 | ||
74 | level &= XFS_ERR_MASK; | 57 | va_start(args, fmt); |
75 | if (level > XFS_MAX_ERR_LEVEL) | 58 | vaf.fmt = fmt; |
76 | level = XFS_MAX_ERR_LEVEL; | 59 | vaf.va = &args; |
77 | 60 | ||
78 | spin_lock_irqsave(&xfs_err_lock,flags); | 61 | printk("%sFilesystem %s: %pV", lvl, mp->m_fsname, &vaf); |
62 | va_end(args); | ||
79 | 63 | ||
80 | if (mp) { | 64 | BUG_ON(strncmp(lvl, KERN_EMERG, strlen(KERN_EMERG)) == 0); |
81 | len = sprintf(message, "Filesystem \"%s\": ", mp->m_fsname); | 65 | } |
66 | |||
67 | /* All callers to xfs_cmn_err use CE_ALERT, so don't bother testing lvl */ | ||
68 | void | ||
69 | xfs_cmn_err( | ||
70 | int panic_tag, | ||
71 | const char *lvl, | ||
72 | struct xfs_mount *mp, | ||
73 | const char *fmt, | ||
74 | ...) | ||
75 | { | ||
76 | struct va_format vaf; | ||
77 | va_list args; | ||
78 | int do_panic = 0; | ||
82 | 79 | ||
83 | /* | 80 | if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) { |
84 | * Skip the printk if we can't print anything useful | 81 | printk(KERN_ALERT "XFS: Transforming an alert into a BUG."); |
85 | * due to an over-long device name. | 82 | do_panic = 1; |
86 | */ | ||
87 | if (len >= sizeof(message)) | ||
88 | goto out; | ||
89 | } | 83 | } |
90 | 84 | ||
91 | len = vsnprintf(message + len, sizeof(message) - len, fmt, ap); | 85 | va_start(args, fmt); |
92 | if (len >= sizeof(message)) | 86 | vaf.fmt = fmt; |
93 | len = sizeof(message) - 1; | 87 | vaf.va = &args; |
94 | if (message[len-1] == '\n') | ||
95 | message[len-1] = 0; | ||
96 | 88 | ||
97 | printk("%s%s\n", err_level[level], message); | 89 | printk(KERN_ALERT "Filesystem %s: %pV", mp->m_fsname, &vaf); |
98 | out: | 90 | va_end(args); |
99 | spin_unlock_irqrestore(&xfs_err_lock,flags); | ||
100 | 91 | ||
101 | BUG_ON(level == CE_PANIC); | 92 | BUG_ON(do_panic); |
102 | } | 93 | } |
103 | 94 | ||
104 | void | 95 | void |
105 | assfail(char *expr, char *file, int line) | 96 | assfail(char *expr, char *file, int line) |
106 | { | 97 | { |
107 | printk("Assertion failed: %s, file: %s, line: %d\n", expr, file, line); | 98 | printk(KERN_CRIT "Assertion failed: %s, file: %s, line: %d\n", expr, |
99 | file, line); | ||
108 | BUG(); | 100 | BUG(); |
109 | } | 101 | } |
110 | 102 | ||
diff --git a/fs/xfs/support/debug.h b/fs/xfs/support/debug.h index d2d20462fd4f..05699f67d475 100644 --- a/fs/xfs/support/debug.h +++ b/fs/xfs/support/debug.h | |||
@@ -20,15 +20,22 @@ | |||
20 | 20 | ||
21 | #include <stdarg.h> | 21 | #include <stdarg.h> |
22 | 22 | ||
23 | #define CE_DEBUG 7 /* debug */ | 23 | struct xfs_mount; |
24 | #define CE_CONT 6 /* continuation */ | 24 | |
25 | #define CE_NOTE 5 /* notice */ | 25 | #define CE_DEBUG KERN_DEBUG |
26 | #define CE_WARN 4 /* warning */ | 26 | #define CE_CONT KERN_INFO |
27 | #define CE_ALERT 1 /* alert */ | 27 | #define CE_NOTE KERN_NOTICE |
28 | #define CE_PANIC 0 /* panic */ | 28 | #define CE_WARN KERN_WARNING |
29 | 29 | #define CE_ALERT KERN_ALERT | |
30 | extern void cmn_err(int, char *, ...) | 30 | #define CE_PANIC KERN_EMERG |
31 | __attribute__ ((format (printf, 2, 3))); | 31 | |
32 | void cmn_err(const char *lvl, const char *fmt, ...) | ||
33 | __attribute__ ((format (printf, 2, 3))); | ||
34 | void xfs_fs_cmn_err( const char *lvl, struct xfs_mount *mp, | ||
35 | const char *fmt, ...) __attribute__ ((format (printf, 3, 4))); | ||
36 | void xfs_cmn_err( int panic_tag, const char *lvl, struct xfs_mount *mp, | ||
37 | const char *fmt, ...) __attribute__ ((format (printf, 4, 5))); | ||
38 | |||
32 | extern void assfail(char *expr, char *f, int l); | 39 | extern void assfail(char *expr, char *f, int l); |
33 | 40 | ||
34 | #define ASSERT_ALWAYS(expr) \ | 41 | #define ASSERT_ALWAYS(expr) \ |
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index fa8723f5870a..f3227984a9bf 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c | |||
@@ -41,10 +41,6 @@ | |||
41 | #define XFSA_FIXUP_BNO_OK 1 | 41 | #define XFSA_FIXUP_BNO_OK 1 |
42 | #define XFSA_FIXUP_CNT_OK 2 | 42 | #define XFSA_FIXUP_CNT_OK 2 |
43 | 43 | ||
44 | static int | ||
45 | xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno, | ||
46 | xfs_agblock_t bno, xfs_extlen_t len); | ||
47 | |||
48 | /* | 44 | /* |
49 | * Prototypes for per-ag allocation routines | 45 | * Prototypes for per-ag allocation routines |
50 | */ | 46 | */ |
@@ -94,7 +90,7 @@ xfs_alloc_lookup_ge( | |||
94 | * Lookup the first record less than or equal to [bno, len] | 90 | * Lookup the first record less than or equal to [bno, len] |
95 | * in the btree given by cur. | 91 | * in the btree given by cur. |
96 | */ | 92 | */ |
97 | STATIC int /* error */ | 93 | int /* error */ |
98 | xfs_alloc_lookup_le( | 94 | xfs_alloc_lookup_le( |
99 | struct xfs_btree_cur *cur, /* btree cursor */ | 95 | struct xfs_btree_cur *cur, /* btree cursor */ |
100 | xfs_agblock_t bno, /* starting block of extent */ | 96 | xfs_agblock_t bno, /* starting block of extent */ |
@@ -127,7 +123,7 @@ xfs_alloc_update( | |||
127 | /* | 123 | /* |
128 | * Get the data from the pointed-to record. | 124 | * Get the data from the pointed-to record. |
129 | */ | 125 | */ |
130 | STATIC int /* error */ | 126 | int /* error */ |
131 | xfs_alloc_get_rec( | 127 | xfs_alloc_get_rec( |
132 | struct xfs_btree_cur *cur, /* btree cursor */ | 128 | struct xfs_btree_cur *cur, /* btree cursor */ |
133 | xfs_agblock_t *bno, /* output: starting block of extent */ | 129 | xfs_agblock_t *bno, /* output: starting block of extent */ |
@@ -2615,7 +2611,7 @@ restart: | |||
2615 | * will require a synchronous transaction, but it can still be | 2611 | * will require a synchronous transaction, but it can still be |
2616 | * used to distinguish between a partial or exact match. | 2612 | * used to distinguish between a partial or exact match. |
2617 | */ | 2613 | */ |
2618 | static int | 2614 | int |
2619 | xfs_alloc_busy_search( | 2615 | xfs_alloc_busy_search( |
2620 | struct xfs_mount *mp, | 2616 | struct xfs_mount *mp, |
2621 | xfs_agnumber_t agno, | 2617 | xfs_agnumber_t agno, |
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h index 895009a97271..d0b3bc72005b 100644 --- a/fs/xfs/xfs_alloc.h +++ b/fs/xfs/xfs_alloc.h | |||
@@ -19,6 +19,7 @@ | |||
19 | #define __XFS_ALLOC_H__ | 19 | #define __XFS_ALLOC_H__ |
20 | 20 | ||
21 | struct xfs_buf; | 21 | struct xfs_buf; |
22 | struct xfs_btree_cur; | ||
22 | struct xfs_mount; | 23 | struct xfs_mount; |
23 | struct xfs_perag; | 24 | struct xfs_perag; |
24 | struct xfs_trans; | 25 | struct xfs_trans; |
@@ -74,6 +75,22 @@ typedef unsigned int xfs_alloctype_t; | |||
74 | #define XFS_ALLOC_SET_ASIDE(mp) (4 + ((mp)->m_sb.sb_agcount * 4)) | 75 | #define XFS_ALLOC_SET_ASIDE(mp) (4 + ((mp)->m_sb.sb_agcount * 4)) |
75 | 76 | ||
76 | /* | 77 | /* |
78 | * When deciding how much space to allocate out of an AG, we limit the | ||
79 | * allocation maximum size to the size the AG. However, we cannot use all the | ||
80 | * blocks in the AG - some are permanently used by metadata. These | ||
81 | * blocks are generally: | ||
82 | * - the AG superblock, AGF, AGI and AGFL | ||
83 | * - the AGF (bno and cnt) and AGI btree root blocks | ||
84 | * - 4 blocks on the AGFL according to XFS_ALLOC_SET_ASIDE() limits | ||
85 | * | ||
86 | * The AG headers are sector sized, so the amount of space they take up is | ||
87 | * dependent on filesystem geometry. The others are all single blocks. | ||
88 | */ | ||
89 | #define XFS_ALLOC_AG_MAX_USABLE(mp) \ | ||
90 | ((mp)->m_sb.sb_agblocks - XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)) - 7) | ||
91 | |||
92 | |||
93 | /* | ||
77 | * Argument structure for xfs_alloc routines. | 94 | * Argument structure for xfs_alloc routines. |
78 | * This is turned into a structure to avoid having 20 arguments passed | 95 | * This is turned into a structure to avoid having 20 arguments passed |
79 | * down several levels of the stack. | 96 | * down several levels of the stack. |
@@ -118,16 +135,16 @@ xfs_alloc_longest_free_extent(struct xfs_mount *mp, | |||
118 | struct xfs_perag *pag); | 135 | struct xfs_perag *pag); |
119 | 136 | ||
120 | #ifdef __KERNEL__ | 137 | #ifdef __KERNEL__ |
121 | |||
122 | void | 138 | void |
123 | xfs_alloc_busy_insert(xfs_trans_t *tp, | 139 | xfs_alloc_busy_insert(struct xfs_trans *tp, xfs_agnumber_t agno, |
124 | xfs_agnumber_t agno, | 140 | xfs_agblock_t bno, xfs_extlen_t len); |
125 | xfs_agblock_t bno, | ||
126 | xfs_extlen_t len); | ||
127 | 141 | ||
128 | void | 142 | void |
129 | xfs_alloc_busy_clear(struct xfs_mount *mp, struct xfs_busy_extent *busyp); | 143 | xfs_alloc_busy_clear(struct xfs_mount *mp, struct xfs_busy_extent *busyp); |
130 | 144 | ||
145 | int | ||
146 | xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno, | ||
147 | xfs_agblock_t bno, xfs_extlen_t len); | ||
131 | #endif /* __KERNEL__ */ | 148 | #endif /* __KERNEL__ */ |
132 | 149 | ||
133 | /* | 150 | /* |
@@ -205,4 +222,18 @@ xfs_free_extent( | |||
205 | xfs_fsblock_t bno, /* starting block number of extent */ | 222 | xfs_fsblock_t bno, /* starting block number of extent */ |
206 | xfs_extlen_t len); /* length of extent */ | 223 | xfs_extlen_t len); /* length of extent */ |
207 | 224 | ||
225 | int /* error */ | ||
226 | xfs_alloc_lookup_le( | ||
227 | struct xfs_btree_cur *cur, /* btree cursor */ | ||
228 | xfs_agblock_t bno, /* starting block of extent */ | ||
229 | xfs_extlen_t len, /* length of extent */ | ||
230 | int *stat); /* success/failure */ | ||
231 | |||
232 | int /* error */ | ||
233 | xfs_alloc_get_rec( | ||
234 | struct xfs_btree_cur *cur, /* btree cursor */ | ||
235 | xfs_agblock_t *bno, /* output: starting block of extent */ | ||
236 | xfs_extlen_t *len, /* output: length of extent */ | ||
237 | int *stat); /* output: success/failure */ | ||
238 | |||
208 | #endif /* __XFS_ALLOC_H__ */ | 239 | #endif /* __XFS_ALLOC_H__ */ |
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 4111cd3966c7..dc3afd7739ff 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c | |||
@@ -1038,17 +1038,34 @@ xfs_bmap_add_extent_delay_real( | |||
1038 | * Filling in the middle part of a previous delayed allocation. | 1038 | * Filling in the middle part of a previous delayed allocation. |
1039 | * Contiguity is impossible here. | 1039 | * Contiguity is impossible here. |
1040 | * This case is avoided almost all the time. | 1040 | * This case is avoided almost all the time. |
1041 | * | ||
1042 | * We start with a delayed allocation: | ||
1043 | * | ||
1044 | * +ddddddddddddddddddddddddddddddddddddddddddddddddddddddd+ | ||
1045 | * PREV @ idx | ||
1046 | * | ||
1047 | * and we are allocating: | ||
1048 | * +rrrrrrrrrrrrrrrrr+ | ||
1049 | * new | ||
1050 | * | ||
1051 | * and we set it up for insertion as: | ||
1052 | * +ddddddddddddddddddd+rrrrrrrrrrrrrrrrr+ddddddddddddddddd+ | ||
1053 | * new | ||
1054 | * PREV @ idx LEFT RIGHT | ||
1055 | * inserted at idx + 1 | ||
1041 | */ | 1056 | */ |
1042 | temp = new->br_startoff - PREV.br_startoff; | 1057 | temp = new->br_startoff - PREV.br_startoff; |
1043 | trace_xfs_bmap_pre_update(ip, idx, 0, _THIS_IP_); | ||
1044 | xfs_bmbt_set_blockcount(ep, temp); | ||
1045 | r[0] = *new; | ||
1046 | r[1].br_state = PREV.br_state; | ||
1047 | r[1].br_startblock = 0; | ||
1048 | r[1].br_startoff = new_endoff; | ||
1049 | temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff; | 1058 | temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff; |
1050 | r[1].br_blockcount = temp2; | 1059 | trace_xfs_bmap_pre_update(ip, idx, 0, _THIS_IP_); |
1051 | xfs_iext_insert(ip, idx + 1, 2, &r[0], state); | 1060 | xfs_bmbt_set_blockcount(ep, temp); /* truncate PREV */ |
1061 | LEFT = *new; | ||
1062 | RIGHT.br_state = PREV.br_state; | ||
1063 | RIGHT.br_startblock = nullstartblock( | ||
1064 | (int)xfs_bmap_worst_indlen(ip, temp2)); | ||
1065 | RIGHT.br_startoff = new_endoff; | ||
1066 | RIGHT.br_blockcount = temp2; | ||
1067 | /* insert LEFT (r[0]) and RIGHT (r[1]) at the same time */ | ||
1068 | xfs_iext_insert(ip, idx + 1, 2, &LEFT, state); | ||
1052 | ip->i_df.if_lastex = idx + 1; | 1069 | ip->i_df.if_lastex = idx + 1; |
1053 | ip->i_d.di_nextents++; | 1070 | ip->i_d.di_nextents++; |
1054 | if (cur == NULL) | 1071 | if (cur == NULL) |
@@ -2430,7 +2447,7 @@ xfs_bmap_btalloc_nullfb( | |||
2430 | startag = ag = 0; | 2447 | startag = ag = 0; |
2431 | 2448 | ||
2432 | pag = xfs_perag_get(mp, ag); | 2449 | pag = xfs_perag_get(mp, ag); |
2433 | while (*blen < ap->alen) { | 2450 | while (*blen < args->maxlen) { |
2434 | if (!pag->pagf_init) { | 2451 | if (!pag->pagf_init) { |
2435 | error = xfs_alloc_pagf_init(mp, args->tp, ag, | 2452 | error = xfs_alloc_pagf_init(mp, args->tp, ag, |
2436 | XFS_ALLOC_FLAG_TRYLOCK); | 2453 | XFS_ALLOC_FLAG_TRYLOCK); |
@@ -2452,7 +2469,7 @@ xfs_bmap_btalloc_nullfb( | |||
2452 | notinit = 1; | 2469 | notinit = 1; |
2453 | 2470 | ||
2454 | if (xfs_inode_is_filestream(ap->ip)) { | 2471 | if (xfs_inode_is_filestream(ap->ip)) { |
2455 | if (*blen >= ap->alen) | 2472 | if (*blen >= args->maxlen) |
2456 | break; | 2473 | break; |
2457 | 2474 | ||
2458 | if (ap->userdata) { | 2475 | if (ap->userdata) { |
@@ -2498,14 +2515,14 @@ xfs_bmap_btalloc_nullfb( | |||
2498 | * If the best seen length is less than the request | 2515 | * If the best seen length is less than the request |
2499 | * length, use the best as the minimum. | 2516 | * length, use the best as the minimum. |
2500 | */ | 2517 | */ |
2501 | else if (*blen < ap->alen) | 2518 | else if (*blen < args->maxlen) |
2502 | args->minlen = *blen; | 2519 | args->minlen = *blen; |
2503 | /* | 2520 | /* |
2504 | * Otherwise we've seen an extent as big as alen, | 2521 | * Otherwise we've seen an extent as big as maxlen, |
2505 | * use that as the minimum. | 2522 | * use that as the minimum. |
2506 | */ | 2523 | */ |
2507 | else | 2524 | else |
2508 | args->minlen = ap->alen; | 2525 | args->minlen = args->maxlen; |
2509 | 2526 | ||
2510 | /* | 2527 | /* |
2511 | * set the failure fallback case to look in the selected | 2528 | * set the failure fallback case to look in the selected |
@@ -2573,7 +2590,9 @@ xfs_bmap_btalloc( | |||
2573 | args.tp = ap->tp; | 2590 | args.tp = ap->tp; |
2574 | args.mp = mp; | 2591 | args.mp = mp; |
2575 | args.fsbno = ap->rval; | 2592 | args.fsbno = ap->rval; |
2576 | args.maxlen = MIN(ap->alen, mp->m_sb.sb_agblocks); | 2593 | |
2594 | /* Trim the allocation back to the maximum an AG can fit. */ | ||
2595 | args.maxlen = MIN(ap->alen, XFS_ALLOC_AG_MAX_USABLE(mp)); | ||
2577 | args.firstblock = ap->firstblock; | 2596 | args.firstblock = ap->firstblock; |
2578 | blen = 0; | 2597 | blen = 0; |
2579 | if (nullfb) { | 2598 | if (nullfb) { |
@@ -2621,7 +2640,7 @@ xfs_bmap_btalloc( | |||
2621 | /* | 2640 | /* |
2622 | * Adjust for alignment | 2641 | * Adjust for alignment |
2623 | */ | 2642 | */ |
2624 | if (blen > args.alignment && blen <= ap->alen) | 2643 | if (blen > args.alignment && blen <= args.maxlen) |
2625 | args.minlen = blen - args.alignment; | 2644 | args.minlen = blen - args.alignment; |
2626 | args.minalignslop = 0; | 2645 | args.minalignslop = 0; |
2627 | } else { | 2646 | } else { |
@@ -2640,7 +2659,7 @@ xfs_bmap_btalloc( | |||
2640 | * of minlen+alignment+slop doesn't go up | 2659 | * of minlen+alignment+slop doesn't go up |
2641 | * between the calls. | 2660 | * between the calls. |
2642 | */ | 2661 | */ |
2643 | if (blen > mp->m_dalign && blen <= ap->alen) | 2662 | if (blen > mp->m_dalign && blen <= args.maxlen) |
2644 | nextminlen = blen - mp->m_dalign; | 2663 | nextminlen = blen - mp->m_dalign; |
2645 | else | 2664 | else |
2646 | nextminlen = args.minlen; | 2665 | nextminlen = args.minlen; |
@@ -4485,6 +4504,16 @@ xfs_bmapi( | |||
4485 | /* Figure out the extent size, adjust alen */ | 4504 | /* Figure out the extent size, adjust alen */ |
4486 | extsz = xfs_get_extsz_hint(ip); | 4505 | extsz = xfs_get_extsz_hint(ip); |
4487 | if (extsz) { | 4506 | if (extsz) { |
4507 | /* | ||
4508 | * make sure we don't exceed a single | ||
4509 | * extent length when we align the | ||
4510 | * extent by reducing length we are | ||
4511 | * going to allocate by the maximum | ||
4512 | * amount extent size aligment may | ||
4513 | * require. | ||
4514 | */ | ||
4515 | alen = XFS_FILBLKS_MIN(len, | ||
4516 | MAXEXTLEN - (2 * extsz - 1)); | ||
4488 | error = xfs_bmap_extsize_align(mp, | 4517 | error = xfs_bmap_extsize_align(mp, |
4489 | &got, &prev, extsz, | 4518 | &got, &prev, extsz, |
4490 | rt, eof, | 4519 | rt, eof, |
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index ed2b65f3f8b9..6f8c21ce0d6d 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c | |||
@@ -141,7 +141,6 @@ xfs_buf_item_log_check( | |||
141 | #define xfs_buf_item_log_check(x) | 141 | #define xfs_buf_item_log_check(x) |
142 | #endif | 142 | #endif |
143 | 143 | ||
144 | STATIC void xfs_buf_error_relse(xfs_buf_t *bp); | ||
145 | STATIC void xfs_buf_do_callbacks(struct xfs_buf *bp); | 144 | STATIC void xfs_buf_do_callbacks(struct xfs_buf *bp); |
146 | 145 | ||
147 | /* | 146 | /* |
@@ -428,13 +427,15 @@ xfs_buf_item_unpin( | |||
428 | 427 | ||
429 | if (remove) { | 428 | if (remove) { |
430 | /* | 429 | /* |
431 | * We have to remove the log item from the transaction | 430 | * If we are in a transaction context, we have to |
432 | * as we are about to release our reference to the | 431 | * remove the log item from the transaction as we are |
433 | * buffer. If we don't, the unlock that occurs later | 432 | * about to release our reference to the buffer. If we |
434 | * in xfs_trans_uncommit() will ry to reference the | 433 | * don't, the unlock that occurs later in |
434 | * xfs_trans_uncommit() will try to reference the | ||
435 | * buffer which we no longer have a hold on. | 435 | * buffer which we no longer have a hold on. |
436 | */ | 436 | */ |
437 | xfs_trans_del_item(lip); | 437 | if (lip->li_desc) |
438 | xfs_trans_del_item(lip); | ||
438 | 439 | ||
439 | /* | 440 | /* |
440 | * Since the transaction no longer refers to the buffer, | 441 | * Since the transaction no longer refers to the buffer, |
@@ -959,128 +960,76 @@ xfs_buf_do_callbacks( | |||
959 | */ | 960 | */ |
960 | void | 961 | void |
961 | xfs_buf_iodone_callbacks( | 962 | xfs_buf_iodone_callbacks( |
962 | xfs_buf_t *bp) | 963 | struct xfs_buf *bp) |
963 | { | 964 | { |
964 | xfs_log_item_t *lip; | 965 | struct xfs_log_item *lip = bp->b_fspriv; |
965 | static ulong lasttime; | 966 | struct xfs_mount *mp = lip->li_mountp; |
966 | static xfs_buftarg_t *lasttarg; | 967 | static ulong lasttime; |
967 | xfs_mount_t *mp; | 968 | static xfs_buftarg_t *lasttarg; |
968 | 969 | ||
969 | ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); | 970 | if (likely(!XFS_BUF_GETERROR(bp))) |
970 | lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); | 971 | goto do_callbacks; |
971 | 972 | ||
972 | if (XFS_BUF_GETERROR(bp) != 0) { | 973 | /* |
973 | /* | 974 | * If we've already decided to shutdown the filesystem because of |
974 | * If we've already decided to shutdown the filesystem | 975 | * I/O errors, there's no point in giving this a retry. |
975 | * because of IO errors, there's no point in giving this | 976 | */ |
976 | * a retry. | 977 | if (XFS_FORCED_SHUTDOWN(mp)) { |
977 | */ | 978 | XFS_BUF_SUPER_STALE(bp); |
978 | mp = lip->li_mountp; | 979 | trace_xfs_buf_item_iodone(bp, _RET_IP_); |
979 | if (XFS_FORCED_SHUTDOWN(mp)) { | 980 | goto do_callbacks; |
980 | ASSERT(XFS_BUF_TARGET(bp) == mp->m_ddev_targp); | 981 | } |
981 | XFS_BUF_SUPER_STALE(bp); | ||
982 | trace_xfs_buf_item_iodone(bp, _RET_IP_); | ||
983 | xfs_buf_do_callbacks(bp); | ||
984 | XFS_BUF_SET_FSPRIVATE(bp, NULL); | ||
985 | XFS_BUF_CLR_IODONE_FUNC(bp); | ||
986 | xfs_buf_ioend(bp, 0); | ||
987 | return; | ||
988 | } | ||
989 | 982 | ||
990 | if ((XFS_BUF_TARGET(bp) != lasttarg) || | 983 | if (XFS_BUF_TARGET(bp) != lasttarg || |
991 | (time_after(jiffies, (lasttime + 5*HZ)))) { | 984 | time_after(jiffies, (lasttime + 5*HZ))) { |
992 | lasttime = jiffies; | 985 | lasttime = jiffies; |
993 | cmn_err(CE_ALERT, "Device %s, XFS metadata write error" | 986 | cmn_err(CE_ALERT, "Device %s, XFS metadata write error" |
994 | " block 0x%llx in %s", | 987 | " block 0x%llx in %s", |
995 | XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)), | 988 | XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)), |
996 | (__uint64_t)XFS_BUF_ADDR(bp), mp->m_fsname); | 989 | (__uint64_t)XFS_BUF_ADDR(bp), mp->m_fsname); |
997 | } | 990 | } |
998 | lasttarg = XFS_BUF_TARGET(bp); | 991 | lasttarg = XFS_BUF_TARGET(bp); |
999 | 992 | ||
1000 | if (XFS_BUF_ISASYNC(bp)) { | 993 | /* |
1001 | /* | 994 | * If the write was asynchronous then noone will be looking for the |
1002 | * If the write was asynchronous then noone will be | 995 | * error. Clear the error state and write the buffer out again. |
1003 | * looking for the error. Clear the error state | 996 | * |
1004 | * and write the buffer out again delayed write. | 997 | * During sync or umount we'll write all pending buffers again |
1005 | * | 998 | * synchronous, which will catch these errors if they keep hanging |
1006 | * XXXsup This is OK, so long as we catch these | 999 | * around. |
1007 | * before we start the umount; we don't want these | 1000 | */ |
1008 | * DELWRI metadata bufs to be hanging around. | 1001 | if (XFS_BUF_ISASYNC(bp)) { |
1009 | */ | 1002 | XFS_BUF_ERROR(bp, 0); /* errno of 0 unsets the flag */ |
1010 | XFS_BUF_ERROR(bp,0); /* errno of 0 unsets the flag */ | 1003 | |
1011 | 1004 | if (!XFS_BUF_ISSTALE(bp)) { | |
1012 | if (!(XFS_BUF_ISSTALE(bp))) { | 1005 | XFS_BUF_DELAYWRITE(bp); |
1013 | XFS_BUF_DELAYWRITE(bp); | ||
1014 | XFS_BUF_DONE(bp); | ||
1015 | XFS_BUF_SET_START(bp); | ||
1016 | } | ||
1017 | ASSERT(XFS_BUF_IODONE_FUNC(bp)); | ||
1018 | trace_xfs_buf_item_iodone_async(bp, _RET_IP_); | ||
1019 | xfs_buf_relse(bp); | ||
1020 | } else { | ||
1021 | /* | ||
1022 | * If the write of the buffer was not asynchronous, | ||
1023 | * then we want to make sure to return the error | ||
1024 | * to the caller of bwrite(). Because of this we | ||
1025 | * cannot clear the B_ERROR state at this point. | ||
1026 | * Instead we install a callback function that | ||
1027 | * will be called when the buffer is released, and | ||
1028 | * that routine will clear the error state and | ||
1029 | * set the buffer to be written out again after | ||
1030 | * some delay. | ||
1031 | */ | ||
1032 | /* We actually overwrite the existing b-relse | ||
1033 | function at times, but we're gonna be shutting down | ||
1034 | anyway. */ | ||
1035 | XFS_BUF_SET_BRELSE_FUNC(bp,xfs_buf_error_relse); | ||
1036 | XFS_BUF_DONE(bp); | 1006 | XFS_BUF_DONE(bp); |
1037 | XFS_BUF_FINISH_IOWAIT(bp); | 1007 | XFS_BUF_SET_START(bp); |
1038 | } | 1008 | } |
1009 | ASSERT(XFS_BUF_IODONE_FUNC(bp)); | ||
1010 | trace_xfs_buf_item_iodone_async(bp, _RET_IP_); | ||
1011 | xfs_buf_relse(bp); | ||
1039 | return; | 1012 | return; |
1040 | } | 1013 | } |
1041 | 1014 | ||
1042 | xfs_buf_do_callbacks(bp); | 1015 | /* |
1043 | XFS_BUF_SET_FSPRIVATE(bp, NULL); | 1016 | * If the write of the buffer was synchronous, we want to make |
1044 | XFS_BUF_CLR_IODONE_FUNC(bp); | 1017 | * sure to return the error to the caller of xfs_bwrite(). |
1045 | xfs_buf_ioend(bp, 0); | 1018 | */ |
1046 | } | ||
1047 | |||
1048 | /* | ||
1049 | * This is a callback routine attached to a buffer which gets an error | ||
1050 | * when being written out synchronously. | ||
1051 | */ | ||
1052 | STATIC void | ||
1053 | xfs_buf_error_relse( | ||
1054 | xfs_buf_t *bp) | ||
1055 | { | ||
1056 | xfs_log_item_t *lip; | ||
1057 | xfs_mount_t *mp; | ||
1058 | |||
1059 | lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); | ||
1060 | mp = (xfs_mount_t *)lip->li_mountp; | ||
1061 | ASSERT(XFS_BUF_TARGET(bp) == mp->m_ddev_targp); | ||
1062 | |||
1063 | XFS_BUF_STALE(bp); | 1019 | XFS_BUF_STALE(bp); |
1064 | XFS_BUF_DONE(bp); | 1020 | XFS_BUF_DONE(bp); |
1065 | XFS_BUF_UNDELAYWRITE(bp); | 1021 | XFS_BUF_UNDELAYWRITE(bp); |
1066 | XFS_BUF_ERROR(bp,0); | ||
1067 | 1022 | ||
1068 | trace_xfs_buf_error_relse(bp, _RET_IP_); | 1023 | trace_xfs_buf_error_relse(bp, _RET_IP_); |
1024 | xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); | ||
1069 | 1025 | ||
1070 | if (! XFS_FORCED_SHUTDOWN(mp)) | 1026 | do_callbacks: |
1071 | xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); | ||
1072 | /* | ||
1073 | * We have to unpin the pinned buffers so do the | ||
1074 | * callbacks. | ||
1075 | */ | ||
1076 | xfs_buf_do_callbacks(bp); | 1027 | xfs_buf_do_callbacks(bp); |
1077 | XFS_BUF_SET_FSPRIVATE(bp, NULL); | 1028 | XFS_BUF_SET_FSPRIVATE(bp, NULL); |
1078 | XFS_BUF_CLR_IODONE_FUNC(bp); | 1029 | XFS_BUF_CLR_IODONE_FUNC(bp); |
1079 | XFS_BUF_SET_BRELSE_FUNC(bp,NULL); | 1030 | xfs_buf_ioend(bp, 0); |
1080 | xfs_buf_relse(bp); | ||
1081 | } | 1031 | } |
1082 | 1032 | ||
1083 | |||
1084 | /* | 1033 | /* |
1085 | * This is the iodone() function for buffers which have been | 1034 | * This is the iodone() function for buffers which have been |
1086 | * logged. It is called when they are eventually flushed out. | 1035 | * logged. It is called when they are eventually flushed out. |
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index c78cc6a3d87c..4c7db74a05f7 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c | |||
@@ -152,37 +152,6 @@ xfs_errortag_clearall(xfs_mount_t *mp, int loud) | |||
152 | } | 152 | } |
153 | #endif /* DEBUG */ | 153 | #endif /* DEBUG */ |
154 | 154 | ||
155 | |||
156 | void | ||
157 | xfs_fs_cmn_err(int level, xfs_mount_t *mp, char *fmt, ...) | ||
158 | { | ||
159 | va_list ap; | ||
160 | |||
161 | va_start(ap, fmt); | ||
162 | xfs_fs_vcmn_err(level, mp, fmt, ap); | ||
163 | va_end(ap); | ||
164 | } | ||
165 | |||
166 | void | ||
167 | xfs_cmn_err(int panic_tag, int level, xfs_mount_t *mp, char *fmt, ...) | ||
168 | { | ||
169 | va_list ap; | ||
170 | |||
171 | #ifdef DEBUG | ||
172 | xfs_panic_mask |= (XFS_PTAG_SHUTDOWN_CORRUPT | XFS_PTAG_LOGRES); | ||
173 | #endif | ||
174 | |||
175 | if (xfs_panic_mask && (xfs_panic_mask & panic_tag) | ||
176 | && (level & CE_ALERT)) { | ||
177 | level &= ~CE_ALERT; | ||
178 | level |= CE_PANIC; | ||
179 | cmn_err(CE_ALERT, "XFS: Transforming an alert into a BUG."); | ||
180 | } | ||
181 | va_start(ap, fmt); | ||
182 | xfs_fs_vcmn_err(level, mp, fmt, ap); | ||
183 | va_end(ap); | ||
184 | } | ||
185 | |||
186 | void | 155 | void |
187 | xfs_error_report( | 156 | xfs_error_report( |
188 | const char *tag, | 157 | const char *tag, |
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index f338847f80b8..10dce5475f02 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h | |||
@@ -136,8 +136,8 @@ extern int xfs_error_test(int, int *, char *, int, char *, unsigned long); | |||
136 | xfs_error_test((tag), (mp)->m_fixedfsid, "expr", __LINE__, __FILE__, \ | 136 | xfs_error_test((tag), (mp)->m_fixedfsid, "expr", __LINE__, __FILE__, \ |
137 | (rf)))) | 137 | (rf)))) |
138 | 138 | ||
139 | extern int xfs_errortag_add(int error_tag, xfs_mount_t *mp); | 139 | extern int xfs_errortag_add(int error_tag, struct xfs_mount *mp); |
140 | extern int xfs_errortag_clearall(xfs_mount_t *mp, int loud); | 140 | extern int xfs_errortag_clearall(struct xfs_mount *mp, int loud); |
141 | #else | 141 | #else |
142 | #define XFS_TEST_ERROR(expr, mp, tag, rf) (expr) | 142 | #define XFS_TEST_ERROR(expr, mp, tag, rf) (expr) |
143 | #define xfs_errortag_add(tag, mp) (ENOSYS) | 143 | #define xfs_errortag_add(tag, mp) (ENOSYS) |
@@ -162,21 +162,15 @@ extern int xfs_errortag_clearall(xfs_mount_t *mp, int loud); | |||
162 | 162 | ||
163 | struct xfs_mount; | 163 | struct xfs_mount; |
164 | 164 | ||
165 | extern void xfs_fs_vcmn_err(int level, struct xfs_mount *mp, | ||
166 | char *fmt, va_list ap) | ||
167 | __attribute__ ((format (printf, 3, 0))); | ||
168 | extern void xfs_cmn_err(int panic_tag, int level, struct xfs_mount *mp, | ||
169 | char *fmt, ...) | ||
170 | __attribute__ ((format (printf, 4, 5))); | ||
171 | extern void xfs_fs_cmn_err(int level, struct xfs_mount *mp, char *fmt, ...) | ||
172 | __attribute__ ((format (printf, 3, 4))); | ||
173 | |||
174 | extern void xfs_hex_dump(void *p, int length); | 165 | extern void xfs_hex_dump(void *p, int length); |
175 | 166 | ||
176 | #define xfs_fs_repair_cmn_err(level, mp, fmt, args...) \ | 167 | #define xfs_fs_repair_cmn_err(level, mp, fmt, args...) \ |
177 | xfs_fs_cmn_err(level, mp, fmt " Unmount and run xfs_repair.", ## args) | 168 | xfs_fs_cmn_err(level, mp, fmt " Unmount and run xfs_repair.", ## args) |
178 | 169 | ||
179 | #define xfs_fs_mount_cmn_err(f, fmt, args...) \ | 170 | #define xfs_fs_mount_cmn_err(f, fmt, args...) \ |
180 | ((f & XFS_MFSI_QUIET)? (void)0 : cmn_err(CE_WARN, "XFS: " fmt, ## args)) | 171 | do { \ |
172 | if (!(f & XFS_MFSI_QUIET)) \ | ||
173 | cmn_err(CE_WARN, "XFS: " fmt, ## args); \ | ||
174 | } while (0) | ||
181 | 175 | ||
182 | #endif /* __XFS_ERROR_H__ */ | 176 | #endif /* __XFS_ERROR_H__ */ |
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 75f2ef60e579..d22e62623437 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c | |||
@@ -138,7 +138,8 @@ xfs_efi_item_unpin( | |||
138 | 138 | ||
139 | if (remove) { | 139 | if (remove) { |
140 | ASSERT(!(lip->li_flags & XFS_LI_IN_AIL)); | 140 | ASSERT(!(lip->li_flags & XFS_LI_IN_AIL)); |
141 | xfs_trans_del_item(lip); | 141 | if (lip->li_desc) |
142 | xfs_trans_del_item(lip); | ||
142 | xfs_efi_item_free(efip); | 143 | xfs_efi_item_free(efip); |
143 | return; | 144 | return; |
144 | } | 145 | } |
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index f56d30e8040c..cec89dd5d7d2 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c | |||
@@ -612,12 +612,13 @@ out: | |||
612 | * | 612 | * |
613 | * We cannot use an inode here for this - that will push dirty state back up | 613 | * We cannot use an inode here for this - that will push dirty state back up |
614 | * into the VFS and then periodic inode flushing will prevent log covering from | 614 | * into the VFS and then periodic inode flushing will prevent log covering from |
615 | * making progress. Hence we log a field in the superblock instead. | 615 | * making progress. Hence we log a field in the superblock instead and use a |
616 | * synchronous transaction to ensure the superblock is immediately unpinned | ||
617 | * and can be written back. | ||
616 | */ | 618 | */ |
617 | int | 619 | int |
618 | xfs_fs_log_dummy( | 620 | xfs_fs_log_dummy( |
619 | xfs_mount_t *mp, | 621 | xfs_mount_t *mp) |
620 | int flags) | ||
621 | { | 622 | { |
622 | xfs_trans_t *tp; | 623 | xfs_trans_t *tp; |
623 | int error; | 624 | int error; |
@@ -632,8 +633,7 @@ xfs_fs_log_dummy( | |||
632 | 633 | ||
633 | /* log the UUID because it is an unchanging field */ | 634 | /* log the UUID because it is an unchanging field */ |
634 | xfs_mod_sb(tp, XFS_SB_UUID); | 635 | xfs_mod_sb(tp, XFS_SB_UUID); |
635 | if (flags & SYNC_WAIT) | 636 | xfs_trans_set_sync(tp); |
636 | xfs_trans_set_sync(tp); | ||
637 | return xfs_trans_commit(tp, 0); | 637 | return xfs_trans_commit(tp, 0); |
638 | } | 638 | } |
639 | 639 | ||
diff --git a/fs/xfs/xfs_fsops.h b/fs/xfs/xfs_fsops.h index a786c5212c1e..1b6a98b66886 100644 --- a/fs/xfs/xfs_fsops.h +++ b/fs/xfs/xfs_fsops.h | |||
@@ -25,6 +25,6 @@ extern int xfs_fs_counts(xfs_mount_t *mp, xfs_fsop_counts_t *cnt); | |||
25 | extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval, | 25 | extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval, |
26 | xfs_fsop_resblks_t *outval); | 26 | xfs_fsop_resblks_t *outval); |
27 | extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags); | 27 | extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags); |
28 | extern int xfs_fs_log_dummy(xfs_mount_t *mp, int flags); | 28 | extern int xfs_fs_log_dummy(struct xfs_mount *mp); |
29 | 29 | ||
30 | #endif /* __XFS_FSOPS_H__ */ | 30 | #endif /* __XFS_FSOPS_H__ */ |
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 55582bd66659..8a0f044750c3 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c | |||
@@ -337,7 +337,12 @@ xfs_iomap_prealloc_size( | |||
337 | int shift = 0; | 337 | int shift = 0; |
338 | int64_t freesp; | 338 | int64_t freesp; |
339 | 339 | ||
340 | alloc_blocks = XFS_B_TO_FSB(mp, ip->i_size); | 340 | /* |
341 | * rounddown_pow_of_two() returns an undefined result | ||
342 | * if we pass in alloc_blocks = 0. Hence the "+ 1" to | ||
343 | * ensure we always pass in a non-zero value. | ||
344 | */ | ||
345 | alloc_blocks = XFS_B_TO_FSB(mp, ip->i_size) + 1; | ||
341 | alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN, | 346 | alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN, |
342 | rounddown_pow_of_two(alloc_blocks)); | 347 | rounddown_pow_of_two(alloc_blocks)); |
343 | 348 | ||
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 0bf24b11d0c4..ae6fef1ff563 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
@@ -377,7 +377,7 @@ xfs_log_mount( | |||
377 | cmn_err(CE_NOTE, "XFS mounting filesystem %s", mp->m_fsname); | 377 | cmn_err(CE_NOTE, "XFS mounting filesystem %s", mp->m_fsname); |
378 | else { | 378 | else { |
379 | cmn_err(CE_NOTE, | 379 | cmn_err(CE_NOTE, |
380 | "!Mounting filesystem \"%s\" in no-recovery mode. Filesystem will be inconsistent.", | 380 | "Mounting filesystem \"%s\" in no-recovery mode. Filesystem will be inconsistent.", |
381 | mp->m_fsname); | 381 | mp->m_fsname); |
382 | ASSERT(mp->m_flags & XFS_MOUNT_RDONLY); | 382 | ASSERT(mp->m_flags & XFS_MOUNT_RDONLY); |
383 | } | 383 | } |
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 916eb7db14d9..3bd3291ef8d2 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h | |||
@@ -191,7 +191,7 @@ void xfs_log_ticket_put(struct xlog_ticket *ticket); | |||
191 | 191 | ||
192 | xlog_tid_t xfs_log_get_trans_ident(struct xfs_trans *tp); | 192 | xlog_tid_t xfs_log_get_trans_ident(struct xfs_trans *tp); |
193 | 193 | ||
194 | int xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, | 194 | void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, |
195 | struct xfs_log_vec *log_vector, | 195 | struct xfs_log_vec *log_vector, |
196 | xfs_lsn_t *commit_lsn, int flags); | 196 | xfs_lsn_t *commit_lsn, int flags); |
197 | bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip); | 197 | bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip); |
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 9dc8125d04e5..9ca59be08977 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c | |||
@@ -543,7 +543,7 @@ xlog_cil_push( | |||
543 | 543 | ||
544 | error = xlog_write(log, &lvhdr, tic, &ctx->start_lsn, NULL, 0); | 544 | error = xlog_write(log, &lvhdr, tic, &ctx->start_lsn, NULL, 0); |
545 | if (error) | 545 | if (error) |
546 | goto out_abort; | 546 | goto out_abort_free_ticket; |
547 | 547 | ||
548 | /* | 548 | /* |
549 | * now that we've written the checkpoint into the log, strictly | 549 | * now that we've written the checkpoint into the log, strictly |
@@ -569,8 +569,9 @@ restart: | |||
569 | } | 569 | } |
570 | spin_unlock(&cil->xc_cil_lock); | 570 | spin_unlock(&cil->xc_cil_lock); |
571 | 571 | ||
572 | /* xfs_log_done always frees the ticket on error. */ | ||
572 | commit_lsn = xfs_log_done(log->l_mp, tic, &commit_iclog, 0); | 573 | commit_lsn = xfs_log_done(log->l_mp, tic, &commit_iclog, 0); |
573 | if (error || commit_lsn == -1) | 574 | if (commit_lsn == -1) |
574 | goto out_abort; | 575 | goto out_abort; |
575 | 576 | ||
576 | /* attach all the transactions w/ busy extents to iclog */ | 577 | /* attach all the transactions w/ busy extents to iclog */ |
@@ -600,6 +601,8 @@ out_free_ticket: | |||
600 | kmem_free(new_ctx); | 601 | kmem_free(new_ctx); |
601 | return 0; | 602 | return 0; |
602 | 603 | ||
604 | out_abort_free_ticket: | ||
605 | xfs_log_ticket_put(tic); | ||
603 | out_abort: | 606 | out_abort: |
604 | xlog_cil_committed(ctx, XFS_LI_ABORTED); | 607 | xlog_cil_committed(ctx, XFS_LI_ABORTED); |
605 | return XFS_ERROR(EIO); | 608 | return XFS_ERROR(EIO); |
@@ -622,7 +625,7 @@ out_abort: | |||
622 | * background commit, returns without it held once background commits are | 625 | * background commit, returns without it held once background commits are |
623 | * allowed again. | 626 | * allowed again. |
624 | */ | 627 | */ |
625 | int | 628 | void |
626 | xfs_log_commit_cil( | 629 | xfs_log_commit_cil( |
627 | struct xfs_mount *mp, | 630 | struct xfs_mount *mp, |
628 | struct xfs_trans *tp, | 631 | struct xfs_trans *tp, |
@@ -637,11 +640,6 @@ xfs_log_commit_cil( | |||
637 | if (flags & XFS_TRANS_RELEASE_LOG_RES) | 640 | if (flags & XFS_TRANS_RELEASE_LOG_RES) |
638 | log_flags = XFS_LOG_REL_PERM_RESERV; | 641 | log_flags = XFS_LOG_REL_PERM_RESERV; |
639 | 642 | ||
640 | if (XLOG_FORCED_SHUTDOWN(log)) { | ||
641 | xlog_cil_free_logvec(log_vector); | ||
642 | return XFS_ERROR(EIO); | ||
643 | } | ||
644 | |||
645 | /* | 643 | /* |
646 | * do all the hard work of formatting items (including memory | 644 | * do all the hard work of formatting items (including memory |
647 | * allocation) outside the CIL context lock. This prevents stalling CIL | 645 | * allocation) outside the CIL context lock. This prevents stalling CIL |
@@ -701,7 +699,6 @@ xfs_log_commit_cil( | |||
701 | */ | 699 | */ |
702 | if (push) | 700 | if (push) |
703 | xlog_cil_push(log, 0); | 701 | xlog_cil_push(log, 0); |
704 | return 0; | ||
705 | } | 702 | } |
706 | 703 | ||
707 | /* | 704 | /* |
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 204d8e5fa7fa..aa0ebb776903 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c | |||
@@ -3800,7 +3800,7 @@ xlog_recover_finish( | |||
3800 | log->l_flags &= ~XLOG_RECOVERY_NEEDED; | 3800 | log->l_flags &= ~XLOG_RECOVERY_NEEDED; |
3801 | } else { | 3801 | } else { |
3802 | cmn_err(CE_DEBUG, | 3802 | cmn_err(CE_DEBUG, |
3803 | "!Ending clean XFS mount for filesystem: %s\n", | 3803 | "Ending clean XFS mount for filesystem: %s\n", |
3804 | log->l_mp->m_fsname); | 3804 | log->l_mp->m_fsname); |
3805 | } | 3805 | } |
3806 | return 0; | 3806 | return 0; |
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index f80a067a4658..76922793f64f 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c | |||
@@ -1137,7 +1137,7 @@ out_undo_fdblocks: | |||
1137 | if (blkdelta) | 1137 | if (blkdelta) |
1138 | xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, -blkdelta, rsvd); | 1138 | xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, -blkdelta, rsvd); |
1139 | out: | 1139 | out: |
1140 | ASSERT(error = 0); | 1140 | ASSERT(error == 0); |
1141 | return; | 1141 | return; |
1142 | } | 1142 | } |
1143 | 1143 | ||
@@ -1446,6 +1446,14 @@ xfs_log_item_batch_insert( | |||
1446 | * Bulk operation version of xfs_trans_committed that takes a log vector of | 1446 | * Bulk operation version of xfs_trans_committed that takes a log vector of |
1447 | * items to insert into the AIL. This uses bulk AIL insertion techniques to | 1447 | * items to insert into the AIL. This uses bulk AIL insertion techniques to |
1448 | * minimise lock traffic. | 1448 | * minimise lock traffic. |
1449 | * | ||
1450 | * If we are called with the aborted flag set, it is because a log write during | ||
1451 | * a CIL checkpoint commit has failed. In this case, all the items in the | ||
1452 | * checkpoint have already gone through IOP_COMMITED and IOP_UNLOCK, which | ||
1453 | * means that checkpoint commit abort handling is treated exactly the same | ||
1454 | * as an iclog write error even though we haven't started any IO yet. Hence in | ||
1455 | * this case all we need to do is IOP_COMMITTED processing, followed by an | ||
1456 | * IOP_UNPIN(aborted) call. | ||
1449 | */ | 1457 | */ |
1450 | void | 1458 | void |
1451 | xfs_trans_committed_bulk( | 1459 | xfs_trans_committed_bulk( |
@@ -1472,6 +1480,16 @@ xfs_trans_committed_bulk( | |||
1472 | if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0) | 1480 | if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0) |
1473 | continue; | 1481 | continue; |
1474 | 1482 | ||
1483 | /* | ||
1484 | * if we are aborting the operation, no point in inserting the | ||
1485 | * object into the AIL as we are in a shutdown situation. | ||
1486 | */ | ||
1487 | if (aborted) { | ||
1488 | ASSERT(XFS_FORCED_SHUTDOWN(ailp->xa_mount)); | ||
1489 | IOP_UNPIN(lip, 1); | ||
1490 | continue; | ||
1491 | } | ||
1492 | |||
1475 | if (item_lsn != commit_lsn) { | 1493 | if (item_lsn != commit_lsn) { |
1476 | 1494 | ||
1477 | /* | 1495 | /* |
@@ -1503,20 +1521,24 @@ xfs_trans_committed_bulk( | |||
1503 | } | 1521 | } |
1504 | 1522 | ||
1505 | /* | 1523 | /* |
1506 | * Called from the trans_commit code when we notice that | 1524 | * Called from the trans_commit code when we notice that the filesystem is in |
1507 | * the filesystem is in the middle of a forced shutdown. | 1525 | * the middle of a forced shutdown. |
1526 | * | ||
1527 | * When we are called here, we have already pinned all the items in the | ||
1528 | * transaction. However, neither IOP_COMMITTING or IOP_UNLOCK has been called | ||
1529 | * so we can simply walk the items in the transaction, unpin them with an abort | ||
1530 | * flag and then free the items. Note that unpinning the items can result in | ||
1531 | * them being freed immediately, so we need to use a safe list traversal method | ||
1532 | * here. | ||
1508 | */ | 1533 | */ |
1509 | STATIC void | 1534 | STATIC void |
1510 | xfs_trans_uncommit( | 1535 | xfs_trans_uncommit( |
1511 | struct xfs_trans *tp, | 1536 | struct xfs_trans *tp, |
1512 | uint flags) | 1537 | uint flags) |
1513 | { | 1538 | { |
1514 | struct xfs_log_item_desc *lidp; | 1539 | struct xfs_log_item_desc *lidp, *n; |
1515 | 1540 | ||
1516 | list_for_each_entry(lidp, &tp->t_items, lid_trans) { | 1541 | list_for_each_entry_safe(lidp, n, &tp->t_items, lid_trans) { |
1517 | /* | ||
1518 | * Unpin all but those that aren't dirty. | ||
1519 | */ | ||
1520 | if (lidp->lid_flags & XFS_LID_DIRTY) | 1542 | if (lidp->lid_flags & XFS_LID_DIRTY) |
1521 | IOP_UNPIN(lidp->lid_item, 1); | 1543 | IOP_UNPIN(lidp->lid_item, 1); |
1522 | } | 1544 | } |
@@ -1733,7 +1755,6 @@ xfs_trans_commit_cil( | |||
1733 | int flags) | 1755 | int flags) |
1734 | { | 1756 | { |
1735 | struct xfs_log_vec *log_vector; | 1757 | struct xfs_log_vec *log_vector; |
1736 | int error; | ||
1737 | 1758 | ||
1738 | /* | 1759 | /* |
1739 | * Get each log item to allocate a vector structure for | 1760 | * Get each log item to allocate a vector structure for |
@@ -1744,9 +1765,7 @@ xfs_trans_commit_cil( | |||
1744 | if (!log_vector) | 1765 | if (!log_vector) |
1745 | return ENOMEM; | 1766 | return ENOMEM; |
1746 | 1767 | ||
1747 | error = xfs_log_commit_cil(mp, tp, log_vector, commit_lsn, flags); | 1768 | xfs_log_commit_cil(mp, tp, log_vector, commit_lsn, flags); |
1748 | if (error) | ||
1749 | return error; | ||
1750 | 1769 | ||
1751 | current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); | 1770 | current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); |
1752 | xfs_trans_free(tp); | 1771 | xfs_trans_free(tp); |